1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The following define_subst rules are used to produce patterns representing
22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
23 ;; a vec_concat with zeroes. The order of the vec_concat operands differs
24 ;; for big-endian so we have a separate define_subst rule for each endianness.
25 (define_subst "add_vec_concat_subst_le"
26 [(set (match_operand:VDZ 0)
	(match_operand:VDZ 1))]
  "!BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_dup 1)
	  (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
34 (define_subst "add_vec_concat_subst_be"
35 [(set (match_operand:VDZ 0)
	(match_operand:VDZ 1))]
  "BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_dup 1)))])
43 ;; The subst_attr definitions used to annotate patterns further in the file.
44 ;; Patterns that need to have the above substitutions added to them should
45 ;; have <vczle><vczbe> added to their name.
46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
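
;; For example, naming a pattern "add<mode>3<vczle><vczbe>" generates, besides
;; the plain "add<mode>3", the variants "add<mode>3_vec_concatz_le" and
;; "add<mode>3_vec_concatz_be", whose result is the 64-bit operation
;; vec_concat'd with zeroes in the appropriate endian order.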
49 (define_expand "mov<mode>"
50 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
51 (match_operand:VALL_F16 1 "general_operand"))]
54 /* Force the operand into a register if it is not an
55 immediate whose use can be replaced with xzr.
56 If the mode is 16 bytes wide, then we will be doing
57 a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
60 if (GET_CODE (operands[0]) == MEM
61 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
62 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
63 && aarch64_mem_pair_operand (operands[0], DImode))
64 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
65 operands[1] = force_reg (<MODE>mode, operands[1]);
67 /* If a constant is too complex to force to memory (e.g. because it
68 contains CONST_POLY_INTs), build it up from individual elements instead.
69 We should only need to do this before RA; aarch64_legitimate_constant_p
70 should ensure that we don't try to rematerialize the constant later. */
71 if (GET_CODE (operands[1]) == CONST_VECTOR
72 && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
74 aarch64_expand_vector_init (operands[0], operands[1]);
80 (define_expand "movmisalign<mode>"
81 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
82 (match_operand:VALL_F16 1 "general_operand"))]
83 "TARGET_FLOAT && !STRICT_ALIGNMENT"
85 /* This pattern is not permitted to fail during expansion: if both arguments
86 are non-registers (e.g. memory := constant, which can be created by the
87 auto-vectorizer), force operand 1 into a register. */
88 if (!register_operand (operands[0], <MODE>mode)
89 && !register_operand (operands[1], <MODE>mode))
90 operands[1] = force_reg (<MODE>mode, operands[1]);
93 (define_insn "aarch64_simd_dup<mode>"
94 [(set (match_operand:VDQ_I 0 "register_operand")
96 (match_operand:<VEL> 1 "register_operand")))]
98 {@ [ cons: =0 , 1 ; attrs: type ]
99 [ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
100 [ w , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
104 (define_insn "aarch64_simd_dup<mode>"
105 [(set (match_operand:VDQF_F16 0 "register_operand")
106 (vec_duplicate:VDQF_F16
107 (match_operand:<VEL> 1 "register_operand")))]
109 {@ [ cons: =0 , 1 ; attrs: type ]
110 [ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
111 [ w , r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
115 (define_insn "aarch64_dup_lane<mode>"
116 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
117 (vec_duplicate:VALL_F16
119 (match_operand:VALL_F16 1 "register_operand" "w")
120 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
124 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
125 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
127 [(set_attr "type" "neon_dup<q>")]
130 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
131 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
132 (vec_duplicate:VALL_F16_NO_V2Q
134 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
135 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
139 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
140 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
142 [(set_attr "type" "neon_dup<q>")]
145 (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
146 [(set (match_operand:VDMOV 0 "nonimmediate_operand")
147 (match_operand:VDMOV 1 "general_operand"))]
149 && (register_operand (operands[0], <MODE>mode)
150 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
151 {@ [cons: =0, 1; attrs: type, arch, length]
152 [w , m ; neon_load1_1reg<q> , * , *] ldr\t%d0, %1
153 [r , m ; load_8 , * , *] ldr\t%x0, %1
154 [m , Dz; store_8 , * , *] str\txzr, %0
155 [m , w ; neon_store1_1reg<q>, * , *] str\t%d1, %0
156 [m , r ; store_8 , * , *] str\t%x1, %0
157 [w , w ; neon_logic<q> , simd , *] mov\t%0.<Vbtype>, %1.<Vbtype>
158 [w , w ; neon_logic<q> , * , *] fmov\t%d0, %d1
159 [?r, w ; neon_to_gp<q> , base_simd, *] umov\t%0, %1.d[0]
160 [?r, w ; neon_to_gp<q> , * , *] fmov\t%x0, %d1
161 [?w, r ; f_mcr , * , *] fmov\t%d0, %1
162 [?r, r ; mov_reg , * , *] mov\t%0, %1
163 [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
164 [w , Dz; f_mcr , * , *] fmov\t%d0, xzr
165 [w , Dx; neon_move , simd , 8] #
167 "CONST_INT_P (operands[1])
168 && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
169 && FP_REGNUM_P (REGNO (operands[0]))"
172 aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
177 (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
178 [(set (match_operand:VQMOV 0 "nonimmediate_operand")
179 (match_operand:VQMOV 1 "general_operand"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
183 {@ [cons: =0, 1; attrs: type, arch, length]
184 [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
185 [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
186 [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
187 [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
188 [w , w ; * , sve , 4] mov\t%Z0.d, %Z1.d
189 [?r , w ; multiple , * , 8] #
190 [?w , r ; multiple , * , 8] #
191 [?r , r ; multiple , * , 8] #
192 [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
193 [w , Dz; fmov , * , 4] fmov\t%d0, xzr
194 [w , Dx; neon_move , simd, 8] #
197 && ((REG_P (operands[0])
198 && REG_P (operands[1])
199 && !(FP_REGNUM_P (REGNO (operands[0]))
200 && FP_REGNUM_P (REGNO (operands[1]))))
201 || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
202 && FP_REGNUM_P (REGNO (operands[0]))))"
205 if (GP_REGNUM_P (REGNO (operands[0]))
206 && GP_REGNUM_P (REGNO (operands[1])))
207 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
210 if (FP_REGNUM_P (REGNO (operands[0]))
211 && <MODE>mode == V2DImode
212 && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
216 aarch64_split_simd_move (operands[0], operands[1]);
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
225 (define_insn "aarch64_store_lane0<mode>"
226 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
227 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
228 (parallel [(match_operand 2 "const_int_operand" "n")])))]
230 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
231 "str\\t%<Vetype>1, %0"
232 [(set_attr "type" "neon_store1_1reg<q>")]
235 (define_insn "aarch64_simd_stp<mode>"
236 [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
237 (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
239 {@ [ cons: =0 , 1 ; attrs: type ]
240 [ Umn , w ; neon_stp ] stp\t%<Vetype>1, %<Vetype>1, %y0
241 [ Umn , r ; store_<ldpstp_vel_sz> ] stp\t%<vwcore>1, %<vwcore>1, %y0
245 (define_expand "@aarch64_split_simd_mov<mode>"
246 [(set (match_operand:VQMOV 0)
247 (match_operand:VQMOV 1))]
250 rtx dst = operands[0];
251 rtx src = operands[1];
253 if (GP_REGNUM_P (REGNO (src)))
255 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
256 rtx src_high_part = gen_highpart (<VHALF>mode, src);
257 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
259 emit_move_insn (dst_low_part, src_low_part);
260 emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
265 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
266 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
267 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
268 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
269 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
270 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
276 (define_expand "aarch64_get_half<mode>"
277 [(set (match_operand:<VHALF> 0 "register_operand")
279 (match_operand:VQMOV 1 "register_operand")
280 (match_operand 2 "ascending_int_parallel")))]
283 if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
285 emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
291 (define_expand "aarch64_get_low<mode>"
292 [(match_operand:<VHALF> 0 "register_operand")
293 (match_operand:VQMOV 1 "register_operand")]
296 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
297 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
302 (define_expand "aarch64_get_high<mode>"
303 [(match_operand:<VHALF> 0 "register_operand")
304 (match_operand:VQMOV 1 "register_operand")]
307 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
308 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
313 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
314 [(set (match_operand:<VHALF> 0 "register_operand")
316 (match_operand:VQMOV_NO2E 1 "register_operand")
317 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
319 {@ [ cons: =0 , 1 ; attrs: type , arch ]
320 [ w , w ; mov_reg , simd ] #
321 [ ?r , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
322 [ ?r , w ; f_mrc , * ] fmov\t%0, %d1
324 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
325 [(set (match_dup 0) (match_dup 1))]
327 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
329 [(set_attr "length" "4")]
332 (define_insn "aarch64_simd_mov_from_<mode>high"
333 [(set (match_operand:<VHALF> 0 "register_operand")
335 (match_operand:VQMOV_NO2E 1 "register_operand")
336 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
338 {@ [ cons: =0 , 1 ; attrs: type , arch ]
339 [ w , w ; neon_dup<q> , simd ] dup\t%d0, %1.d[1]
340 [ w , w ; * , sve ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
341 [ ?r , w ; neon_to_gp<q> , simd ] umov\t%0, %1.d[1]
342 [ ?r , w ; f_mrc , * ] fmov\t%0, %1.d[1]
344 [(set_attr "length" "4")]
347 (define_insn "orn<mode>3<vczle><vczbe>"
348 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
349 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
350 (match_operand:VDQ_I 2 "register_operand" "w")))]
352 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
353 [(set_attr "type" "neon_logic<q>")]
356 (define_insn "bic<mode>3<vczle><vczbe>"
357 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
358 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
359 (match_operand:VDQ_I 2 "register_operand" "w")))]
361 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
362 [(set_attr "type" "neon_logic<q>")]
365 (define_insn "add<mode>3<vczle><vczbe>"
366 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
367 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
368 (match_operand:VDQ_I 2 "register_operand" "w")))]
370 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
371 [(set_attr "type" "neon_add<q>")]
374 (define_insn "sub<mode>3<vczle><vczbe>"
375 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
376 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
377 (match_operand:VDQ_I 2 "register_operand" "w")))]
379 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
380 [(set_attr "type" "neon_sub<q>")]
383 (define_insn "mul<mode>3<vczle><vczbe>"
384 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
385 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
386 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
388 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
389 [(set_attr "type" "neon_mul_<Vetype><q>")]
392 (define_insn "bswap<mode>2"
393 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
394 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
396 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
397 [(set_attr "type" "neon_rev<q>")]
400 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
401 [(set (match_operand:VB 0 "register_operand" "=w")
402 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
405 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
406 [(set_attr "type" "neon_rbit")]
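
;; ctz (x) is computed below as clz (bit_reverse (x)): the BSWAP reverses the
;; bytes of each element and the byte-wise RBIT then reverses the bits within
;; each byte, so together they bit-reverse the whole element before the CLZ.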
409 (define_expand "ctz<mode>2"
410 [(set (match_operand:VS 0 "register_operand")
411 (ctz:VS (match_operand:VS 1 "register_operand")))]
414 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
415 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
417 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
418 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
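
;; xorsign (x, y) flips the sign of x wherever y is negative; per lane it is
;; just x ^ (y & SIGN_MASK).  As an illustrative scalar C sketch (for
;; exposition only, not part of any pattern):
;;
;;   float xorsignf (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, sizeof (xi));
;;     memcpy (&yi, &y, sizeof (yi));
;;     xi ^= yi & 0x80000000u;	/* Flip x's sign iff y is negative.  */
;;     memcpy (&x, &xi, sizeof (x));
;;     return x;
;;   }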
423 (define_expand "@xorsign<mode>3"
424 [(match_operand:VHSDF 0 "register_operand")
425 (match_operand:VHSDF 1 "register_operand")
426 (match_operand:VHSDF 2 "register_operand")]
430 machine_mode imode = <V_INT_EQUIV>mode;
431 rtx v_bitmask = gen_reg_rtx (imode);
432 rtx op1x = gen_reg_rtx (imode);
433 rtx op2x = gen_reg_rtx (imode);
435 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
436 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
438 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
440 emit_move_insn (v_bitmask,
441 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
442 HOST_WIDE_INT_M1U << bits));
444 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
445 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
446 emit_move_insn (operands[0],
447 lowpart_subreg (<MODE>mode, op1x, imode));
;; The fcadd and fcmla patterns are kept as UNSPECs because their use must
;; guarantee that the source vectors are contiguous.  It would be wrong to
;; describe the operation without also being able to describe the permute
;; that is required, but even if that were done the permute would have been
;; created as a LOAD_LANES, which means the values in the registers are in
;; the wrong order.
458 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
459 [(set (match_operand:VHSDF 0 "register_operand" "=w")
460 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
461 (match_operand:VHSDF 2 "register_operand" "w")]
464 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
465 [(set_attr "type" "neon_fcadd")]
468 (define_expand "cadd<rot><mode>3"
469 [(set (match_operand:VHSDF 0 "register_operand")
470 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
471 (match_operand:VHSDF 2 "register_operand")]
473 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
476 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
477 [(set (match_operand:VHSDF 0 "register_operand" "=w")
478 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
479 (match_operand:VHSDF 3 "register_operand" "w")]
481 (match_operand:VHSDF 1 "register_operand" "0")))]
483 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
484 [(set_attr "type" "neon_fcmla")]
488 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
489 [(set (match_operand:VHSDF 0 "register_operand" "=w")
490 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
491 (match_operand:VHSDF 3 "register_operand" "w")
492 (match_operand:SI 4 "const_int_operand" "n")]
494 (match_operand:VHSDF 1 "register_operand" "0")))]
497 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
498 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
500 [(set_attr "type" "neon_fcmla")]
503 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
504 [(set (match_operand:V4HF 0 "register_operand" "=w")
505 (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
506 (match_operand:V8HF 3 "register_operand" "w")
507 (match_operand:SI 4 "const_int_operand" "n")]
509 (match_operand:V4HF 1 "register_operand" "0")))]
512 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
513 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
515 [(set_attr "type" "neon_fcmla")]
518 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
519 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
520 (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
521 (match_operand:<VHALF> 3 "register_operand" "w")
522 (match_operand:SI 4 "const_int_operand" "n")]
524 (match_operand:VQ_HSF 1 "register_operand" "0")))]
527 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
529 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
530 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
532 [(set_attr "type" "neon_fcmla")]
535 ;; The complex mla/mls operations always need to expand to two instructions.
536 ;; The first operation does half the computation and the second does the
537 ;; remainder. Because of this, expand early.
538 (define_expand "cml<fcmac1><conj_op><mode>4"
539 [(set (match_operand:VHSDF 0 "register_operand")
540 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
541 (match_operand:VHSDF 2 "register_operand")]
543 (match_operand:VHSDF 3 "register_operand")))]
544 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
546 rtx tmp = gen_reg_rtx (<MODE>mode);
547 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
548 operands[2], operands[1]));
549 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
550 operands[2], operands[1]));
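
;; Assuming the usual FCMLA semantics, the two rotations above combine to
;; give, per complex lane (with signs adjusted for the conjugating variants):
;;   re (r) += re (a) * re (b) - im (a) * im (b)
;;   im (r) += re (a) * im (b) + im (a) * re (b)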
554 ;; The complex mul operations always need to expand to two instructions.
555 ;; The first operation does half the computation and the second does the
556 ;; remainder. Because of this, expand early.
557 (define_expand "cmul<conj_op><mode>3"
558 [(set (match_operand:VHSDF 0 "register_operand")
559 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
560 (match_operand:VHSDF 2 "register_operand")]
562 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
564 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
565 rtx res1 = gen_reg_rtx (<MODE>mode);
566 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
567 operands[2], operands[1]));
568 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
569 operands[2], operands[1]));
573 ;; These expands map to the Dot Product optab the vectorizer checks for
;; and to the intrinsics pattern.
575 ;; The auto-vectorizer expects a dot product builtin that also does an
576 ;; accumulation into the provided register.
577 ;; Given the following pattern
;; for (i=0; i<len; i++) {
;;     r += a[i] * b[i];
;; }
585 ;; This can be auto-vectorized to
586 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
588 ;; given enough iterations. However the vectorizer can keep unrolling the loop
589 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
590 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; and so the vectorizer provides r, into which the result has to be
;; accumulated.
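
;; Concretely, per 32-bit lane the insn below computes
;;   op0[i] = op3[i] + op1[4*i+0]*op2[4*i+0] + op1[4*i+1]*op2[4*i+1]
;;            + op1[4*i+2]*op2[4*i+2] + op1[4*i+3]*op2[4*i+3]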
594 (define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
595 [(set (match_operand:VS 0 "register_operand" "=w")
597 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
598 (match_operand:<VSI2QI> 2 "register_operand" "w")]
600 (match_operand:VS 3 "register_operand" "0")))]
602 "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
603 [(set_attr "type" "neon_dot<q>")]
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot
607 ;; (vector) Dot Product operation and the vectorized optab.
608 (define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
609 [(set (match_operand:VS 0 "register_operand" "=w")
611 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
612 (match_operand:<VSI2QI> 2 "register_operand" "w")]
614 (match_operand:VS 3 "register_operand" "0")))]
616 "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
617 [(set_attr "type" "neon_dot<q>")]
620 ;; These instructions map to the __builtins for the Dot Product
621 ;; indexed operations.
622 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
623 [(set (match_operand:VS 0 "register_operand" "=w")
625 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
626 (match_operand:V8QI 3 "register_operand" "<h_con>")
627 (match_operand:SI 4 "immediate_operand" "i")]
629 (match_operand:VS 1 "register_operand" "0")))]
632 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
633 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
635 [(set_attr "type" "neon_dot<q>")]
638 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
639 [(set (match_operand:VS 0 "register_operand" "=w")
641 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
642 (match_operand:V16QI 3 "register_operand" "<h_con>")
643 (match_operand:SI 4 "immediate_operand" "i")]
645 (match_operand:VS 1 "register_operand" "0")))]
648 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
649 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
651 [(set_attr "type" "neon_dot<q>")]
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot, sudot
655 ;; (by element) Dot Product operations.
656 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
657 [(set (match_operand:VS 0 "register_operand" "=w")
659 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
660 (match_operand:VB 3 "register_operand" "w")
661 (match_operand:SI 4 "immediate_operand" "i")]
663 (match_operand:VS 1 "register_operand" "0")))]
666 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
667 int lane = INTVAL (operands[4]);
668 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
669 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
671 [(set_attr "type" "neon_dot<VS:q>")]
674 (define_expand "copysign<mode>3"
675 [(match_operand:VHSDF 0 "register_operand")
676 (match_operand:VHSDF 1 "register_operand")
677 (match_operand:VHSDF 2 "nonmemory_operand")]
680 machine_mode int_mode = <V_INT_EQUIV>mode;
681 rtx v_bitmask = gen_reg_rtx (int_mode);
682 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
684 emit_move_insn (v_bitmask,
685 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
686 HOST_WIDE_INT_M1U << bits));
  /* copysign (x, -1) should instead be expanded as orr with the sign
     bit.  */
690 if (!REG_P (operands[2]))
692 rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
693 if (GET_CODE (op2_elt) == CONST_DOUBLE
694 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
696 emit_insn (gen_ior<v_int_equiv>3 (
697 lowpart_subreg (int_mode, operands[0], <MODE>mode),
698 lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
703 operands[2] = force_reg (<MODE>mode, operands[2]);
704 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
705 operands[2], operands[1]));
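
;; Per lane the BSL above computes (y & SIGN_MASK) | (x & ~SIGN_MASK), i.e.
;; the sign bit of y merged with the magnitude of x, which is copysign (x, y).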
710 (define_insn "mul_lane<mode>3"
711 [(set (match_operand:VMULD 0 "register_operand" "=w")
715 (match_operand:<VCOND> 2 "register_operand" "<h_con>")
716 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
717 (match_operand:VMULD 1 "register_operand" "w")))]
720 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
721 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
723 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
726 (define_insn "mul_laneq<mode>3"
727 [(set (match_operand:VMUL 0 "register_operand" "=w")
731 (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
732 (parallel [(match_operand:SI 3 "immediate_operand")])))
733 (match_operand:VMUL 1 "register_operand" "w")))]
736 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
737 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
739 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
742 (define_insn "mul_n<mode>3"
743 [(set (match_operand:VMUL 0 "register_operand" "=w")
746 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
747 (match_operand:VMUL 1 "register_operand" "w")))]
749 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
750 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
753 (define_insn "@aarch64_rsqrte<mode>"
754 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
755 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
758 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
759 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
761 (define_insn "@aarch64_rsqrts<mode>"
762 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
763 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
764 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
767 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
768 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
770 (define_expand "rsqrt<mode>2"
771 [(set (match_operand:VALLF 0 "register_operand")
772 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
776 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
780 (define_insn "aarch64_ursqrte<mode>"
781 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
782 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
785 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
786 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
788 (define_insn "*aarch64_mul3_elt_to_64v2df"
789 [(set (match_operand:DF 0 "register_operand" "=w")
792 (match_operand:V2DF 1 "register_operand" "w")
793 (parallel [(match_operand:SI 2 "immediate_operand")]))
794 (match_operand:DF 3 "register_operand" "w")))]
797 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
798 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
800 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
803 (define_insn "neg<mode>2<vczle><vczbe>"
804 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
805 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
807 "neg\t%0.<Vtype>, %1.<Vtype>"
808 [(set_attr "type" "neon_neg<q>")]
811 (define_insn "abs<mode>2<vczle><vczbe>"
812 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
813 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
815 "abs\t%0.<Vtype>, %1.<Vtype>"
816 [(set_attr "type" "neon_abs<q>")]
819 ;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
822 (define_insn "aarch64_abs<mode><vczle><vczbe>"
823 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
825 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
828 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
829 [(set_attr "type" "neon_abs<q>")]
832 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
834 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
835 ;; Whereas SABD would return 192 (-64 signed) on the above example.
836 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
837 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
838 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
841 (match_operand:VDQ_BHSI 1 "register_operand" "w")
842 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
847 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
848 [(set_attr "type" "neon_abd<q>")]
851 (define_expand "<su>abd<mode>3"
852 [(match_operand:VDQ_BHSI 0 "register_operand")
854 (match_operand:VDQ_BHSI 1 "register_operand")
855 (match_operand:VDQ_BHSI 2 "register_operand"))]
858 emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
863 (define_insn "aarch64_<su>abdl<mode>"
864 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
868 (match_operand:VD_BHSI 1 "register_operand" "w")
869 (match_operand:VD_BHSI 2 "register_operand" "w"))
874 "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
875 [(set_attr "type" "neon_abd<q>")]
878 (define_insn "aarch64_<su>abdl2<mode>_insn"
879 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
884 (match_operand:VQW 1 "register_operand" "w")
885 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
887 (match_operand:VQW 2 "register_operand" "w")
898 "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
899 [(set_attr "type" "neon_abd<q>")]
902 (define_expand "aarch64_<su>abdl2<mode>"
903 [(match_operand:<VDBLW> 0 "register_operand")
905 (match_operand:VQW 1 "register_operand")
906 (match_operand:VQW 2 "register_operand"))]
909 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
910 emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
916 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
922 (match_operand:VQW 1 "register_operand" "w")
923 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
926 (match_operand:VQW 2 "register_operand" "w")
929 "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
930 [(set_attr "type" "neon_abd_long")]
933 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
934 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
939 (match_operand:VQW 1 "register_operand" "w")
940 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
943 (match_operand:VQW 2 "register_operand" "w")
946 "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
947 [(set_attr "type" "neon_abd_long")]
950 (define_expand "vec_widen_<su>abd_hi_<mode>"
951 [(match_operand:<VWIDE> 0 "register_operand")
952 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
953 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
956 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
957 emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
963 (define_expand "vec_widen_<su>abd_lo_<mode>"
964 [(match_operand:<VWIDE> 0 "register_operand")
965 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
966 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
969 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
970 emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
976 (define_insn "aarch64_<su>abal<mode>"
977 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
982 (match_operand:VD_BHSI 2 "register_operand" "w")
983 (match_operand:VD_BHSI 3 "register_operand" "w"))
987 (match_operand:<VWIDE> 1 "register_operand" "0")))]
989 "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
990 [(set_attr "type" "neon_arith_acc<q>")]
993 (define_insn "aarch64_<su>abal2<mode>_insn"
994 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1000 (match_operand:VQW 2 "register_operand" "w")
1001 (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
1003 (match_operand:VQW 3 "register_operand" "w")
1012 (match_operand:<VDBLW> 1 "register_operand" "0")))]
1014 "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1015 [(set_attr "type" "neon_arith_acc<q>")]
1018 (define_expand "aarch64_<su>abal2<mode>"
1019 [(match_operand:<VDBLW> 0 "register_operand")
1020 (match_operand:<VDBLW> 1 "register_operand")
1022 (match_operand:VQW 2 "register_operand")
1023 (match_operand:VQW 3 "register_operand"))]
1026 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1027 emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
1028 operands[2], operands[3], hi));
1033 (define_expand "aarch64_<su>adalp<mode>"
1034 [(set (match_operand:<VDBLW> 0 "register_operand")
1038 (ANY_EXTEND:<V2XWIDE>
1039 (match_operand:VDQV_L 2 "register_operand"))
1041 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1043 (match_operand:<VDBLW> 1 "register_operand")))]
1046 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
1047 operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
1048 operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
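
;; The two stepped parallels select the even- and odd-numbered widened
;; elements, so each result lane is acc[i] + widen (in[2*i]) + widen (in[2*i+1]).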
1052 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1053 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1057 (ANY_EXTEND:<V2XWIDE>
1058 (match_operand:VDQV_L 2 "register_operand" "w"))
1059 (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
1060 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1061 (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
1062 (match_operand:<VDBLW> 1 "register_operand" "0")))]
1064 && !rtx_equal_p (operands[3], operands[4])"
1065 "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
1066 [(set_attr "type" "neon_reduc_add<q>")]
1069 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1070 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
1071 ;; reduction of the difference into a V4SI vector and accumulate that into
1072 ;; operand 3 before copying that into the result operand 0.
1073 ;; Perform that with a sequence of:
1074 ;; UABDL2 tmp.8h, op1.16b, op2.16b
1075 ;; UABAL tmp.8h, op1.8b, op2.8b
1076 ;; UADALP op3.4s, tmp.8h
1077 ;; MOV op0, op3 // should be eliminated in later passes.
1079 ;; For TARGET_DOTPROD we do:
1080 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1081 ;; UABD tmp2.16b, op1.16b, op2.16b
1082 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1083 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
1085 ;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT, as the absolute difference is
;; non-negative.
1089 (define_expand "<su>sadv16qi"
1090 [(use (match_operand:V4SI 0 "register_operand"))
1091 (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
1092 (match_operand:V16QI 2 "register_operand"))
1093 (use (match_operand:V4SI 3 "register_operand"))]
1098 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1099 rtx abd = gen_reg_rtx (V16QImode);
1100 emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
1101 emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
1104 rtx reduc = gen_reg_rtx (V8HImode);
1105 emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
1107 emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1108 gen_lowpart (V8QImode, operands[1]),
1109 gen_lowpart (V8QImode,
1111 emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
1112 emit_move_insn (operands[0], operands[3]);
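
;; Either way the value computed is, per 32-bit lane of the result:
;;   op0[i] = op3[i] + sum over j in 0..3 of |op1[4*i+j] - op2[4*i+j]|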
1117 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1118 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1119 (plus:VDQ_BHSI (minus:VDQ_BHSI
1121 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1122 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1126 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1128 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1129 [(set_attr "type" "neon_arith_acc<q>")]
1132 (define_insn "fabd<mode>3<vczle><vczbe>"
1133 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
1136 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
1137 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1139 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
1140 [(set_attr "type" "neon_fp_abd_<stype><q>")]
1143 ;; For AND (vector, register) and BIC (vector, immediate)
1144 (define_insn "and<mode>3<vczle><vczbe>"
1145 [(set (match_operand:VDQ_I 0 "register_operand")
1146 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1147 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm")))]
1149 {@ [ cons: =0 , 1 , 2 ]
1150 [ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1151 [ w , 0 , Db ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_BIC);
1153 [(set_attr "type" "neon_logic<q>")]
1156 ;; For ORR (vector, register) and ORR (vector, immediate)
1157 (define_insn "ior<mode>3<vczle><vczbe>"
1158 [(set (match_operand:VDQ_I 0 "register_operand")
1159 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1160 (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
1162 {@ [ cons: =0 , 1 , 2; attrs: arch ]
1163 [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1164 [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
1165 [ w , 0 , Do ; simd ] \
1166 << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
1169 [(set_attr "type" "neon_logic<q>")]
1172 (define_insn "xor<mode>3<vczle><vczbe>"
1173 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1175 (match_operand:VDQ_I 2 "register_operand" "w")))]
1177 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1178 [(set_attr "type" "neon_logic<q>")]
1181 (define_insn "one_cmpl<mode>2<vczle><vczbe>"
1182 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1183 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1185 "not\t%0.<Vbtype>, %1.<Vbtype>"
1186 [(set_attr "type" "neon_logic<q>")]
1189 (define_insn "aarch64_simd_vec_set<mode>"
1190 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1192 (vec_duplicate:VALL_F16
1193 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1194 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1195 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1196 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1198 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1199 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1200 switch (which_alternative)
1203 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1205 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1207 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1212 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1215 (define_insn "aarch64_simd_vec_set_zero<mode>"
1216 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1218 (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
1219 (match_operand:VALL_F16 3 "register_operand" "0")
1220 (match_operand:SI 2 "immediate_operand" "i")))]
1221 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1223 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1224 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1225 return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
1229 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1230 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1232 (vec_duplicate:VALL_F16
1234 (match_operand:VALL_F16 3 "register_operand" "w")
1236 [(match_operand:SI 4 "immediate_operand" "i")])))
1237 (match_operand:VALL_F16 1 "register_operand" "0")
1238 (match_operand:SI 2 "immediate_operand" "i")))]
1239 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1241 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1242 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1243 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1245 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1247 [(set_attr "type" "neon_ins<q>")]
1250 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1251 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1252 (vec_merge:VALL_F16_NO_V2Q
1253 (vec_duplicate:VALL_F16_NO_V2Q
1255 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1257 [(match_operand:SI 4 "immediate_operand" "i")])))
1258 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1259 (match_operand:SI 2 "immediate_operand" "i")))]
1260 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1262 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1263 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1264 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1265 INTVAL (operands[4]));
1267 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1269 [(set_attr "type" "neon_ins<q>")]
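
;; signbit is implemented below as an unsigned shift right by the sign-bit
;; position, so each result lane is 1 for a negative input lane and 0
;; otherwise.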
1272 (define_expand "signbit<mode>2"
1273 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1274 (use (match_operand:VDQSF 1 "register_operand"))]
1277 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1278 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1280 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1282 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1287 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1288 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1289 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1290 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1292 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1293 [(set_attr "type" "neon_shift_imm<q>")]
1296 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1297 [(set (match_operand:VDQ_I 0 "register_operand")
1298 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1299 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm")))]
1301 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
1302 [ w , w , D1 ; neon_compare<q> ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1303 [ w , w , Dr ; neon_shift_imm<q> ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
1307 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1308 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1311 (match_operand:VDQ_I 2 "register_operand" "w")
1312 (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
1313 (match_operand:VDQ_I 1 "register_operand" "0")))]
1315 "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1316 [(set_attr "type" "neon_shift_acc<q>")]
1319 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1320 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
1325 (<SHIFTEXTEND>:<V2XWIDE>
1326 (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
1327 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
1328 (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1329 (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
1331 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
1332 "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1333 [(set_attr "type" "neon_shift_acc<q>")]
1336 (define_expand "aarch64_<sra_op>sra_n<mode>"
1337 [(set (match_operand:VDQ_I 0 "register_operand")
1340 (match_operand:VDQ_I 2 "register_operand")
1341 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1342 (match_operand:VDQ_I 1 "register_operand")))]
1346 = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
1350 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1351 [(match_operand:VSDQ_I_DI 0 "register_operand")
1352 (match_operand:VSDQ_I_DI 1 "register_operand")
1354 (match_operand:VSDQ_I_DI 2 "register_operand")
1355 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1358 /* Use this expander to create the rounding constant vector, which is
1359 1 << (shift - 1). Use wide_int here to ensure that the right TImode
1360 RTL is generated when handling the DImode expanders. */
1361 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1362 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
1363 rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
1364 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1365 if (VECTOR_MODE_P (<MODE>mode))
1367 shft = gen_const_vec_duplicate (<MODE>mode, shft);
1368 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1371 emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
1372 operands[2], shft, rnd));
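
;; For example, a rounding shift right by 3 adds the rounding constant
;; 1 << 2 == 4 before shifting, i.e. it computes (x + 4) >> 3 in the wider
;; mode.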
1377 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1378 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1379 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1380 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1382 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1383 [(set_attr "type" "neon_shift_imm<q>")]
1386 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1387 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1388 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1389 (match_operand:VDQ_I 2 "register_operand" "w")))]
1391 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1392 [(set_attr "type" "neon_shift_reg<q>")]
1395 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1396 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1397 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1398 (match_operand:VDQ_I 2 "register_operand" "w")]
1399 UNSPEC_ASHIFT_UNSIGNED))]
1401 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1402 [(set_attr "type" "neon_shift_reg<q>")]
1405 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1406 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1407 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1408 (match_operand:VDQ_I 2 "register_operand" "w")]
1409 UNSPEC_ASHIFT_SIGNED))]
1411 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1412 [(set_attr "type" "neon_shift_reg<q>")]
1415 (define_expand "ashl<mode>3"
1416 [(match_operand:VDQ_I 0 "register_operand")
1417 (match_operand:VDQ_I 1 "register_operand")
1418 (match_operand:SI 2 "general_operand")]
1421 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1424 if (CONST_INT_P (operands[2]))
1426 shift_amount = INTVAL (operands[2]);
1427 if (shift_amount >= 0 && shift_amount < bit_width)
1429 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1431 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1438 operands[2] = force_reg (SImode, operands[2]);
1440 rtx tmp = gen_reg_rtx (<MODE>mode);
1441 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1444 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1448 (define_expand "lshr<mode>3"
1449 [(match_operand:VDQ_I 0 "register_operand")
1450 (match_operand:VDQ_I 1 "register_operand")
1451 (match_operand:SI 2 "general_operand")]
1454 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1457 if (CONST_INT_P (operands[2]))
1459 shift_amount = INTVAL (operands[2]);
1460 if (shift_amount > 0 && shift_amount <= bit_width)
1462 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1464 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1471 operands[2] = force_reg (SImode, operands[2]);
1473 rtx tmp = gen_reg_rtx (SImode);
1474 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1475 emit_insn (gen_negsi2 (tmp, operands[2]));
1476 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1477 convert_to_mode (<VEL>mode, tmp, 0)));
1478 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1483 (define_expand "ashr<mode>3"
1484 [(match_operand:VDQ_I 0 "register_operand")
1485 (match_operand:VDQ_I 1 "register_operand")
1486 (match_operand:SI 2 "general_operand")]
1489 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1492 if (CONST_INT_P (operands[2]))
1494 shift_amount = INTVAL (operands[2]);
1495 if (shift_amount > 0 && shift_amount <= bit_width)
1497 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1499 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1506 operands[2] = force_reg (SImode, operands[2]);
1508 rtx tmp = gen_reg_rtx (SImode);
1509 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1510 emit_insn (gen_negsi2 (tmp, operands[2]));
1511 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1513 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1518 (define_expand "vashl<mode>3"
1519 [(match_operand:VDQ_I 0 "register_operand")
1520 (match_operand:VDQ_I 1 "register_operand")
1521 (match_operand:VDQ_I 2 "register_operand")]
1524 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1529 (define_expand "vashr<mode>3"
1530 [(match_operand:VDQ_I 0 "register_operand")
1531 (match_operand:VDQ_I 1 "register_operand")
1532 (match_operand:VDQ_I 2 "register_operand")]
1535 rtx neg = gen_reg_rtx (<MODE>mode);
1536 emit (gen_neg<mode>2 (neg, operands[2]));
1537 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1543 (define_expand "aarch64_ashr_simddi"
1544 [(match_operand:DI 0 "register_operand")
1545 (match_operand:DI 1 "register_operand")
1546 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1549 /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like asr by 63 - however the standard pattern does not handle
     this case.  */
1552 if (INTVAL (operands[2]) == 64)
1553 operands[2] = GEN_INT (63);
1554 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1559 (define_expand "vlshr<mode>3"
1560 [(match_operand:VDQ_I 0 "register_operand")
1561 (match_operand:VDQ_I 1 "register_operand")
1562 (match_operand:VDQ_I 2 "register_operand")]
1565 rtx neg = gen_reg_rtx (<MODE>mode);
1566 emit (gen_neg<mode>2 (neg, operands[2]));
1567 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1572 (define_expand "aarch64_lshr_simddi"
1573 [(match_operand:DI 0 "register_operand")
1574 (match_operand:DI 1 "register_operand")
1575 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1578 if (INTVAL (operands[2]) == 64)
1579 emit_move_insn (operands[0], const0_rtx);
1581 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1586 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1587 (define_insn "vec_shr_<mode><vczle><vczbe>"
1588 [(set (match_operand:VD 0 "register_operand" "=w")
1589 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1590 (match_operand:SI 2 "immediate_operand" "i")]
1594 if (BYTES_BIG_ENDIAN)
1595 return "shl %d0, %d1, %2";
1597 return "ushr %d0, %d1, %2";
1599 [(set_attr "type" "neon_shift_imm")]
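
;; aarch64_simd_vec_set<mode> above encodes the lane as a one-hot vec_merge
;; mask, so this expander converts the optab's lane number to 1 << lane.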
1602 (define_expand "vec_set<mode>"
1603 [(match_operand:VALL_F16 0 "register_operand")
1604 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1605 (match_operand:SI 2 "immediate_operand")]
1608 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1609 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1610 GEN_INT (elem), operands[0]));
1616 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1617 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1618 (plus:VDQ_BHSI (mult:VDQ_BHSI
1619 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1620 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1621 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1623 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1624 [(set_attr "type" "neon_mla_<Vetype><q>")]
1627 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1628 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1631 (vec_duplicate:VDQHS
1633 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1634 (parallel [(match_operand:SI 2 "immediate_operand")])))
1635 (match_operand:VDQHS 3 "register_operand" "w"))
1636 (match_operand:VDQHS 4 "register_operand" "0")))]
1639 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1640 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1642 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1645 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1646 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1649 (vec_duplicate:VDQHS
1651 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1652 (parallel [(match_operand:SI 2 "immediate_operand")])))
1653 (match_operand:VDQHS 3 "register_operand" "w"))
1654 (match_operand:VDQHS 4 "register_operand" "0")))]
1657 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1658 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1660 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1663 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1664 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1667 (vec_duplicate:VDQHS
1668 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1669 (match_operand:VDQHS 2 "register_operand" "w"))
1670 (match_operand:VDQHS 1 "register_operand" "0")))]
1672 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1673 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1676 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1677 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1678 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1679 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1680 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1682 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1683 [(set_attr "type" "neon_mla_<Vetype><q>")]
1686 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1687 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1689 (match_operand:VDQHS 4 "register_operand" "0")
1691 (vec_duplicate:VDQHS
1693 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQHS 3 "register_operand" "w"))))]
1698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1699 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1701 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1704 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1705 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1707 (match_operand:VDQHS 4 "register_operand" "0")
1709 (vec_duplicate:VDQHS
1711 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712 (parallel [(match_operand:SI 2 "immediate_operand")])))
1713 (match_operand:VDQHS 3 "register_operand" "w"))))]
1716 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1717 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1719 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1722 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1723 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1725 (match_operand:VDQHS 1 "register_operand" "0")
1727 (vec_duplicate:VDQHS
1728 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1729 (match_operand:VDQHS 2 "register_operand" "w"))))]
1731 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1732 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1735 ;; Max/Min operations.
1736 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1737 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1738 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1739 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1741 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742 [(set_attr "type" "neon_minmax<q>")]
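
;; AdvSIMD has no integer SMAX/SMIN/UMAX/UMIN for 64-bit elements, so the
;; V2DI case is synthesized below from a compare and a vector select.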
1745 (define_expand "<su><maxmin>v2di3"
1746 [(set (match_operand:V2DI 0 "register_operand")
1747 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1748 (match_operand:V2DI 2 "register_operand")))]
1751 enum rtx_code cmp_operator;
1772 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1773 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1774 operands[2], cmp_fmt, operands[1], operands[2]));
1778 ;; Pairwise Integer Max/Min operations.
1779 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1780 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1781 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1782 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1785 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_minmax<q>")]
1789 ;; Pairwise FP Max/Min operations.
1790 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1791 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1792 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1793 (match_operand:VHSDF 2 "register_operand" "w")]
1796 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1797 [(set_attr "type" "neon_minmax<q>")]
1800 ;; vec_concat gives a new vector with the low elements from operand 1, and
1801 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1802 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the patterns below need to change
;; depending on endianness.
;; Narrowing operations.

(define_insn "aarch64_xtn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

(define_insn "aarch64_xtn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

(define_expand "aarch64_xtn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
						 operands[2]));
    else
      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
						 operands[2]));
    DONE;
  }
)
(define_insn "*aarch64_narrow_trunc<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (match_operand:VQN 1 "register_operand" "w"))
	  (truncate:<VNARROWQ>
	    (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
{
  if (!BYTES_BIG_ENDIAN)
    return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
  else
    return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
}
  [(set_attr "type" "neon_permute<q>")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand")
   (match_operand:VDN 1 "general_operand")
   (match_operand:VDN 2 "general_operand")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);

  emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
  emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
  DONE;
})
(define_expand "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));

    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
						 operands[hi]));
    else
      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
						 operands[hi]));
    DONE;
  }
)
(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
  [(set_attr "type" "neon_permute<q>")]
)

(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
  [(set_attr "type" "neon_permute<q>")]
)
;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			      (match_operand:VQW 1 "register_operand" "w")
			      (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			      (match_operand:VQW 1 "register_operand" "w")
			      (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  "&& <CODE> == ZERO_EXTEND
   && aarch64_split_simd_shift_p (insn)"
  [(const_int 0)]
  {
    /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
       provided that the cost of the zero can be amortized over several
       operations.  We'll later recombine the zero and zip if there are
       not sufficient uses of the zero to make the split worthwhile.  */
    rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
    rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
    emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
    DONE;
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlal_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
})
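
;; For reference, the kind of source this family of patterns serves, as a
;; sketch using arm_neon.h intrinsics (illustrative only):
;;
;;   #include <arm_neon.h>
;;   /* acc[i] += (int32_t) a[4 + i] * (int32_t) b[4 + i]; this is the
;;      operation behind the smlal2 form of the pattern above.  */
;;   int32x4_t mlal_high (int32x4_t acc, int16x8_t a, int16x8_t b)
;;   {
;;     return vmlal_high_s16 (acc, a, b);
;;   }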
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
})
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlsl_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlsl_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 1 "register_operand" "w")
			(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 2 "register_operand" "w")
			(match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 1 "register_operand" "w"))
		      (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						      operands[1],
						      operands[2], p));
  DONE;
})
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 1 "register_operand" "w")
			(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 2 "register_operand" "w")
			(match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						      operands[1],
						      operands[2], p));
  DONE;
})
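
;; A small illustration of the low/high widening multiplies, using
;; arm_neon.h intrinsics (sketch only):
;;
;;   #include <arm_neon.h>
;;   int32x4_t mull_lo (int16x8_t a, int16x8_t b)
;;   {
;;     return vmull_s16 (vget_low_s16 (a), vget_low_s16 (b));  /* smull  */
;;   }
;;   int32x4_t mull_hi (int16x8_t a, int16x8_t b)
;;   {
;;     return vmull_high_s16 (a, b);                           /* smull2 */
;;   }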
;; vmull_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:<VCOND> 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:VDQHS 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
})

(define_insn "aarch64_<su>mull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:VD_HSI 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
						   operands[2], p));
  DONE;
})
;; vmlal_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlal_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.

;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.

;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.
;; FP arithmetic operations.

(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		     (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)
(define_expand "div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand")
		   (match_operand:VHSDF 2 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
(define_expand "aarch64_float_mla<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(plus:VDQF_DF
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))
	  (match_operand:VDQF_DF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(minus:VDQF_DF
	  (match_operand:VDQF_DF 1 "register_operand")
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
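
;; Note that these expanders deliberately emit a separate multiply and
;; add/subtract rather than a fused fma: the unfused vmla/vmls family of
;; intrinsics is defined with two roundings.  As a scalar sketch:
;;
;;   /* aarch64_float_mla computes d = c + (a * b) with two roundings,
;;      unlike the fused fma patterns further below.  */
;;   float mla_scalar (float c, float a, float b) { return c + a * b; }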
(define_expand "aarch64_float_mla_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_expand "aarch64_float_mla_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_expand "aarch64_float_mla_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_insn "fma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")
		   (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
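
;; The fma pattern implements a single-rounding fused multiply-add, e.g.
;; (sketch, assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;   /* r[i] = acc[i] + a[i] * b[i], fused (one rounding) -> fmla.  */
;;   float32x4_t fused_mla (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);
;;   }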
(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 3 "register_operand" "w")
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 3 "register_operand" "w")
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 2 "register_operand" "w")
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (match_operand:DF 3 "register_operand" "w")
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
(define_insn "fnma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (neg:VDQF
	    (match_operand:VDQF 3 "register_operand" "w"))
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (neg:VDQSF
	    (match_operand:VDQSF 3 "register_operand" "w"))
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (neg:VMUL
	    (match_operand:VMUL 2 "register_operand" "w"))
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (neg:DF
	    (match_operand:DF 3 "register_operand" "w"))
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand" "w")]
				 FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
;; HF Scalar variants of related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(mult:VDQF
				    (match_operand:VDQF 1 "register_operand" "w")
				    (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand")]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand")]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		      UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
				(match_operand:VQ_HSF 1 "register_operand" "w")
				(match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Convert between fixed-point and floating-point (vector modes).

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns, their behavior is as required.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
				(match_operand:VQ_HSF 1 "register_operand" "w")
				(match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Float narrowing operations.

(define_insn "aarch64_float_trunc_rodd_df"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:DF 1 "register_operand" "w")]
		   UNSPEC_FCVTXN))]
  "TARGET_SIMD"
  "fcvtxn\\t%s0, %d1"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_trunc_rodd_lo_v2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
		     UNSPEC_FCVTXN))]
  "TARGET_SIMD"
  "fcvtxn\\t%0.2s, %1.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "register_operand" "0")
	  (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
		       UNSPEC_FCVTXN)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtxn2\\t%0.4s, %2.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(vec_concat:V4SF
	  (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
		       UNSPEC_FCVTXN)
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtxn2\\t%0.4s, %2.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_expand "aarch64_float_trunc_rodd_hi_v4sf"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V2SF 1 "register_operand")
   (match_operand:V2DF 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			       ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
			       : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
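
;; Round-to-odd narrowing avoids double rounding when an f64 value is
;; narrowed to f32 in two steps.  A sketch with arm_neon.h (illustrative):
;;
;;   #include <arm_neon.h>
;;   float32x2_t narrow_rodd (float64x2_t a)
;;   {
;;     return vcvtx_f32_f64 (a);   /* fcvtxn v0.2s, v1.2d */
;;   }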
(define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			       ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			       : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	  (float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
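
;; A hypothetical C loop that exercises this expander (illustrative only):
;;
;;   void pack (float *dst, const double *src, long n)
;;   {
;;     for (long i = 0; i < n; i++)
;;       dst[i] = (float) src[i];   /* vectorized to fcvtn + fcvtn2 */
;;   }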
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF (match_operand:DF 1 "general_operand"))
	  (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);

    emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
;; -fno-signed-zeros are enabled either explicitly or indirectly via
;; -ffast-math.

;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs and signed zeroes.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<fmaxmin><mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.

(define_insn "aarch64_faddp<mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
		      UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

(define_insn "reduc_plus_scal_v2si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
		   UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
	(zero_extend:GPI
	  (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
			       UNSPEC_ADDV)))]
  "TARGET_SIMD"
  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
	(unspec:SF [(match_operand:V4SF 1 "register_operand")]
		   UNSPEC_FADDV))]
  "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
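
;; The expander above performs a horizontal add by folding the vector in
;; half twice.  Equivalent intrinsic-level sketch (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;   float hadd (float32x4_t v)
;;   {
;;     return vaddvq_f32 (v);   /* two faddp steps + lane 0 extract */
;;   }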
;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
;; sign or zero-extends its elements.
(define_insn "aarch64_<su>addlv<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
	(unspec:<VWIDE_S>
	  [(ANY_EXTEND:<V2XWIDE>
	     (match_operand:VDQV_L 1 "register_operand" "w"))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD"
  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
;; An ADDV over a vector PLUS of elements extracted and widened all from the
;; same vector is the same as an [SU]ADDLV above, so long as all the elements
;; of that vector are used.  We can greatly simplify the RTL expression using
;; this splitter.
(define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
  [(set (match_operand:<VWIDE_S> 0 "register_operand")
	(unspec:<VWIDE_S>
	  [(plus:<VDBLW>
	     (vec_select:<VDBLW>
	       (ANY_EXTEND:<V2XWIDE>
		 (match_operand:VDQV_L 1 "register_operand"))
	       (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
	     (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	       (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VWIDE_S>
	  [(ANY_EXTEND:<V2XWIDE> (match_dup 1))]
	  UNSPEC_ADDV))]
  {}
)
;; Similar to the above but for two-step zero-widening reductions.
;; We can push the outer zero_extend outside the ADDV unspec and make
;; use of the implicit high-part zeroing semantics of UADDLV to do it all
;; in a single instruction.
(define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
  [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
	(unspec:<VWIDE2X_S>
	  [(zero_extend:<VQUADW>
	     (plus:<VDBLW>
	       (vec_select:<VDBLW>
		 (zero_extend:<V2XWIDE>
		   (match_operand:VDQQH 1 "register_operand" "w"))
		 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
	       (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
		 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(zero_extend:<VWIDE2X_S>
	  (unspec:<VWIDE_S>
	    [(zero_extend:<V2XWIDE> (match_dup 1))]
	    UNSPEC_ADDV)))]
  {}
)
;; Zero-extending version of the above.  As these intrinsics produce a scalar
;; value that may be used by further intrinsics we want to avoid moving the
;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.

(define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
	(zero_extend:GPI
	  (unspec:<VWIDE_S>
	    [(ANY_EXTEND:<VDQV_L:V2XWIDE>
	       (match_operand:VDQV_L 1 "register_operand" "w"))]
	    UNSPEC_ADDV)))]
  "TARGET_SIMD
   && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
  "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
)
(define_expand "aarch64_<su>addlp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW>
	  (vec_select:<VDBLW>
	    (ANY_EXTEND:<V2XWIDE>
	      (match_operand:VDQV_L 1 "register_operand"))
	    (match_dup 2))
	  (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	    (match_dup 3))))]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
  operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
})
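
;; Pairwise add-long sketch, using arm_neon.h (illustrative):
;;
;;   #include <arm_neon.h>
;;   /* r[i] = (int16_t) a[2*i] + (int16_t) a[2*i + 1] -> saddlp.  */
;;   int16x4_t pairwise_widen (int8x8_t a)
;;   {
;;     return vpaddl_s8 (a);
;;   }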
(define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(plus:<VDBLW>
	  (vec_select:<VDBLW>
	    (ANY_EXTEND:<V2XWIDE>
	      (match_operand:VDQV_L 1 "register_operand" "w"))
	    (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
	  (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	    (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD
   && !rtx_equal_p (operands[2], operands[3])"
  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
(define_insn "clrsb<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "clz<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "popcount<mode>2<vczle><vczbe>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
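
;; A common use of the byte-wise CNT instruction is a full 64-bit popcount,
;; sketched here with arm_neon.h intrinsics (illustrative only):
;;
;;   #include <arm_neon.h>
;;   unsigned popcount64 (unsigned long long x)
;;   {
;;     uint8x8_t bytes = vcnt_u8 (vcreate_u8 (x));   /* cnt  */
;;     return vaddv_u8 (bytes);                      /* addv */
;;   }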
;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMIN_UNS)]
  "TARGET_SIMD"
  {
    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
	(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		       MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

(define_insn "aarch64_reduc_<optab>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
3616 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3617 ;; allocation.
3618 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3619 ;; to select.
3621 ;; Thus our BSL is of the form:
3622 ;; op0 = bsl (mask, op2, op3)
3623 ;; We can use any of:
3625 ;; if (op0 = mask)
3626 ;; bsl mask, op1, op2
3627 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3628 ;; bit op0, op2, mask
3629 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3630 ;; bif op0, op1, mask
3632 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3633 ;; Some forms of straight-line code may generate the equivalent form
3634 ;; in *aarch64_simd_bsl<mode>_alt.
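;;
;; In C terms, the selection all three instructions compute is (a sketch,
;; with op1 the mask):
;;   op0 = (op2 & op1) | (op3 & ~op1);   /* == ((op3 ^ op2) & op1) ^ op3 */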
3636 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3637 [(set (match_operand:VDQ_I 0 "register_operand")
3641 (match_operand:<V_INT_EQUIV> 3 "register_operand")
3642 (match_operand:VDQ_I 2 "register_operand"))
3643 (match_operand:VDQ_I 1 "register_operand"))
3644 (match_dup:<V_INT_EQUIV> 3)
3647 {@ [ cons: =0 , 1 , 2 , 3 ]
3648 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3649 [ w , w , w , 0 ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3650 [ w , w , 0 , w ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3652 [(set_attr "type" "neon_bsl<q>")]
3655 ;; We need this form in addition to the above pattern to match the case
3656 ;; when combine tries merging three insns such that the second operand of
3657 ;; the outer XOR matches the second operand of the inner XOR rather than
3658 ;; the first. The two are equivalent but since recog doesn't try all
3659 ;; permutations of commutative operations, we have to have a separate pattern.
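;;
;; Concretely (a sketch): the pattern above matches
;;   ((op3 ^ op2) & op1) ^ op3, i.e. bsl (op1, op2, op3),
;; while the pattern below matches
;;   ((op3 ^ op2) & op1) ^ op2, i.e. bsl (op1, op3, op2);
;; the same select with operands 2 and 3 exchanged.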
3661 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3662 [(set (match_operand:VDQ_I 0 "register_operand")
3666 (match_operand:VDQ_I 3 "register_operand")
3667 (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3668 (match_operand:VDQ_I 1 "register_operand"))
3669 (match_dup:<V_INT_EQUIV> 2)))]
3671 {@ [ cons: =0 , 1 , 2 , 3 ]
3672 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3673 [ w , w , 0 , w ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3674 [ w , w , w , 0 ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3676 [(set_attr "type" "neon_bsl<q>")]
3679 ;; DImode is special: we want to avoid computing operations which are
3680 ;; more naturally computed in general purpose registers in the vector
3681 ;; registers. If we do that, we need to move all three operands from general
3682 ;; purpose registers to vector registers, then back again. However, we
3683 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3684 ;; optimizations based on the component operations of a BSL.
3686 ;; That means we need a splitter back to the individual operations, if they
3687 ;; would be better calculated on the integer side.
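;;
;; On the integer side the splitters below compute the select as (a sketch):
;;   scratch = op2 ^ op3;
;;   scratch &= op1;          /* the mask */
;;   op0 = scratch ^ op3;     /* == bsl (op1, op2, op3) */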
3689 (define_insn_and_split "aarch64_simd_bsldi_internal"
3690 [(set (match_operand:DI 0 "register_operand")
3694 (match_operand:DI 3 "register_operand")
3695 (match_operand:DI 2 "register_operand"))
3696 (match_operand:DI 1 "register_operand"))
3700 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3701 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %2.8b, %3.8b
3702 [ w , w , w , 0 ; neon_bsl , 4 ] bit\t%0.8b, %2.8b, %1.8b
3703 [ w , w , 0 , w ; neon_bsl , 4 ] bif\t%0.8b, %3.8b, %1.8b
3704 [ &r , r , r , r ; multiple , 12 ] #
3706 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3707 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3709 /* Split back to individual operations. If we're before reload, and
3710 able to create a temporary register, do so. If we're after reload,
3711 we've got an early-clobber destination register, so use that.
3712 Otherwise, we can't create pseudos and we can't yet guarantee that
3713 operands[0] is safe to write, so FAIL to split. */
3716 if (reload_completed)
3717 scratch = operands[0];
3718 else if (can_create_pseudo_p ())
3719 scratch = gen_reg_rtx (DImode);
3723 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3724 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3725 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3730 (define_insn_and_split "aarch64_simd_bsldi_alt"
3731 [(set (match_operand:DI 0 "register_operand")
3735 (match_operand:DI 3 "register_operand")
3736 (match_operand:DI 2 "register_operand"))
3737 (match_operand:DI 1 "register_operand"))
3741 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3742 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %3.8b, %2.8b
3743 [ w , w , 0 , w ; neon_bsl , 4 ] bit\t%0.8b, %3.8b, %1.8b
3744 [ w , w , w , 0 ; neon_bsl , 4 ] bif\t%0.8b, %2.8b, %1.8b
3745 [ &r , r , r , r ; multiple , 12 ] #
3747 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3748 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3750 /* Split back to individual operations. If we're before reload, and
3751 able to create a temporary register, do so. If we're after reload,
3752 we've got an early-clobber destination register, so use that.
3753 Otherwise, we can't create pseudos and we can't yet guarantee that
3754 operands[0] is safe to write, so FAIL to split. */
3757 if (reload_completed)
3758 scratch = operands[0];
3759 else if (can_create_pseudo_p ())
3760 scratch = gen_reg_rtx (DImode);
3764 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3765 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3766 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3771 (define_expand "aarch64_simd_bsl<mode>"
3772 [(match_operand:VALLDIF 0 "register_operand")
3773 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3774 (match_operand:VALLDIF 2 "register_operand")
3775 (match_operand:VALLDIF 3 "register_operand")]
3778 /* We can't alias operands together if they have different modes. */
3779 rtx tmp = operands[0];
3780 if (FLOAT_MODE_P (<MODE>mode))
3782 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3783 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3784 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3786 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3787 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3791 if (tmp != operands[0])
3792 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3797 (define_expand "vcond_mask_<mode><v_int_equiv>"
3798 [(match_operand:VALLDI 0 "register_operand")
3799 (match_operand:VALLDI 1 "nonmemory_operand")
3800 (match_operand:VALLDI 2 "nonmemory_operand")
3801 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3804 /* If we have (a = (P) ? -1 : 0),
3805 then we can simply move the generated mask (the result must be int). */
3806 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3807 && operands[2] == CONST0_RTX (<MODE>mode))
3808 emit_move_insn (operands[0], operands[3]);
3809 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3810 else if (operands[1] == CONST0_RTX (<MODE>mode)
3811 && operands[2] == CONSTM1_RTX (<MODE>mode))
3812 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3815 if (!REG_P (operands[1]))
3816 operands[1] = force_reg (<MODE>mode, operands[1]);
3817 if (!REG_P (operands[2]))
3818 operands[2] = force_reg (<MODE>mode, operands[2]);
3819 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3820 operands[1], operands[2]));
3826 ;; Patterns that compare two vectors and conditionally jump on the result.
3828 (define_expand "cbranch<mode>4"
3831 (match_operator 0 "aarch64_equality_operator"
3832 [(match_operand:VDQ_I 1 "register_operand")
3833 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3834 (label_ref (match_operand 3 ""))
3838 auto code = GET_CODE (operands[0]);
3839 rtx tmp = operands[1];
3841 /* If comparing against a non-zero vector, XOR the two vectors first,
3842 so that the branch reduces to comparing the XOR result with zero. */
3843 if (operands[2] != CONST0_RTX (<MODE>mode))
3845 tmp = gen_reg_rtx (<MODE>mode);
3846 emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3849 /* For 64-bit vectors we need no reductions. */
3850 if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3852 /* Always reduce using a V4SI. */
3853 rtx reduc = gen_lowpart (V4SImode, tmp);
3854 rtx res = gen_reg_rtx (V4SImode);
3855 emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3856 emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3859 rtx val = gen_reg_rtx (DImode);
3860 emit_move_insn (val, gen_lowpart (DImode, tmp));
3862 rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3863 rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3864 emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
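  /* For example (illustrative only), a 128-bit "a != b" branch becomes
     roughly:
       eor   v0.16b, v0.16b, v1.16b
       umaxp v0.4s, v0.4s, v0.4s
       fmov  x0, d0
       cbnz  x0, .Ltarget  */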
3868 ;; Patterns comparing two vectors to produce a mask.
3870 (define_expand "vec_cmp<mode><mode>"
3871 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3872 (match_operator 1 "comparison_operator"
3873 [(match_operand:VSDQ_I_DI 2 "register_operand")
3874 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3877 rtx mask = operands[0];
3878 enum rtx_code code = GET_CODE (operands[1]);
3888 if (operands[3] == CONST0_RTX (<MODE>mode))
3893 if (!REG_P (operands[3]))
3894 operands[3] = force_reg (<MODE>mode, operands[3]);
3902 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3906 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3910 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3914 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3918 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3922 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3926 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3930 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3934 /* Handle NE as !EQ. */
3935 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3936 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3940 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3950 (define_expand "vec_cmp<mode><v_int_equiv>"
3951 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3952 (match_operator 1 "comparison_operator"
3953 [(match_operand:VDQF 2 "register_operand")
3954 (match_operand:VDQF 3 "nonmemory_operand")]))]
3957 int use_zero_form = 0;
3958 enum rtx_code code = GET_CODE (operands[1]);
3959 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3961 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3970 if (operands[3] == CONST0_RTX (<MODE>mode))
3977 if (!REG_P (operands[3]))
3978 operands[3] = force_reg (<MODE>mode, operands[3]);
3988 comparison = gen_aarch64_cmlt<mode>;
3993 std::swap (operands[2], operands[3]);
3997 comparison = gen_aarch64_cmgt<mode>;
4002 comparison = gen_aarch64_cmle<mode>;
4007 std::swap (operands[2], operands[3]);
4011 comparison = gen_aarch64_cmge<mode>;
4015 comparison = gen_aarch64_cmeq<mode>;
4033 /* All of the above must not raise any FP exceptions. Thus we first
4034 check each operand for NaNs and force any elements containing NaN to
4035 zero before using them in the compare.
4036 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4037 (cm<cc> (isnan (a) ? 0.0 : a,
4038 isnan (b) ? 0.0 : b))
4039 We use the following transformations for doing the comparisons:
4040 a UNGE b -> a GE b
4041 a UNGT b -> a GT b
4042 a UNLE b -> b GE a
4043 a UNLT b -> b GT a. */
4045 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4046 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4047 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4048 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4049 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4050 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4051 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4052 lowpart_subreg (<V_INT_EQUIV>mode,
4055 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4056 lowpart_subreg (<V_INT_EQUIV>mode,
4059 gcc_assert (comparison != NULL);
4060 emit_insn (comparison (operands[0],
4061 lowpart_subreg (<MODE>mode,
4062 tmp0, <V_INT_EQUIV>mode),
4063 lowpart_subreg (<MODE>mode,
4064 tmp1, <V_INT_EQUIV>mode)));
4065 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4075 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4076 As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
4077 a GE b -> a GE b
4078 a GT b -> a GT b
4079 a LE b -> b GE a
4080 a LT b -> b GT a
4081 a EQ b -> a EQ b
4082 a NE b -> ~(a EQ b) */
4083 gcc_assert (comparison != NULL);
4084 emit_insn (comparison (operands[0], operands[2], operands[3]));
4086 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4090 /* LTGT is not guaranteed not to raise an FP exception, so take the
4091 faster route: ((a > b) || (b > a)). */
4092 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4093 operands[2], operands[3]));
4094 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4095 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4101 /* cmeq (a, a) & cmeq (b, b). */
4102 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4103 operands[2], operands[2]));
4104 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4105 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4107 if (code == UNORDERED)
4108 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4109 else if (code == UNEQ)
4111 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4112 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4123 (define_expand "vec_cmpu<mode><mode>"
4124 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4125 (match_operator 1 "comparison_operator"
4126 [(match_operand:VSDQ_I_DI 2 "register_operand")
4127 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4130 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4131 operands[2], operands[3]));
4135 (define_expand "vcond<mode><mode>"
4136 [(set (match_operand:VALLDI 0 "register_operand")
4137 (if_then_else:VALLDI
4138 (match_operator 3 "comparison_operator"
4139 [(match_operand:VALLDI 4 "register_operand")
4140 (match_operand:VALLDI 5 "nonmemory_operand")])
4141 (match_operand:VALLDI 1 "nonmemory_operand")
4142 (match_operand:VALLDI 2 "nonmemory_operand")))]
4145 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4146 enum rtx_code code = GET_CODE (operands[3]);
4148 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4149 it as well as switch operands 1/2 in order to avoid the additional
4150 NOT instruction. */
4151 if (code == NE)
4152 {
4153 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4154 operands[4], operands[5]);
4155 std::swap (operands[1], operands[2]);
4156 }
4157 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4158 operands[4], operands[5]));
4159 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4160 operands[2], mask));
4165 (define_expand "vcond<v_cmp_mixed><mode>"
4166 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4167 (if_then_else:<V_cmp_mixed>
4168 (match_operator 3 "comparison_operator"
4169 [(match_operand:VDQF_COND 4 "register_operand")
4170 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4171 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4172 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4175 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4176 enum rtx_code code = GET_CODE (operands[3]);
4178 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4179 it as well as switch operands 1/2 in order to avoid the additional
4180 NOT instruction. */
4181 if (code == NE)
4182 {
4183 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4184 operands[4], operands[5]);
4185 std::swap (operands[1], operands[2]);
4186 }
4187 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4188 operands[4], operands[5]));
4189 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4190 operands[0], operands[1],
4191 operands[2], mask));
4196 (define_expand "vcondu<mode><mode>"
4197 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4198 (if_then_else:VSDQ_I_DI
4199 (match_operator 3 "comparison_operator"
4200 [(match_operand:VSDQ_I_DI 4 "register_operand")
4201 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4202 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4203 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4206 rtx mask = gen_reg_rtx (<MODE>mode);
4207 enum rtx_code code = GET_CODE (operands[3]);
4209 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4210 it as well as switch operands 1/2 in order to avoid the additional
4211 NOT instruction. */
4212 if (code == NE)
4213 {
4214 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4215 operands[4], operands[5]);
4216 std::swap (operands[1], operands[2]);
4217 }
4218 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4219 operands[4], operands[5]));
4220 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4221 operands[2], mask));
4225 (define_expand "vcondu<mode><v_cmp_mixed>"
4226 [(set (match_operand:VDQF 0 "register_operand")
4228 (match_operator 3 "comparison_operator"
4229 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4230 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4231 (match_operand:VDQF 1 "nonmemory_operand")
4232 (match_operand:VDQF 2 "nonmemory_operand")))]
4235 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4236 enum rtx_code code = GET_CODE (operands[3]);
4238 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4239 it as well as switch operands 1/2 in order to avoid the additional
4240 NOT instruction. */
4241 if (code == NE)
4242 {
4243 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244 operands[4], operands[5]);
4245 std::swap (operands[1], operands[2]);
4246 }
4247 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4249 operands[4], operands[5]));
4250 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4251 operands[2], mask));
4255 ;; Patterns for AArch64 SIMD Intrinsics.
4257 ;; Lane extraction with sign extension to general purpose register.
4258 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4259 [(set (match_operand:GPI 0 "register_operand" "=r")
4261 (vec_select:<VDQQH:VEL>
4262 (match_operand:VDQQH 1 "register_operand" "w")
4263 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4266 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4267 INTVAL (operands[2]));
4268 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4270 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4273 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4274 [(set (match_operand:GPI 0 "register_operand" "=r")
4276 (vec_select:<VDQQH:VEL>
4277 (match_operand:VDQQH 1 "register_operand" "w")
4278 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4281 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4282 INTVAL (operands[2]));
4283 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4285 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4288 ;; Lane extraction of a value; neither sign nor zero extension
4289 ;; is guaranteed, so the upper bits should be considered undefined.
4290 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4291 ;; Extracting lane zero is split into a simple move when it is between SIMD
4292 ;; registers or a store.
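;;
;; For example (illustrative), extracting lane 2 of a V4SI into a general
;; register uses:
;;   umov w0, v0.s[2]
;; while a lane-0 extract between SIMD registers splits to a plain move.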
4293 (define_insn_and_split "aarch64_get_lane<mode>"
4294 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4296 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4297 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4300 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4301 switch (which_alternative)
4304 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4306 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4308 return "st1\\t{%1.<Vetype>}[%2], %0";
4313 "&& reload_completed
4314 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4315 [(set (match_dup 0) (match_dup 1))]
4317 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4319 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4322 (define_insn "*aarch64_get_high<mode>"
4323 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4325 (match_operand:VQ_2E 1 "register_operand" "w")
4326 (parallel [(match_operand:SI 2 "immediate_operand")])))]
4327 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4329 [(set_attr "type" "f_mrc")]
4332 (define_insn "load_pair_lanes<mode>"
4333 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4335 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4336 (match_operand:VDCSIF 2 "memory_operand" "m")))]
4338 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4339 "ldr\\t%<single_dtype>0, %1"
4340 [(set_attr "type" "neon_load1_1reg<dblq>")]
4343 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4344 ;; below. The reason for having both of them is that the alternatives of
4345 ;; the later patterns do not have consistent register preferences: the STP
4346 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4347 ;; the GPR form is more natural for scalar integers) whereas the other
4348 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4350 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4351 ;; which the destination was always memory. On the other hand, expressing
4352 ;; the true preferences makes GPRs seem more palatable than they really are
4353 ;; for register destinations.
4355 ;; Despite that, we do still want the general form to have STP alternatives,
4356 ;; in order to handle cases where a register destination is spilled.
4358 ;; The best compromise therefore seemed to be to have a dedicated STP
4359 ;; pattern to catch cases in which the destination was always memory.
4360 ;; This dedicated pattern must come first.
4362 (define_insn "store_pair_lanes<mode>"
4363 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4365 (match_operand:VDCSIF 1 "register_operand")
4366 (match_operand:VDCSIF 2 "register_operand")))]
4368 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4369 [ Umn , w , w ; neon_stp ] stp\t%<single_type>1, %<single_type>2, %y0
4370 [ Umn , r , r ; store_16 ] stp\t%<single_wx>1, %<single_wx>2, %y0
4374 ;; Form a vector whose least significant half comes from operand 1 and whose
4375 ;; most significant half comes from operand 2. The register alternatives
4376 ;; tie the least significant half to the same register as the destination,
4377 ;; so that only the other half needs to be handled explicitly. For the
4378 ;; reasons given above, the STP alternatives use ? for constraints that
4379 ;; the register alternatives either don't accept or themselves disparage.
4381 (define_insn "*aarch64_combine_internal<mode>"
4382 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4384 (match_operand:VDCSIF 1 "register_operand")
4385 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4387 && !BYTES_BIG_ENDIAN
4388 && (register_operand (operands[0], <VDBL>mode)
4389 || register_operand (operands[2], <MODE>mode))"
4390 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4391 [ w , 0 , w ; neon_ins<dblq> , simd ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4392 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4393 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4394 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4395 [ Umn , ?w , w ; neon_stp , * ] stp\t%<single_type>1, %<single_type>2, %y0
4396 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>1, %<single_wx>2, %y0
4400 (define_insn "*aarch64_combine_internal_be<mode>"
4401 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4403 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4404 (match_operand:VDCSIF 1 "register_operand")))]
4407 && (register_operand (operands[0], <VDBL>mode)
4408 || register_operand (operands[2], <MODE>mode))"
4409 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4410 [ w , 0 , w ; neon_ins<dblq> , simd ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4411 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4412 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4413 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4414 [ Umn , ?w , ?w ; neon_stp , * ] stp\t%<single_type>2, %<single_type>1, %y0
4415 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>2, %<single_wx>1, %y0
4419 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4420 ;; dest vector.
4422 (define_insn "*aarch64_combinez<mode>"
4423 [(set (match_operand:<VDBL> 0 "register_operand")
4425 (match_operand:VDCSIF 1 "nonimmediate_operand")
4426 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4427 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4428 {@ [ cons: =0 , 1 ; attrs: type ]
4429 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4430 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4431 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4435 (define_insn "*aarch64_combinez_be<mode>"
4436 [(set (match_operand:<VDBL> 0 "register_operand")
4438 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4439 (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4440 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4441 {@ [ cons: =0 , 1 ; attrs: type ]
4442 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4443 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4444 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4448 ;; Form a vector whose first half (in array order) comes from operand 1
4449 ;; and whose second half (in array order) comes from operand 2.
4450 ;; This operand order follows the RTL vec_concat operation.
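;;
;; For example (a sketch of the RTL semantics):
;;   (vec_concat:V4SI (op1:V2SI) (op2:V2SI))
;;     == { op1[0], op1[1], op2[0], op2[1] } in array order,
;; regardless of endianness.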
4451 (define_expand "@aarch64_vec_concat<mode>"
4452 [(set (match_operand:<VDBL> 0 "register_operand")
4454 (match_operand:VDCSIF 1 "general_operand")
4455 (match_operand:VDCSIF 2 "general_operand")))]
4458 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4459 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4461 if (MEM_P (operands[1])
4462 && MEM_P (operands[2])
4463 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4464 /* Use load_pair_lanes<mode>. */
4466 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4468 /* Use *aarch64_combinez<mode>. */
4469 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4470 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4474 /* Use *aarch64_combine_internal<mode>. */
4475 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4476 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4478 if (MEM_P (operands[hi]))
4480 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4481 operands[hi] = replace_equiv_address (operands[hi], addr);
4484 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4489 ;; Form a vector whose least significant half comes from operand 1 and whose
4490 ;; most significant half comes from operand 2. This operand order follows
4491 ;; arm_neon.h vcombine* intrinsics.
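;;
;; For example (illustrative): vcombine_s32 (lo, hi) reaches this expander;
;; on big-endian targets the operands are swapped below so that the
;; array-ordered vec_concat above still places "lo" in the least
;; significant half.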
4492 (define_expand "aarch64_combine<mode>"
4493 [(match_operand:<VDBL> 0 "register_operand")
4494 (match_operand:VDC 1 "general_operand")
4495 (match_operand:VDC 2 "general_operand")]
4498 if (BYTES_BIG_ENDIAN)
4499 std::swap (operands[1], operands[2]);
4500 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4506 ;; <su><addsub>l<q>.
4508 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4509 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4510 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4511 (match_operand:VQW 1 "register_operand" "w")
4512 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4513 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4514 (match_operand:VQW 2 "register_operand" "w")
4517 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4518 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4521 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4522 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4523 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4524 (match_operand:VQW 1 "register_operand" "w")
4525 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4526 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4527 (match_operand:VQW 2 "register_operand" "w")
4530 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4531 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4534 (define_expand "vec_widen_<su>add_lo_<mode>"
4535 [(match_operand:<VWIDE> 0 "register_operand")
4536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4537 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4540 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4541 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4546 (define_expand "vec_widen_<su>add_hi_<mode>"
4547 [(match_operand:<VWIDE> 0 "register_operand")
4548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4549 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4552 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4553 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4558 (define_expand "vec_widen_<su>sub_lo_<mode>"
4559 [(match_operand:<VWIDE> 0 "register_operand")
4560 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4561 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4564 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4565 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4570 (define_expand "vec_widen_<su>sub_hi_<mode>"
4571 [(match_operand:<VWIDE> 0 "register_operand")
4572 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4573 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4576 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4577 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4582 (define_expand "aarch64_saddl2<mode>"
4583 [(match_operand:<VWIDE> 0 "register_operand")
4584 (match_operand:VQW 1 "register_operand")
4585 (match_operand:VQW 2 "register_operand")]
4588 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4589 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4594 (define_expand "aarch64_uaddl2<mode>"
4595 [(match_operand:<VWIDE> 0 "register_operand")
4596 (match_operand:VQW 1 "register_operand")
4597 (match_operand:VQW 2 "register_operand")]
4600 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4601 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4606 (define_expand "aarch64_ssubl2<mode>"
4607 [(match_operand:<VWIDE> 0 "register_operand")
4608 (match_operand:VQW 1 "register_operand")
4609 (match_operand:VQW 2 "register_operand")]
4612 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4613 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4618 (define_expand "aarch64_usubl2<mode>"
4619 [(match_operand:<VWIDE> 0 "register_operand")
4620 (match_operand:VQW 1 "register_operand")
4621 (match_operand:VQW 2 "register_operand")]
4624 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4625 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4630 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4632 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4633 (match_operand:VD_BHSI 1 "register_operand" "w"))
4635 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4637 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4638 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4641 ;; <su><addsub>w<q>.
4643 (define_expand "widen_ssum<mode>3"
4644 [(set (match_operand:<VDBLW> 0 "register_operand")
4645 (plus:<VDBLW> (sign_extend:<VDBLW>
4646 (match_operand:VQW 1 "register_operand"))
4647 (match_operand:<VDBLW> 2 "register_operand")))]
4650 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4651 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4653 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4655 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4660 (define_expand "widen_ssum<mode>3"
4661 [(set (match_operand:<VWIDE> 0 "register_operand")
4662 (plus:<VWIDE> (sign_extend:<VWIDE>
4663 (match_operand:VD_BHSI 1 "register_operand"))
4664 (match_operand:<VWIDE> 2 "register_operand")))]
4667 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4671 (define_expand "widen_usum<mode>3"
4672 [(set (match_operand:<VDBLW> 0 "register_operand")
4673 (plus:<VDBLW> (zero_extend:<VDBLW>
4674 (match_operand:VQW 1 "register_operand"))
4675 (match_operand:<VDBLW> 2 "register_operand")))]
4678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4681 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4683 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4688 (define_expand "widen_usum<mode>3"
4689 [(set (match_operand:<VWIDE> 0 "register_operand")
4690 (plus:<VWIDE> (zero_extend:<VWIDE>
4691 (match_operand:VD_BHSI 1 "register_operand"))
4692 (match_operand:<VWIDE> 2 "register_operand")))]
4695 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4699 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4700 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4701 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4703 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4705 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4706 [(set_attr "type" "neon_sub_widen")]
4709 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4711 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4714 (match_operand:VQW 2 "register_operand" "w")
4715 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4717 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718 [(set_attr "type" "neon_sub_widen")]
4721 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4723 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4726 (match_operand:VQW 2 "register_operand" "w")
4727 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4729 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4730 [(set_attr "type" "neon_sub_widen")]
4733 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4734 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4736 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4737 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4739 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4740 [(set_attr "type" "neon_add_widen")]
4743 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4744 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4748 (match_operand:VQW 2 "register_operand" "w")
4749 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4750 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4752 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4753 [(set_attr "type" "neon_add_widen")]
4756 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4761 (match_operand:VQW 2 "register_operand" "w")
4762 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4763 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4765 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4766 [(set_attr "type" "neon_add_widen")]
4769 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4770 [(set (match_operand:<VWIDE> 0 "register_operand")
4774 (match_operand:VQW 2 "register_operand")
4776 (match_operand:<VWIDE> 1 "register_operand")))]
4779 /* We still do an emit_insn rather than relying on the pattern above
4780 because for the MINUS case the operands would need to be swapped
4781 around. */
4782 operands[3]
4783 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4784 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4792 ;; <su><r>h<addsub>.
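;;
;; A C-style sketch of the halving operations below (illustrative only,
;; using a double-width intermediate as the RTL does):
;;   hadd:  res[i] = (a[i] + b[i]) >> 1;
;;   rhadd: res[i] = (a[i] + b[i] + 1) >> 1;
;;   hsub:  res[i] = (a[i] - b[i]) >> 1;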
4794 (define_expand "<su_optab>avg<mode>3_floor"
4795 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4799 (ANY_EXTEND:<V2XWIDE>
4800 (match_operand:VDQ_BHSI 1 "register_operand"))
4801 (ANY_EXTEND:<V2XWIDE>
4802 (match_operand:VDQ_BHSI 2 "register_operand")))
4806 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4810 (define_expand "<su_optab>avg<mode>3_ceil"
4811 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4816 (ANY_EXTEND:<V2XWIDE>
4817 (match_operand:VDQ_BHSI 1 "register_operand"))
4818 (ANY_EXTEND:<V2XWIDE>
4819 (match_operand:VDQ_BHSI 2 "register_operand")))
4824 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4828 (define_expand "aarch64_<su>hsub<mode>"
4829 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4833 (ANY_EXTEND:<V2XWIDE>
4834 (match_operand:VDQ_BHSI 1 "register_operand"))
4835 (ANY_EXTEND:<V2XWIDE>
4836 (match_operand:VDQ_BHSI 2 "register_operand")))
4840 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4844 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4845 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4849 (ANY_EXTEND:<V2XWIDE>
4850 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4851 (ANY_EXTEND:<V2XWIDE>
4852 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4853 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4855 "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4856 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4859 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4860 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4865 (ANY_EXTEND:<V2XWIDE>
4866 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4867 (ANY_EXTEND:<V2XWIDE>
4868 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4869 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4872 "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4873 [(set_attr "type" "neon_add_halve<q>")]
4876 ;; <r><addsub>hn<q>.
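;;
;; A C-style sketch (illustrative only; esize is the source element width):
;;   addhn:  res[i] = (narrow) ((a[i] + b[i]) >> (esize / 2));
;;   raddhn: res[i] = (narrow) ((a[i] + b[i] + (1 << (esize / 2 - 1)))
;;                              >> (esize / 2));
;; and likewise for subhn/rsubhn with minus.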
4878 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4879 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4880 (truncate:<VNARROWQ>
4882 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4883 (match_operand:VQN 2 "register_operand" "w"))
4884 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4886 "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4887 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4890 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4891 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4892 (truncate:<VNARROWQ>
4895 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4896 (match_operand:VQN 2 "register_operand" "w"))
4897 (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4898 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4900 "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4901 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4904 (define_expand "aarch64_<optab>hn<mode>"
4905 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4906 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4907 (match_operand:VQN 2 "register_operand")))]
4911 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4912 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4913 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4914 operands[2], shft));
4919 (define_expand "aarch64_r<optab>hn<mode>"
4920 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4921 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4922 (match_operand:VQN 2 "register_operand")))]
4926 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4927 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4929 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4930 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4931 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4932 operands[2], rnd, shft));
4937 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4938 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4939 (vec_concat:<VNARROWQ2>
4940 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4941 (truncate:<VNARROWQ>
4943 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4944 (match_operand:VQN 3 "register_operand" "w"))
4945 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4947 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4948 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4951 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4952 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4953 (vec_concat:<VNARROWQ2>
4954 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4955 (truncate:<VNARROWQ>
4958 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4959 (match_operand:VQN 3 "register_operand" "w"))
4960 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4961 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4962 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4963 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4964 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4967 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4968 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4969 (vec_concat:<VNARROWQ2>
4970 (truncate:<VNARROWQ>
4972 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4973 (match_operand:VQN 3 "register_operand" "w"))
4974 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4975 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4976 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4977 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4978 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4981 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4982 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4983 (vec_concat:<VNARROWQ2>
4984 (truncate:<VNARROWQ>
4987 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4988 (match_operand:VQN 3 "register_operand" "w"))
4989 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4990 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4991 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4992 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4993 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4994 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4997 (define_expand "aarch64_<optab>hn2<mode>"
4998 [(match_operand:<VNARROWQ2> 0 "register_operand")
4999 (match_operand:<VNARROWQ> 1 "register_operand")
5000 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5001 (match_operand:VQN 3 "register_operand"))]
5005 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5006 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5007 if (BYTES_BIG_ENDIAN)
5008 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
5009 operands[1], operands[2], operands[3], shft));
5011 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
5012 operands[1], operands[2], operands[3], shft));
5017 (define_expand "aarch64_r<optab>hn2<mode>"
5018 [(match_operand:<VNARROWQ2> 0 "register_operand")
5019 (match_operand:<VNARROWQ> 1 "register_operand")
5020 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5021 (match_operand:VQN 3 "register_operand"))]
5025 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5026 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5028 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5029 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
5030 if (BYTES_BIG_ENDIAN)
5031 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
5032 operands[1], operands[2], operands[3], rnd, shft));
5034 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5035 operands[1], operands[2], operands[3], rnd, shft));
5040 ;; Optimize ((a + b) >> n) + c where n is half the element bitsize
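;; of the vector. The split below implements this as addhn followed by
;; uaddw (a sketch of the equivalence):
;;   ((a + b) >> (esize / 2)) + c == uaddw (c, addhn (a, b))
;; since addhn keeps exactly the high half of each sum, avoiding the
;; explicit shift.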
5041 (define_insn_and_split "*bitmask_shift_plus<mode>"
5042 [(set (match_operand:VQN 0 "register_operand" "=&w")
5045 (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5046 (match_operand:VQN 2 "register_operand" "w"))
5047 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5048 (match_operand:VQN 4 "register_operand" "w")))]
5055 if (can_create_pseudo_p ())
5056 tmp = gen_reg_rtx (<VNARROWQ>mode);
5058 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5059 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5060 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5066 (define_insn "aarch64_pmul<mode>"
5067 [(set (match_operand:VB 0 "register_operand" "=w")
5068 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5069 (match_operand:VB 2 "register_operand" "w")]
5072 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5073 [(set_attr "type" "neon_mul_<Vetype><q>")]
5076 (define_insn "aarch64_pmullv8qi"
5077 [(set (match_operand:V8HI 0 "register_operand" "=w")
5078 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5079 (match_operand:V8QI 2 "register_operand" "w")]
5082 "pmull\\t%0.8h, %1.8b, %2.8b"
5083 [(set_attr "type" "neon_mul_b_long")]
5086 (define_insn "aarch64_pmull_hiv16qi_insn"
5087 [(set (match_operand:V8HI 0 "register_operand" "=w")
5090 (match_operand:V16QI 1 "register_operand" "w")
5091 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5093 (match_operand:V16QI 2 "register_operand" "w")
5097 "pmull2\\t%0.8h, %1.16b, %2.16b"
5098 [(set_attr "type" "neon_mul_b_long")]
5101 (define_expand "aarch64_pmull_hiv16qi"
5102 [(match_operand:V8HI 0 "register_operand")
5103 (match_operand:V16QI 1 "register_operand")
5104 (match_operand:V16QI 2 "register_operand")]
5107 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5108 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5116 (define_insn "aarch64_fmulx<mode>"
5117 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5119 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5120 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5123 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5124 [(set_attr "type" "neon_fp_mul_<stype>")]
5127 ;; vmulxq_lane_f32 and vmulx_laneq_f32
5129 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5130 [(set (match_operand:VDQSF 0 "register_operand" "=w")
5132 [(match_operand:VDQSF 1 "register_operand" "w")
5133 (vec_duplicate:VDQSF
5135 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5136 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5140 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5141 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5143 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5146 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5148 (define_insn "*aarch64_mulx_elt<mode>"
5149 [(set (match_operand:VDQF 0 "register_operand" "=w")
5151 [(match_operand:VDQF 1 "register_operand" "w")
5154 (match_operand:VDQF 2 "register_operand" "w")
5155 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5159 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5160 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5162 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5167 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5168 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5170 [(match_operand:VHSDF 1 "register_operand" "w")
5171 (vec_duplicate:VHSDF
5172 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5175 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5176 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5179 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5180 ;; vmulxd_lane_f64 == vmulx_lane_f64
5181 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5183 (define_insn "*aarch64_vgetfmulx<mode>"
5184 [(set (match_operand:<VEL> 0 "register_operand" "=w")
5186 [(match_operand:<VEL> 1 "register_operand" "w")
5188 (match_operand:VDQF 2 "register_operand" "w")
5189 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5193 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5194 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5196 [(set_attr "type" "fmul<Vetype>")]
5200 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5201 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5202 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5203 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5205 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5206 [(set_attr "type" "neon_q<addsub><q>")]
5209 ;; suqadd and usqadd
5211 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5212 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5213 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5214 (match_operand:VSDQ_I 2 "register_operand" "w")]
5217 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5218 [(set_attr "type" "neon_qadd<q>")]
5221 ;; sqmovn and uqmovn
5223 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5224 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5225 (SAT_TRUNC:<VNARROWQ>
5226 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5228 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5229 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5232 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5233 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5234 (SAT_TRUNC:<VNARROWQ>
5235 (match_operand:VQN 1 "register_operand" "w")))]
5237 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5238 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5241 (define_insn "aarch64_<su>qxtn2<mode>_le"
5242 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5243 (vec_concat:<VNARROWQ2>
5244 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5245 (SAT_TRUNC:<VNARROWQ>
5246 (match_operand:VQN 2 "register_operand" "w"))))]
5247 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5248 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5249 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5252 (define_insn "aarch64_<su>qxtn2<mode>_be"
5253 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5254 (vec_concat:<VNARROWQ2>
5255 (SAT_TRUNC:<VNARROWQ>
5256 (match_operand:VQN 2 "register_operand" "w"))
5257 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5258 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5259 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5260 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5263 (define_expand "aarch64_<su>qxtn2<mode>"
5264 [(match_operand:<VNARROWQ2> 0 "register_operand")
5265 (match_operand:<VNARROWQ> 1 "register_operand")
5266 (SAT_TRUNC:<VNARROWQ>
5267 (match_operand:VQN 2 "register_operand"))]
5270 if (BYTES_BIG_ENDIAN)
5271 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5274 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5282 (define_insn "aarch64_sqmovun<mode>"
5283 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5284 (truncate:<VNARROWQ>
5287 (match_operand:SD_HSDI 1 "register_operand" "w")
5289 (const_int <half_mask>))))]
5291 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5292 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5295 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5296 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5297 (truncate:<VNARROWQ>
5299 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5300 (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5301 (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5303 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5304 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5307 (define_expand "aarch64_sqmovun<mode>"
5308 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5309 (truncate:<VNARROWQ>
5311 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5316 operands[2] = CONST0_RTX (<MODE>mode);
5318 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5319 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5323 (define_insn "aarch64_sqxtun2<mode>_le"
5324 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5325 (vec_concat:<VNARROWQ2>
5326 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5327 (truncate:<VNARROWQ>
5330 (match_operand:VQN 2 "register_operand" "w")
5331 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5332 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5333 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5334 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5335 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5338 (define_insn "aarch64_sqxtun2<mode>_be"
5339 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5340 (vec_concat:<VNARROWQ2>
5341 (truncate:<VNARROWQ>
5344 (match_operand:VQN 2 "register_operand" "w")
5345 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5346 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5347 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5348 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5349 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5350 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5353 (define_expand "aarch64_sqxtun2<mode>"
5354 [(match_operand:<VNARROWQ2> 0 "register_operand")
5355 (match_operand:<VNARROWQ> 1 "register_operand")
5356 (match_operand:VQN 2 "register_operand")]
5359 rtx zeros = CONST0_RTX (<MODE>mode);
5360 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5361 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5362 if (BYTES_BIG_ENDIAN)
5363 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5364 operands[2], zeros, half_umax));
5366 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5367 operands[2], zeros, half_umax));
5374 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5375 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5377 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5379 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5380 [(set_attr "type" "neon_<optab><q>")]
5385 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5386 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5388 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5389 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5392 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5393 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5396 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5397 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5399 [(match_operand:VDQHS 1 "register_operand" "w")
5400 (vec_duplicate:VDQHS
5401 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5404 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5405 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5410 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5411 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5413 [(match_operand:VDQHS 1 "register_operand" "w")
5415 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5416 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5420 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5421 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5422 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5425 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5426 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5428 [(match_operand:VDQHS 1 "register_operand" "w")
5430 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5431 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5435 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5436 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5437 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5440 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5441 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5443 [(match_operand:SD_HSI 1 "register_operand" "w")
5445 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5446 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5450 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5451 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5452 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5455 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5456 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5458 [(match_operand:SD_HSI 1 "register_operand" "w")
5460 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5461 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5465 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5466 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5467 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5472 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5473 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5475 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5476 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5477 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5480 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5481 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l.

(define_insn "aarch64_sqdmlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

(define_insn "aarch64_sqdmlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

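;; The ss_plus/ss_minus over an ss_ashift-by-one of the widened product
;; mirrors the architectural behaviour: accumulate the saturated doubled
;; product, then saturate the accumulation.  A rough C model for one
;; 16-bit lane (illustrative helper name):
;;
;;   #include <stdint.h>
;;   int32_t sqdmlal_s16 (int32_t acc, int16_t a, int16_t b)
;;   {
;;     int64_t dbl = 2 * (int64_t) a * (int64_t) b;  /* sqdmull step */
;;     if (dbl > INT32_MAX)
;;       dbl = INT32_MAX;                            /* saturate product */
;;     int64_t sum = (int64_t) acc + dbl;            /* accumulate */
;;     if (sum > INT32_MAX) sum = INT32_MAX;         /* saturate result */
;;     if (sum < INT32_MIN) sum = INT32_MIN;
;;     return (int32_t) sum;
;;   }
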
;; vqdml[sa]l_lane.

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n.

(define_insn "aarch64_sqdmlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2.

(define_insn "aarch64_sqdmlal2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane.

(define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], operands[4], p));
  DONE;
})

(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], p));
  DONE;
})

;; vqdmull.

(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 2 "register_operand" "w")))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

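;; A rough C model of one SQDMULL lane (16-bit case; helper name is
;; illustrative): widen, double, and saturate.
;;
;;   #include <stdint.h>
;;   int32_t sqdmull_s16 (int16_t a, int16_t b)
;;   {
;;     int64_t dbl = 2 * (int64_t) a * (int64_t) b;
;;     if (dbl > INT32_MAX)
;;       dbl = INT32_MAX;   /* only -32768 * -32768 overflows */
;;     return (int32_t) dbl;
;;   }
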
;; vqdmull_lane.

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n.

(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (match_operand:<VEL> 2 "register_operand" "<vwx>"))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2.

(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 2 "register_operand" "w")
                (match_dup 3))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; vqdmull2_lane.

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        p));
  DONE;
})

;; vqdmull2_n.

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (match_operand:<VEL> 2 "register_operand" "<vwx>"))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], p));
  DONE;
})

;; vshl.

(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; vqshl.

(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
          [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n.

(define_insn "aarch64_<su>shll<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
                          (match_operand:VD_BHSI 1 "register_operand"))
                        (match_operand:<VWIDE> 2
                          "aarch64_simd_shll_imm_vec")))]
  "TARGET_SIMD"
  {@ [cons: =0, 1, 2]
     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

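;; The D2 alternative covers the case where the shift amount equals the
;; source element width, for which only the SHLL form exists; otherwise
;; the pattern emits USHLL/SSHLL.  A rough scalar C model of one unsigned
;; 8-bit lane (illustrative helper name):
;;
;;   #include <stdint.h>
;;   uint16_t ushll_u8 (uint8_t x, unsigned int shift)  /* 0 <= shift <= 8 */
;;   {
;;     return (uint16_t) ((uint16_t) x << shift);  /* widen, then shift */
;;   }
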
(define_expand "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
                        VSHLL))]
  "TARGET_SIMD"
{
  rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
  emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
  DONE;
})

;; vshll_high_n.

(define_insn "aarch64_<su>shll2<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
                          (vec_select:<VHALF>
                            (match_operand:VQW 1 "register_operand")
                            (match_operand:VQW 2 "vect_par_cnst_hi_half")))
                        (match_operand:<VWIDE> 3
                          "aarch64_simd_shll_imm_vec")))]
  "TARGET_SIMD"
  {@ [cons: =0, 1, 2, 3]
     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
                        VSHLL))]
  "TARGET_SIMD"
{
  rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
  DONE;
})

;; vrshr_n.

(define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (truncate:VSDQ_I_DI
          (SHIFTRT:<V2XWIDE>
            (plus:<V2XWIDE>
              (<SHIFTEXTEND>:<V2XWIDE>
                (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
            (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

(define_expand "aarch64_<sra_op>rshr_n<mode>"
  [(match_operand:VSDQ_I_DI 0 "register_operand")
   (SHIFTRT:VSDQ_I_DI
     (match_operand:VSDQ_I_DI 1 "register_operand")
     (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  if (VECTOR_MODE_P (<MODE>mode))
    {
      shft = gen_const_vec_duplicate (<MODE>mode, shft);
      rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
    }

  emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
                                                    shft, rnd));
  DONE;
})

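;; The rounding constant built above implements the usual "add half, then
;; shift" rounding in the double-width mode.  A scalar C sketch of the
;; computation the matched RTL performs for one 32-bit element
;; (illustrative helper name):
;;
;;   #include <stdint.h>
;;   int32_t srshr_s32 (int32_t x, unsigned int shift)  /* 1 <= shift <= 32 */
;;   {
;;     int64_t wide = (int64_t) x + (1LL << (shift - 1)); /* rounding bias */
;;     return (int32_t) (wide >> shift);  /* shift in double width, narrow */
;;   }
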
;; v(r)sra_n.

(define_insn "aarch64_<sur>sra_ndi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
                    (match_operand:DI 2 "register_operand" "w")
                    (match_operand:SI 3
                      "aarch64_simd_shift_imm_offset_di" "i")]
                   VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%d0, %d2, %3"
  [(set_attr "type" "neon_shift_acc")]
)

;; vs<lr>i_n.

(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
                           (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                           (match_operand:SI 3
                             "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                          VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u).

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
                        (match_operand:SI 2
                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
                       VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vq(r)shr(u)n_n.

(define_insn "aarch64_<shrn_op>shrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:SD_HSDI
            (match_operand:SD_HSDI 1 "register_operand" "w")
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (ALL_TRUNC:<VNARROWQ>
          (SHIFTRT:VQN
            (match_operand:VQN 1 "register_operand" "w")
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
  "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>shrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:VQN
            (match_operand:VQN 1 "register_operand")
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[2]));
})

(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:VQN 1 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<DWI>
            (plus:<DWI>
              (<TRUNCEXTEND>:<DWI>
                (match_operand:SD_HSDI 1 "register_operand" "w"))
              (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:SD_HSDI 1 "register_operand"))
              (match_dup 3))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
})

(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:VQN 1 "register_operand"))
              (match_dup 3))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  if (<CODE> == TRUNCATE
      && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
    {
      rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
      emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
      DONE;
    }
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
  operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
})

(define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ>
          (smin:VQN
            (smax:VQN
              (ashiftrt:VQN
                (match_operand:VQN 1 "register_operand" "w")
                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
              (match_operand:VQN 3 "aarch64_simd_imm_zero"))
            (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
  "TARGET_SIMD"
  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqshrun_n<mode>_insn"
  [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
        (smin:SD_HSDI
          (smax:SD_HSDI
            (ashiftrt:SD_HSDI
              (match_operand:SD_HSDI 1 "register_operand" "w")
              (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
            (const_int 0))
          (const_int <half_mask>)))]
  "TARGET_SIMD"
  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqshrun_n<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:SD_HSDI 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
  "TARGET_SIMD"
{
  rtx dst = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
                                               operands[2]));
  emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
  DONE;
})

(define_expand "aarch64_sqshrun_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
          (smin:VQN
            (smax:VQN
              (ashiftrt:VQN
                (match_operand:VQN 1 "register_operand")
                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
              (match_dup 3))
            (match_dup 4))))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[2]));
  operands[3] = CONST0_RTX (<MODE>mode);
  operands[4]
    = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
})

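;; Writing SQSHRUN as truncate (smin (smax (ashiftrt x n) 0) max) expresses
;; the unsigned-saturating narrow directly in RTL.  A scalar C model for
;; narrowing 16 bits to 8 (illustrative helper name):
;;
;;   #include <stdint.h>
;;   uint8_t sqshrun_s16 (int16_t x, unsigned int shift)
;;   {
;;     int32_t v = x >> shift;     /* arithmetic shift right */
;;     if (v < 0)
;;       v = 0;                    /* the smax against zero */
;;     if (v > UINT8_MAX)
;;       v = UINT8_MAX;            /* the smin against the narrow max */
;;     return (uint8_t) v;
;;   }
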
(define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ>
          (smin:<V2XWIDE>
            (smax:<V2XWIDE>
              (ashiftrt:<V2XWIDE>
                (plus:<V2XWIDE>
                  (sign_extend:<V2XWIDE>
                    (match_operand:VQN 1 "register_operand" "w"))
                  (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
              (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
            (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqrshrun_n<mode>_insn"
  [(set (match_operand:<DWI> 0 "register_operand" "=w")
        (smin:<DWI>
          (smax:<DWI>
            (ashiftrt:<DWI>
              (plus:<DWI>
                (sign_extend:<DWI>
                  (match_operand:SD_HSDI 1 "register_operand" "w"))
                (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
              (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
            (const_int 0))
          (const_int <half_mask>)))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqrshrun_n<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:SD_HSDI 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
  rtx dst = gen_reg_rtx (<DWI>mode);
  emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
  emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
  DONE;
})

(define_expand "aarch64_sqrshrun_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
          (smin:<V2XWIDE>
            (smax:<V2XWIDE>
              (ashiftrt:<V2XWIDE>
                (plus:<V2XWIDE>
                  (sign_extend:<V2XWIDE>
                    (match_operand:VQN 1 "register_operand"))
                  (match_dup 3))
                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
              (match_dup 4))
            (match_dup 5))))]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
  operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
  operands[4] = CONST0_RTX (<V2XWIDE>mode);
  operands[5]
    = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
  operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
})

(define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (ALL_TRUNC:<VNARROWQ>
            (SHIFTRT:VQN
              (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (ALL_TRUNC:<VNARROWQ>
            (SHIFTRT:VQN
              (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (ALL_TRUNC:<VNARROWQ>
     (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
{
  operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[3]));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
                operands[0], operands[1], operands[2], operands[3]));
  else
    emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
                operands[0], operands[1], operands[2], operands[3]));
  DONE;
})

(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (ALL_TRUNC:<VNARROWQ>
            (<TRUNC_SHIFT>:<V2XWIDE>
              (plus:<V2XWIDE>
                (<TRUNCEXTEND>:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (ALL_TRUNC:<VNARROWQ>
            (<TRUNC_SHIFT>:<V2XWIDE>
              (plus:<V2XWIDE>
                (<TRUNCEXTEND>:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  if (<CODE> == TRUNCATE
      && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
    {
      rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
      emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
                                            operands[2], tmp));
      DONE;
    }
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
  operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            rnd));
  else
    emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            rnd));
  DONE;
})

(define_insn "aarch64_sqshrun2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ>
            (smin:VQN
              (smax:VQN
                (ashiftrt:VQN
                  (match_operand:VQN 2 "register_operand" "w")
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqshrun2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (smin:VQN
              (smax:VQN
                (ashiftrt:VQN
                  (match_operand:VQN 2 "register_operand" "w")
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqshrun2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[3]));
  rtx zeros = CONST0_RTX (<MODE>mode);
  rtx max
    = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
                                operands[1], operands[2], operands[3],
                                zeros, max));
  else
    emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
                                operands[1], operands[2], operands[3],
                                zeros, max));
  DONE;
})

(define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ>
            (smin:<V2XWIDE>
              (smax:<V2XWIDE>
                (ashiftrt:<V2XWIDE>
                  (plus:<V2XWIDE>
                    (sign_extend:<V2XWIDE>
                      (match_operand:VQN 2 "register_operand" "w"))
                    (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (smin:<V2XWIDE>
              (smax:<V2XWIDE>
                (ashiftrt:<V2XWIDE>
                  (plus:<V2XWIDE>
                    (sign_extend:<V2XWIDE>
                      (match_operand:VQN 2 "register_operand" "w"))
                    (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqrshrun2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
  rtx zero = CONST0_RTX (<V2XWIDE>mode);
  rtx max
    = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
  operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
                                operands[1], operands[2], operands[3], rnd,
                                zero, max));
  else
    emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
                                operands[1], operands[2], operands[3], rnd,
                                zero, max));
  DONE;
})

;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand")
            (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2   ; attrs: type          ]
     [ w        , w , w   ; neon_compare<q>      ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
     [ w        , w , ZDz ; neon_compare_zero<q> ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
  }
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

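;; For the general-purpose-register alternative the split above produces a
;; compare followed by a negated conditional set (cmp + csetm) rather than
;; a SIMD compare; its C semantics (illustrative helper name):
;;
;;   long long cmgt_gp (long long a, long long b)
;;   {
;;     return a > b ? -1LL : 0LL;
;;   }
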
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD && reload_completed"
  {@ [ cons: =0 , 1 , 2   ; attrs: type       ]
     [ w        , w , w   ; neon_compare      ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
     [ w        , w , ZDz ; neon_compare_zero ] cm<optab>\t%d0, %d1, #0
  }
)

;; cm(hs|hi)

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (neg:<V_INT_EQUIV>
          (UCOMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        machine_mode mode = CCmode;
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w"))))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

(define_insn "aarch64_cmtst<mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (plus:<V_INT_EQUIV>
          (eq:<V_INT_EQUIV>
            (and:VDQ_I
              (match_operand:VDQ_I 1 "register_operand" "w")
              (match_operand:VDQ_I 2 "register_operand" "w"))
            (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
          (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

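;; For reference, the per-lane semantics of CMTST in C (illustrative
;; helper name):
;;
;;   long long cmtst_lane (long long a, long long b)
;;   {
;;     return (a & b) != 0 ? -1LL : 0LL;
;;   }
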
;; A cmtst can also arise when combine produces
;; not (neg (eq x 0)),
;; in which case we rewrite it to a comparison of x against itself.

(define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (plus:<V_INT_EQUIV>
          (eq:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
          (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w,r")
              (match_operand:DI 2 "register_operand" "w,r"))
            (const_int 0))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand")
              (match_operand:DI 2 "register_operand"))
            (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
        machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
        rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
        rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

(define_insn "*aarch64_cmtstdi<vczle><vczbe>"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w")
              (match_operand:DI 2 "register_operand" "w"))
            (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VHSDF_HSDF 1 "register_operand")
            (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2   ]
     [ w        , w , w   ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
     [ w        , w , YDz ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
  }
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

(define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (neg:<V_INT_EQUIV>
          (FAC_COMPARISONS:<V_INT_EQUIV>
            (abs:VHSDF_HSDF
              (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
            (abs:VHSDF_HSDF
              (match_operand:VHSDF_HSDF 2 "register_operand" "w")))))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; ADDP with two registers semantically concatenates them and performs
;; a pairwise addition on the result.  For 128-bit input modes represent this
;; as a concatenation of the pairwise addition results of the two input
;; registers.  This allows us to avoid using intermediate 256-bit modes.
(define_insn "aarch64_addp<mode>_insn"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (vec_concat:VQ_I
          (plus:<VHALF>
            (vec_select:<VHALF>
              (match_operand:VQ_I 1 "register_operand" "w")
              (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
            (vec_select:<VHALF>
              (match_dup 1)
              (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
          (plus:<VHALF>
            (vec_select:<VHALF>
              (match_operand:VQ_I 2 "register_operand" "w")
              (match_dup 3))
            (vec_select:<VHALF>
              (match_dup 2)
              (match_dup 4)))))]
  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

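;; A C model of the pairwise addition described above, for a 4-lane
;; vector (illustrative helper name):
;;
;;   #include <stdint.h>
;;   void addp_s32x4 (int32_t d[4], const int32_t a[4], const int32_t b[4])
;;   {
;;     d[0] = a[0] + a[1];  /* pairs from the first input */
;;     d[1] = a[2] + a[3];
;;     d[2] = b[0] + b[1];  /* pairs from the second input */
;;     d[3] = b[2] + b[3];
;;   }
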
;; For 64-bit input modes an ADDP is represented as a concatenation
;; of the input registers into a 128-bit register which is then fed
;; into a pairwise add.  That way we avoid having to create intermediate
;; 32-bit vector modes.
(define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (plus:VD_BHSI
          (vec_select:VD_BHSI
            (vec_concat:<VDBL>
              (match_operand:VD_BHSI 1 "register_operand" "w")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
          (vec_select:VD_BHSI
            (vec_concat:<VDBL>
              (match_dup 1)
              (match_dup 2))
            (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; A common use case of 64-bit ADDP is to have both operands come from the
;; same 128-bit vector and produce the pairwise addition results in the
;; lower half.  Split into the 128-bit ADDP form and extract the low half.
(define_insn_and_split "*aarch64_addp_same_reg<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w")
        (plus:<VHALF>
          (vec_select:<VHALF>
            (match_operand:VQ_I 1 "register_operand" "w")
            (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
          (vec_select:<VHALF>
            (match_dup 1)
            (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    rtx scratch;
    if (can_create_pseudo_p ())
      scratch = gen_reg_rtx (<MODE>mode);
    else
      scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);

    emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
                                            operands[2], operands[3]));
    emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
    DONE;
  }
)

(define_expand "aarch64_addp<mode>"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
  if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
    nunits /= 2;
  rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
  if (BYTES_BIG_ENDIAN)
    std::swap (operands[1], operands[2]);
  emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
                                          operands[2], par_even, par_odd));
  DONE;
})

;; sqrt

(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
        (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.

(define_insn "aarch64_simd_ld2<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
        (unspec:VSTRUCT_2Q [
          (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_simd_ld2r<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_2QD [
          (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

7415 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7416 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7417 (unspec:VSTRUCT_2QD [
7418 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7419 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7420 (match_operand:SI 3 "immediate_operand" "i")]
7424 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7425 INTVAL (operands[3]));
7426 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7428 [(set_attr "type" "neon_load2_one_lane")]
7431 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7432 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7433 (unspec:VSTRUCT_2Q [
7434 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7438 if (BYTES_BIG_ENDIAN)
7439 {
7440 rtx tmp = gen_reg_rtx (<MODE>mode);
7441 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7442 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7443 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7444 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7445 }
7446 else
7447 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7451 (define_insn "aarch64_simd_st2<vstruct_elt>"
7452 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7453 (unspec:VSTRUCT_2Q [
7454 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7457 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7458 [(set_attr "type" "neon_store2_2reg<q>")]
7461 ;; RTL uses GCC vector extension indices, so flip only for assembly.
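;; The remapping done by aarch64_endian_lane_rtx amounts to the
;; following (an illustrative sketch; see aarch64.cc for the real
;; definition):
;;
;;   /* GCC lane I names the architectural lane NUNITS - 1 - I when
;;      the target is big-endian, and lane I otherwise.  */
;;   static int endian_lane (int nunits, int i)
;;   {
;;     return BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
;;   }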
7462 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7463 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7464 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7465 (match_operand:SI 2 "immediate_operand" "i")]
7469 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7470 INTVAL (operands[2]));
7471 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7473 [(set_attr "type" "neon_store2_one_lane<q>")]
7476 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7477 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7478 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7482 if (BYTES_BIG_ENDIAN)
7483 {
7484 rtx tmp = gen_reg_rtx (<MODE>mode);
7485 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7486 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7487 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7488 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7489 }
7490 else
7491 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7495 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7496 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7497 (unspec:VSTRUCT_3Q [
7498 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7501 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7502 [(set_attr "type" "neon_load3_3reg<q>")]
7505 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7506 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7507 (unspec:VSTRUCT_3QD [
7508 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7511 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7512 [(set_attr "type" "neon_load3_all_lanes<q>")]
7515 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7516 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7517 (unspec:VSTRUCT_3QD [
7518 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7519 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7520 (match_operand:SI 3 "immediate_operand" "i")]
7524 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7525 INTVAL (operands[3]));
7526 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7528 [(set_attr "type" "neon_load3_one_lane")]
7531 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7532 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7533 (unspec:VSTRUCT_3Q [
7534 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7538 if (BYTES_BIG_ENDIAN)
7539 {
7540 rtx tmp = gen_reg_rtx (<MODE>mode);
7541 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7542 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7543 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7544 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7545 }
7546 else
7547 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7551 (define_insn "aarch64_simd_st3<vstruct_elt>"
7552 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7553 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7556 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7557 [(set_attr "type" "neon_store3_3reg<q>")]
7560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7561 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7562 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7563 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7564 (match_operand:SI 2 "immediate_operand" "i")]
7568 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7569 INTVAL (operands[2]));
7570 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7572 [(set_attr "type" "neon_store3_one_lane<q>")]
7575 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7576 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7577 (unspec:VSTRUCT_3Q [
7578 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7582 if (BYTES_BIG_ENDIAN)
7583 {
7584 rtx tmp = gen_reg_rtx (<MODE>mode);
7585 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7586 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7587 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7588 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7589 }
7590 else
7591 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7595 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7596 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7597 (unspec:VSTRUCT_4Q [
7598 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7601 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7602 [(set_attr "type" "neon_load4_4reg<q>")]
7605 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7606 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7607 (unspec:VSTRUCT_4QD [
7608 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7611 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7612 [(set_attr "type" "neon_load4_all_lanes<q>")]
7615 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7616 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7617 (unspec:VSTRUCT_4QD [
7618 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7619 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7620 (match_operand:SI 3 "immediate_operand" "i")]
7624 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7625 INTVAL (operands[3]));
7626 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7628 [(set_attr "type" "neon_load4_one_lane")]
7631 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7632 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7633 (unspec:VSTRUCT_4Q [
7634 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7638 if (BYTES_BIG_ENDIAN)
7639 {
7640 rtx tmp = gen_reg_rtx (<MODE>mode);
7641 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7642 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7643 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7644 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7645 }
7646 else
7647 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7651 (define_insn "aarch64_simd_st4<vstruct_elt>"
7652 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7653 (unspec:VSTRUCT_4Q [
7654 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7657 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7658 [(set_attr "type" "neon_store4_4reg<q>")]
7661 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7662 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7663 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7664 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7665 (match_operand:SI 2 "immediate_operand" "i")]
7669 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7670 INTVAL (operands[2]));
7671 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7673 [(set_attr "type" "neon_store4_one_lane<q>")]
7676 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7677 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7678 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7682 if (BYTES_BIG_ENDIAN)
7683 {
7684 rtx tmp = gen_reg_rtx (<MODE>mode);
7685 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7686 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7687 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7688 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7689 }
7690 else
7691 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7695 ;; Patterns for rcpc3 vector lane loads and stores.
7697 (define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
7698 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
7699 (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
7700 (match_operand:SI 2 "immediate_operand" "i")]
7704 operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
7705 INTVAL (operands[2]));
7706 return "stl1\\t{%S1.<Vetype>}[%2], %0";
7708 [(set_attr "type" "neon_store2_one_lane")]
7711 (define_expand "aarch64_vec_stl1_lane<mode>"
7712 [(match_operand:DI 0 "register_operand")
7713 (match_operand:V12DIF 1 "register_operand")
7714 (match_operand:SI 2 "immediate_operand")]
7717 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7718 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7720 aarch64_simd_lane_bounds (operands[2], 0,
7721 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7722 emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
7723 operands[1], operands[2]));
7727 (define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
7728 [(set (match_operand:V12DIF 0 "register_operand" "=w")
7730 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
7731 (match_operand:V12DIF 2 "register_operand" "0")
7732 (match_operand:SI 3 "immediate_operand" "i")]
7733 UNSPEC_LDAP1_LANE))]
7736 operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
7737 INTVAL (operands[3]));
7738 return "ldap1\\t{%S0.<Vetype>}[%3], %1";
7740 [(set_attr "type" "neon_load2_one_lane")]
7743 (define_expand "aarch64_vec_ldap1_lane<mode>"
7744 [(match_operand:V12DIF 0 "register_operand")
7745 (match_operand:DI 1 "register_operand")
7746 (match_operand:V12DIF 2 "register_operand")
7747 (match_operand:SI 3 "immediate_operand")]
7750 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7751 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7753 aarch64_simd_lane_bounds (operands[3], 0,
7754 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7755 emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
7756 mem, operands[2], operands[3]));
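;; These expanders implement the LRCPC3 load-acquire/store-release
;; lane intrinsics.  An illustrative use (intrinsic names here assume
;; an arm_neon.h with +rcpc3 support):
;;
;;   uint64x2_t load_acquire_lane0 (const uint64_t *p, uint64x2_t v)
;;   {
;;     /* ldap1 {v0.d}[0], [x0]: load-acquire into lane 0.  */
;;     return vldap1q_lane_u64 (p, v, 0);
;;   }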
7760 (define_insn_and_split "aarch64_rev_reglist<mode>"
7761 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7763 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7764 (match_operand:V16QI 2 "register_operand" "w")]
7765 UNSPEC_REV_REGLIST))]
7768 "&& reload_completed"
7772 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7773 for (i = 0; i < nregs; i++)
7775 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7776 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7777 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7781 [(set_attr "type" "neon_tbl1_q")
7782 (set_attr "length" "<insn_count>")]
7785 ;; Reload patterns for AdvSIMD register list operands.
7787 (define_expand "mov<mode>"
7788 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7789 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7792 if (can_create_pseudo_p ())
7794 if (GET_CODE (operands[0]) != REG)
7795 operands[1] = force_reg (<MODE>mode, operands[1]);
7799 (define_expand "mov<mode>"
7800 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7801 (match_operand:VSTRUCT 1 "general_operand"))]
7804 if (can_create_pseudo_p ())
7806 if (GET_CODE (operands[0]) != REG)
7807 operands[1] = force_reg (<MODE>mode, operands[1]);
7811 (define_expand "movv8di"
7812 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7813 (match_operand:V8DI 1 "general_operand"))]
7816 if (can_create_pseudo_p () && MEM_P (operands[0]))
7817 operands[1] = force_reg (V8DImode, operands[1]);
7820 (define_expand "aarch64_ld1x3<vstruct_elt>"
7821 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7822 (match_operand:DI 1 "register_operand")]
7825 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7826 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7830 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7831 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7833 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7836 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7837 [(set_attr "type" "neon_load1_3reg<q>")]
7840 (define_expand "aarch64_ld1x4<vstruct_elt>"
7841 [(match_operand:VSTRUCT_4QD 0 "register_operand")
7842 (match_operand:DI 1 "register_operand")]
7845 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7846 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7850 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7851 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7853 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7856 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7857 [(set_attr "type" "neon_load1_4reg<q>")]
7860 (define_expand "aarch64_st1x2<vstruct_elt>"
7861 [(match_operand:DI 0 "register_operand")
7862 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7865 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7866 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7870 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7871 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7873 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7876 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7877 [(set_attr "type" "neon_store1_2reg<q>")]
7880 (define_expand "aarch64_st1x3<vstruct_elt>"
7881 [(match_operand:DI 0 "register_operand")
7882 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7885 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7886 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7890 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7891 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7893 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7896 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7897 [(set_attr "type" "neon_store1_3reg<q>")]
7900 (define_expand "aarch64_st1x4<vstruct_elt>"
7901 [(match_operand:DI 0 "register_operand")
7902 (match_operand:VSTRUCT_4QD 1 "register_operand")]
7905 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7906 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7910 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7911 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7913 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7916 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7917 [(set_attr "type" "neon_store1_4reg<q>")]
7920 (define_insn "*aarch64_mov<mode>"
7921 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
7922 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
7923 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7924 && (register_operand (operands[0], <MODE>mode)
7925 || register_operand (operands[1], <MODE>mode))"
7926 {@ [ cons: =0 , 1 ; attrs: type , length ]
7927 [ w , w ; multiple , <insn_count> ] #
7928 [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7929 [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
7933 (define_insn "*aarch64_mov<mode>"
7934 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
7935 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
7936 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7937 && (register_operand (operands[0], <MODE>mode)
7938 || register_operand (operands[1], <MODE>mode))"
7939 {@ [ cons: =0 , 1 ; attrs: type , length ]
7940 [ w , w ; multiple , <insn_count> ] #
7941 [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
7942 [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
7946 (define_insn "*aarch64_movv8di"
7947 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7948 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7949 "(register_operand (operands[0], V8DImode)
7950 || register_operand (operands[1], V8DImode))"
7952 [(set_attr "type" "multiple,multiple,multiple")
7953 (set_attr "length" "32,16,16")]
7956 (define_insn "aarch64_be_ld1<mode>"
7957 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7958 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7959 "aarch64_simd_struct_operand" "Utv")]
7962 "ld1\\t{%0<Vmtype>}, %1"
7963 [(set_attr "type" "neon_load1_1reg<q>")]
7966 (define_insn "aarch64_be_st1<mode>"
7967 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7968 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7971 "st1\\t{%1<Vmtype>}, %0"
7972 [(set_attr "type" "neon_store1_1reg<q>")]
7975 (define_insn "*aarch64_be_mov<mode>"
7976 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
7977 (match_operand:VSTRUCT_2D 1 "general_operand"))]
7979 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7980 && (register_operand (operands[0], <MODE>mode)
7981 || register_operand (operands[1], <MODE>mode))"
7982 {@ [ cons: =0 , 1 ; attrs: type , length ]
7983 [ w , w ; multiple , 8 ] #
7984 [ m , w ; neon_stp , 4 ] stp\t%d1, %R1, %0
7985 [ w , m ; neon_ldp , 4 ] ldp\t%d0, %R0, %1
7989 (define_insn "*aarch64_be_mov<mode>"
7990 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
7991 (match_operand:VSTRUCT_2Q 1 "general_operand"))]
7993 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7994 && (register_operand (operands[0], <MODE>mode)
7995 || register_operand (operands[1], <MODE>mode))"
7996 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
7997 [ w , w ; multiple , simd , 8 ] #
7998 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
7999 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8003 (define_insn "*aarch64_be_movoi"
8004 [(set (match_operand:OI 0 "nonimmediate_operand")
8005 (match_operand:OI 1 "general_operand"))]
8007 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8008 && (register_operand (operands[0], OImode)
8009 || register_operand (operands[1], OImode))"
8010 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8011 [ w , w ; multiple , simd , 8 ] #
8012 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
8013 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8017 (define_insn "*aarch64_be_mov<mode>"
8018 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
8019 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
8021 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8022 && (register_operand (operands[0], <MODE>mode)
8023 || register_operand (operands[1], <MODE>mode))"
8025 [(set_attr "type" "multiple")
8026 (set_attr "arch" "fp<q>,*,*")
8027 (set_attr "length" "12,8,8")]
8030 (define_insn "*aarch64_be_movci"
8031 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8032 (match_operand:CI 1 "general_operand" " w,w,o"))]
8034 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8035 && (register_operand (operands[0], CImode)
8036 || register_operand (operands[1], CImode))"
8038 [(set_attr "type" "multiple")
8039 (set_attr "arch" "simd,*,*")
8040 (set_attr "length" "12,8,8")]
8043 (define_insn "*aarch64_be_mov<mode>"
8044 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8045 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
8047 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8048 && (register_operand (operands[0], <MODE>mode)
8049 || register_operand (operands[1], <MODE>mode))"
8051 [(set_attr "type" "multiple")
8052 (set_attr "arch" "fp<q>,*,*")
8053 (set_attr "length" "16,8,8")]
8056 (define_insn "*aarch64_be_movxi"
8057 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8058 (match_operand:XI 1 "general_operand" " w,w,o"))]
8060 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8061 && (register_operand (operands[0], XImode)
8062 || register_operand (operands[1], XImode))"
8064 [(set_attr "type" "multiple")
8065 (set_attr "arch" "simd,*,*")
8066 (set_attr "length" "16,8,8")]
8070 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8071 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8072 "TARGET_FLOAT && reload_completed"
8075 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8080 [(set (match_operand:OI 0 "register_operand")
8081 (match_operand:OI 1 "register_operand"))]
8082 "TARGET_FLOAT && reload_completed"
8085 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8090 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8091 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8092 "TARGET_FLOAT && reload_completed"
8095 if (register_operand (operands[0], <MODE>mode)
8096 && register_operand (operands[1], <MODE>mode))
8098 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8101 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8103 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8104 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8105 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8107 simplify_gen_subreg (pair_mode, operands[1],
8109 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8110 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8114 gen_lowpart (<VSTRUCT_ELT>mode,
8115 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8126 [(set (match_operand:CI 0 "nonimmediate_operand")
8127 (match_operand:CI 1 "general_operand"))]
8128 "TARGET_FLOAT && reload_completed"
8131 if (register_operand (operands[0], CImode)
8132 && register_operand (operands[1], CImode))
8134 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8137 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8139 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8140 simplify_gen_subreg (OImode, operands[1], CImode, 0));
8141 emit_move_insn (gen_lowpart (V16QImode,
8142 simplify_gen_subreg (TImode, operands[0],
8144 gen_lowpart (V16QImode,
8145 simplify_gen_subreg (TImode, operands[1],
8154 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8155 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8156 "TARGET_FLOAT && reload_completed"
8159 if (register_operand (operands[0], <MODE>mode)
8160 && register_operand (operands[1], <MODE>mode))
8162 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8165 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8167 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8168 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8169 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8171 simplify_gen_subreg (pair_mode, operands[1],
8173 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8174 <MODE>mode, 2 * elt_size),
8175 simplify_gen_subreg (pair_mode, operands[1],
8176 <MODE>mode, 2 * elt_size));
8184 [(set (match_operand:XI 0 "nonimmediate_operand")
8185 (match_operand:XI 1 "general_operand"))]
8186 "TARGET_FLOAT && reload_completed"
8189 if (register_operand (operands[0], XImode)
8190 && register_operand (operands[1], XImode))
8192 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8195 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8197 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8198 simplify_gen_subreg (OImode, operands[1], XImode, 0));
8199 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8200 simplify_gen_subreg (OImode, operands[1], XImode, 32));
8208 [(set (match_operand:V8DI 0 "nonimmediate_operand")
8209 (match_operand:V8DI 1 "general_operand"))]
8213 if (register_operand (operands[0], V8DImode)
8214 && register_operand (operands[1], V8DImode))
8216 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8219 else if ((register_operand (operands[0], V8DImode)
8220 && memory_operand (operands[1], V8DImode))
8221 || (memory_operand (operands[0], V8DImode)
8222 && register_operand (operands[1], V8DImode)))
8224 /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16. */
8225 auto mode = STRICT_ALIGNMENT ? DImode : TImode;
8226 int increment = GET_MODE_SIZE (mode);
8227 std::pair<rtx, rtx> last_pair = {};
8228 for (int offset = 0; offset < 64; offset += increment)
8230 std::pair<rtx, rtx> pair = {
8231 simplify_gen_subreg (mode, operands[0], V8DImode, offset),
8232 simplify_gen_subreg (mode, operands[1], V8DImode, offset)
8234 if (register_operand (pair.first, mode)
8235 && reg_overlap_mentioned_p (pair.first, pair.second))
8236 last_pair = pair;
8237 else
8238 emit_move_insn (pair.first, pair.second);
8240 if (last_pair.first)
8241 emit_move_insn (last_pair.first, last_pair.second);
8248 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8249 [(match_operand:VSTRUCT_QD 0 "register_operand")
8250 (match_operand:DI 1 "register_operand")]
8253 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8254 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8256 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8260 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8261 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8262 (unspec:VSTRUCT_2DNX [
8263 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8266 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8267 [(set_attr "type" "neon_load2_2reg<q>")]
8270 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8271 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8272 (unspec:VSTRUCT_2DX [
8273 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8276 "ld1\\t{%S0.1d - %T0.1d}, %1"
8277 [(set_attr "type" "neon_load1_2reg<q>")]
8280 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8281 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8282 (unspec:VSTRUCT_3DNX [
8283 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8286 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8287 [(set_attr "type" "neon_load3_3reg<q>")]
8290 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8291 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8292 (unspec:VSTRUCT_3DX [
8293 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8296 "ld1\\t{%S0.1d - %U0.1d}, %1"
8297 [(set_attr "type" "neon_load1_3reg<q>")]
8300 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8301 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8302 (unspec:VSTRUCT_4DNX [
8303 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8306 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8307 [(set_attr "type" "neon_load4_4reg<q>")]
8310 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8311 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8312 (unspec:VSTRUCT_4DX [
8313 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8316 "ld1\\t{%S0.1d - %V0.1d}, %1"
8317 [(set_attr "type" "neon_load1_4reg<q>")]
8320 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8321 [(match_operand:VSTRUCT_D 0 "register_operand")
8322 (match_operand:DI 1 "register_operand")]
8325 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8326 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8330 (define_expand "aarch64_ld1<VALL_F16:mode>"
8331 [(match_operand:VALL_F16 0 "register_operand")
8332 (match_operand:DI 1 "register_operand")]
8335 machine_mode mode = <VALL_F16:MODE>mode;
8336 rtx mem = gen_rtx_MEM (mode, operands[1]);
8338 if (BYTES_BIG_ENDIAN)
8339 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8340 else
8341 emit_move_insn (operands[0], mem);
8345 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8346 [(match_operand:VSTRUCT_Q 0 "register_operand")
8347 (match_operand:DI 1 "register_operand")]
8350 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8351 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8355 (define_expand "aarch64_ld1x2<vstruct_elt>"
8356 [(match_operand:VSTRUCT_2QD 0 "register_operand")
8357 (match_operand:DI 1 "register_operand")]
8360 machine_mode mode = <MODE>mode;
8361 rtx mem = gen_rtx_MEM (mode, operands[1]);
8363 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8367 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8368 [(match_operand:VSTRUCT_QD 0 "register_operand")
8369 (match_operand:DI 1 "register_operand")
8370 (match_operand:VSTRUCT_QD 2 "register_operand")
8371 (match_operand:SI 3 "immediate_operand")]
8374 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8375 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8377 aarch64_simd_lane_bounds (operands[3], 0,
8378 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8379 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8380 mem, operands[2], operands[3]));
8384 ;; Permuted-store expanders for neon intrinsics.
8386 ;; Permute instructions
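;; TBL indexes bytes across the table registers and yields zero for
;; out-of-range indices.  In intrinsics terms (an illustrative sketch;
;; assumes arm_neon.h):
;;
;;   uint8x16_t permute (uint8x16_t table, uint8x16_t idx)
;;   {
;;     /* tbl v0.16b, {v1.16b}, v2.16b; idx[i] >= 16 gives 0.  */
;;     return vqtbl1q_u8 (table, idx);
;;   }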
8390 (define_expand "vec_perm<mode>"
8391 [(match_operand:VB 0 "register_operand")
8392 (match_operand:VB 1 "register_operand")
8393 (match_operand:VB 2 "register_operand")
8394 (match_operand:VB 3 "register_operand")]
8397 aarch64_expand_vec_perm (operands[0], operands[1],
8398 operands[2], operands[3], <nunits>);
8402 (define_insn "aarch64_qtbl1<mode>"
8403 [(set (match_operand:VB 0 "register_operand" "=w")
8404 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8405 (match_operand:VB 2 "register_operand" "w")]
8408 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8409 [(set_attr "type" "neon_tbl1<q>")]
8412 (define_insn "aarch64_qtbx1<mode>"
8413 [(set (match_operand:VB 0 "register_operand" "=w")
8414 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8415 (match_operand:V16QI 2 "register_operand" "w")
8416 (match_operand:VB 3 "register_operand" "w")]
8419 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8420 [(set_attr "type" "neon_tbl1<q>")]
8423 ;; Two source registers.
8425 (define_insn "aarch64_qtbl2<mode>"
8426 [(set (match_operand:VB 0 "register_operand" "=w")
8427 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8428 (match_operand:VB 2 "register_operand" "w")]
8431 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8432 [(set_attr "type" "neon_tbl2")]
8435 (define_insn "aarch64_qtbx2<mode>"
8436 [(set (match_operand:VB 0 "register_operand" "=w")
8437 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8438 (match_operand:V2x16QI 2 "register_operand" "w")
8439 (match_operand:VB 3 "register_operand" "w")]
8442 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8443 [(set_attr "type" "neon_tbl2")]
8446 ;; Three source registers.
8448 (define_insn "aarch64_qtbl3<mode>"
8449 [(set (match_operand:VB 0 "register_operand" "=w")
8450 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8451 (match_operand:VB 2 "register_operand" "w")]
8454 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8455 [(set_attr "type" "neon_tbl3")]
8458 (define_insn "aarch64_qtbx3<mode>"
8459 [(set (match_operand:VB 0 "register_operand" "=w")
8460 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8461 (match_operand:V3x16QI 2 "register_operand" "w")
8462 (match_operand:VB 3 "register_operand" "w")]
8465 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8466 [(set_attr "type" "neon_tbl3")]
8469 ;; Four source registers.
8471 (define_insn "aarch64_qtbl4<mode>"
8472 [(set (match_operand:VB 0 "register_operand" "=w")
8473 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8474 (match_operand:VB 2 "register_operand" "w")]
8477 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8478 [(set_attr "type" "neon_tbl4")]
8481 (define_insn "aarch64_qtbx4<mode>"
8482 [(set (match_operand:VB 0 "register_operand" "=w")
8483 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8484 (match_operand:V4x16QI 2 "register_operand" "w")
8485 (match_operand:VB 3 "register_operand" "w")]
8488 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8489 [(set_attr "type" "neon_tbl4")]
8492 (define_insn_and_split "aarch64_combinev16qi"
8493 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8494 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8495 (match_operand:V16QI 2 "register_operand" "w")]
8499 "&& reload_completed"
8502 aarch64_split_combinev16qi (operands);
8505 [(set_attr "type" "multiple")]
8508 ;; This instruction's pattern is generated directly by
8509 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8510 ;; need corresponding changes there.
8511 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8512 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8513 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8514 (match_operand:VALL_F16 2 "register_operand" "w")]
8517 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8518 [(set_attr "type" "neon_permute<q>")]
8521 ;; ZIP1 ignores the contents of the upper halves of the registers,
8522 ;; so we can describe 128-bit operations in terms of 64-bit inputs.
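;; In intrinsics terms (an illustrative sketch; the upper input halves
;; fed to ZIP1 below are don't-cares):
;;
;;   int32x4_t zip1_low (int32x2_t a, int32x2_t b)
;;   {
;;     /* zip1 v0.4s, va.4s, vb.4s -> { a[0], b[0], a[1], b[1] }.  */
;;     return vzip1q_s32 (vcombine_s32 (a, a), vcombine_s32 (b, b));
;;   }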
8523 (define_insn "aarch64_zip1<mode>_low"
8524 [(set (match_operand:VQ 0 "register_operand" "=w")
8525 (unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
8526 (match_operand:<VHALF> 2 "register_operand" "w")]
8529 "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8530 [(set_attr "type" "neon_permute_q")]
8533 ;; This instruction's pattern is generated directly by
8534 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8535 ;; need corresponding changes there. Note that the immediate (third)
8536 ;; operand is a lane index, not a byte index.
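;; For V4SI, for instance, lane index 1 is emitted as byte offset #4.
;; In intrinsics terms (an illustrative sketch; assumes arm_neon.h):
;;
;;   int32x4_t ext1 (int32x4_t a, int32x4_t b)
;;   {
;;     /* ext v0.16b, va.16b, vb.16b, #4
;;        -> { a[1], a[2], a[3], b[0] }.  */
;;     return vextq_s32 (a, b, 1);
;;   }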
8537 (define_insn "aarch64_ext<mode>"
8538 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8539 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8540 (match_operand:VALL_F16 2 "register_operand" "w")
8541 (match_operand:SI 3 "immediate_operand" "i")]
8545 operands[3] = GEN_INT (INTVAL (operands[3])
8546 * GET_MODE_UNIT_SIZE (<MODE>mode));
8547 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8549 [(set_attr "type" "neon_ext<q>")]
8552 ;; This instruction's pattern is generated directly by
8553 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8554 ;; need corresponding changes there.
8555 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8556 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8557 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8560 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8561 [(set_attr "type" "neon_rev<q>")]
8564 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8565 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8566 (unspec:VSTRUCT_2DNX [
8567 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8570 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8571 [(set_attr "type" "neon_store2_2reg")]
8574 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8575 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8576 (unspec:VSTRUCT_2DX [
8577 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8580 "st1\\t{%S1.1d - %T1.1d}, %0"
8581 [(set_attr "type" "neon_store1_2reg")]
8584 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8585 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8586 (unspec:VSTRUCT_3DNX [
8587 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8590 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8591 [(set_attr "type" "neon_store3_3reg")]
8594 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8595 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8596 (unspec:VSTRUCT_3DX [
8597 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8600 "st1\\t{%S1.1d - %U1.1d}, %0"
8601 [(set_attr "type" "neon_store1_3reg")]
8604 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8605 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8606 (unspec:VSTRUCT_4DNX [
8607 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8610 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8611 [(set_attr "type" "neon_store4_4reg")]
8614 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8615 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8616 (unspec:VSTRUCT_4DX [
8617 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8620 "st1\\t{%S1.1d - %V1.1d}, %0"
8621 [(set_attr "type" "neon_store1_4reg")]
8624 (define_expand "aarch64_st<nregs><vstruct_elt>"
8625 [(match_operand:DI 0 "register_operand")
8626 (match_operand:VSTRUCT_D 1 "register_operand")]
8629 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8630 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8634 (define_expand "aarch64_st<nregs><vstruct_elt>"
8635 [(match_operand:DI 0 "register_operand")
8636 (match_operand:VSTRUCT_Q 1 "register_operand")]
8639 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8640 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8644 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8645 [(match_operand:DI 0 "register_operand")
8646 (match_operand:VSTRUCT_QD 1 "register_operand")
8647 (match_operand:SI 2 "immediate_operand")]
8650 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8651 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8653 aarch64_simd_lane_bounds (operands[2], 0,
8654 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8655 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8656 operands[1], operands[2]));
8660 (define_expand "aarch64_st1<VALL_F16:mode>"
8661 [(match_operand:DI 0 "register_operand")
8662 (match_operand:VALL_F16 1 "register_operand")]
8665 machine_mode mode = <VALL_F16:MODE>mode;
8666 rtx mem = gen_rtx_MEM (mode, operands[0]);
8668 if (BYTES_BIG_ENDIAN)
8669 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8670 else
8671 emit_move_insn (mem, operands[1]);
8675 ;; Standard pattern name vec_init<mode><Vel>.
8677 (define_expand "vec_init<mode><Vel>"
8678 [(match_operand:VALL_F16 0 "register_operand")
8679 (match_operand 1 "" "")]
8682 aarch64_expand_vector_init (operands[0], operands[1]);
8686 (define_expand "vec_init<mode><Vhalf>"
8687 [(match_operand:VQ_NO2E 0 "register_operand")
8688 (match_operand 1 "" "")]
8691 aarch64_expand_vector_init (operands[0], operands[1]);
8695 (define_insn "*aarch64_simd_ld1r<mode>"
8696 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8697 (vec_duplicate:VALL_F16
8698 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8700 "ld1r\\t{%0.<Vtype>}, %1"
8701 [(set_attr "type" "neon_load1_all_lanes")]
8704 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8705 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8706 (unspec:VSTRUCT_2QD [
8707 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8710 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8711 [(set_attr "type" "neon_load1_2reg<q>")]
8715 (define_insn "@aarch64_frecpe<mode>"
8716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8718 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8721 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8722 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8725 (define_insn "aarch64_frecpx<mode>"
8726 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8727 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8730 "frecpx\t%<s>0, %<s>1"
8731 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8734 (define_insn "@aarch64_frecps<mode>"
8735 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8737 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8738 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8741 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8742 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8745 (define_insn "aarch64_urecpe<mode>"
8746 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8747 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8750 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8751 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8753 ;; Standard pattern name vec_extract<mode><Vel>.
8755 (define_expand "vec_extract<mode><Vel>"
8756 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8757 (match_operand:VALL_F16 1 "register_operand")
8758 (match_operand:SI 2 "immediate_operand")]
8761 emit_insn
8762 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8766 ;; Extract a 64-bit vector from one half of a 128-bit vector.
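;; In intrinsics terms this is vget_low/vget_high (an illustrative
;; sketch; assumes arm_neon.h):
;;
;;   int32x2_t high_half (int32x4_t x)
;;   {
;;     /* Selects lanes 2 and 3, i.e. operand 2 == 1 below.  */
;;     return vget_high_s32 (x);
;;   }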
8767 (define_expand "vec_extract<mode><Vhalf>"
8768 [(match_operand:<VHALF> 0 "register_operand")
8769 (match_operand:VQMOV_NO2E 1 "register_operand")
8770 (match_operand 2 "immediate_operand")]
8773 int start = INTVAL (operands[2]);
8774 gcc_assert (start == 0 || start == 1);
8775 start *= <nunits> / 2;
8776 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8777 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8781 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8782 (define_expand "vec_extract<mode><V1half>"
8783 [(match_operand:<V1HALF> 0 "register_operand")
8784 (match_operand:VQ_2E 1 "register_operand")
8785 (match_operand 2 "immediate_operand")]
8788 /* V1DI and V1DF are rarely used by other patterns, so it is better
8789 to hide them in a subreg destination of a normal DI or DF op.  */
8790 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8791 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8797 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8798 [(set (match_operand:V16QI 0 "register_operand" "=w")
8801 (match_operand:V16QI 1 "register_operand" "%0")
8802 (match_operand:V16QI 2 "register_operand" "w"))]
8805 "aes<aes_op>\\t%0.16b, %2.16b"
8806 [(set_attr "type" "crypto_aese")]
8809 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8810 [(set (match_operand:V16QI 0 "register_operand" "=w")
8811 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8814 "aes<aesmc_op>\\t%0.16b, %1.16b"
8815 [(set_attr "type" "crypto_aesmc")]
8818 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8819 ;; and enforce the register dependency without scheduling or register
8820 ;; allocation messing up the order or introducing moves in between.
8821 ;; Mash the two together during combine.
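;; The fused pair corresponds to the usual intrinsic idiom (an
;; illustrative sketch; assumes arm_neon.h and +crypto):
;;
;;   uint8x16_t aes_round (uint8x16_t data, uint8x16_t key)
;;   {
;;     /* Kept as one aese+aesmc pair by the pattern below.  */
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }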
8823 (define_insn "*aarch64_crypto_aese_fused"
8824 [(set (match_operand:V16QI 0 "register_operand" "=w")
8828 (match_operand:V16QI 1 "register_operand" "%0")
8829 (match_operand:V16QI 2 "register_operand" "w"))]
8833 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8834 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8835 [(set_attr "type" "crypto_aese")
8836 (set_attr "length" "8")]
8839 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8840 ;; and enforce the register dependency without scheduling or register
8841 ;; allocation messing up the order or introducing moves in between.
8842 ;; Mash the two together during combine.
8844 (define_insn "*aarch64_crypto_aesd_fused"
8845 [(set (match_operand:V16QI 0 "register_operand" "=w")
8849 (match_operand:V16QI 1 "register_operand" "%0")
8850 (match_operand:V16QI 2 "register_operand" "w"))]
8854 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8855 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8856 [(set_attr "type" "crypto_aese")
8857 (set_attr "length" "8")]
8862 (define_insn "aarch64_crypto_sha1hsi"
8863 [(set (match_operand:SI 0 "register_operand" "=w")
8864 (unspec:SI [(match_operand:SI 1
8865 "register_operand" "w")]
8869 [(set_attr "type" "crypto_sha1_fast")]
8872 (define_insn "aarch64_crypto_sha1hv4si"
8873 [(set (match_operand:SI 0 "register_operand" "=w")
8874 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8875 (parallel [(const_int 0)]))]
8877 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8879 [(set_attr "type" "crypto_sha1_fast")]
8882 (define_insn "aarch64_be_crypto_sha1hv4si"
8883 [(set (match_operand:SI 0 "register_operand" "=w")
8884 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8885 (parallel [(const_int 3)]))]
8887 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8889 [(set_attr "type" "crypto_sha1_fast")]
8892 (define_insn "aarch64_crypto_sha1su1v4si"
8893 [(set (match_operand:V4SI 0 "register_operand" "=w")
8894 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8895 (match_operand:V4SI 2 "register_operand" "w")]
8898 "sha1su1\\t%0.4s, %2.4s"
8899 [(set_attr "type" "crypto_sha1_fast")]
8902 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8903 [(set (match_operand:V4SI 0 "register_operand" "=w")
8904 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8905 (match_operand:SI 2 "register_operand" "w")
8906 (match_operand:V4SI 3 "register_operand" "w")]
8909 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8910 [(set_attr "type" "crypto_sha1_slow")]
8913 (define_insn "aarch64_crypto_sha1su0v4si"
8914 [(set (match_operand:V4SI 0 "register_operand" "=w")
8915 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8916 (match_operand:V4SI 2 "register_operand" "w")
8917 (match_operand:V4SI 3 "register_operand" "w")]
8920 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8921 [(set_attr "type" "crypto_sha1_xor")]
8926 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8927 [(set (match_operand:V4SI 0 "register_operand" "=w")
8928 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8929 (match_operand:V4SI 2 "register_operand" "w")
8930 (match_operand:V4SI 3 "register_operand" "w")]
8933 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8934 [(set_attr "type" "crypto_sha256_slow")]
8937 (define_insn "aarch64_crypto_sha256su0v4si"
8938 [(set (match_operand:V4SI 0 "register_operand" "=w")
8939 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8940 (match_operand:V4SI 2 "register_operand" "w")]
8943 "sha256su0\\t%0.4s, %2.4s"
8944 [(set_attr "type" "crypto_sha256_fast")]
8947 (define_insn "aarch64_crypto_sha256su1v4si"
8948 [(set (match_operand:V4SI 0 "register_operand" "=w")
8949 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8950 (match_operand:V4SI 2 "register_operand" "w")
8951 (match_operand:V4SI 3 "register_operand" "w")]
8954 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8955 [(set_attr "type" "crypto_sha256_slow")]
8960 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8961 [(set (match_operand:V2DI 0 "register_operand" "=w")
8962 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8963 (match_operand:V2DI 2 "register_operand" "w")
8964 (match_operand:V2DI 3 "register_operand" "w")]
8967 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8968 [(set_attr "type" "crypto_sha512")]
8971 (define_insn "aarch64_crypto_sha512su0qv2di"
8972 [(set (match_operand:V2DI 0 "register_operand" "=w")
8973 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8974 (match_operand:V2DI 2 "register_operand" "w")]
8977 "sha512su0\\t%0.2d, %2.2d"
8978 [(set_attr "type" "crypto_sha512")]
8981 (define_insn "aarch64_crypto_sha512su1qv2di"
8982 [(set (match_operand:V2DI 0 "register_operand" "=w")
8983 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8984 (match_operand:V2DI 2 "register_operand" "w")
8985 (match_operand:V2DI 3 "register_operand" "w")]
8988 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8989 [(set_attr "type" "crypto_sha512")]
8994 (define_insn "eor3q<mode>4"
8995 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8998 (match_operand:VQ_I 2 "register_operand" "w")
8999 (match_operand:VQ_I 3 "register_operand" "w"))
9000 (match_operand:VQ_I 1 "register_operand" "w")))]
9002 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
9003 [(set_attr "type" "crypto_sha3")]
9006 (define_insn "aarch64_rax1qv2di"
9007 [(set (match_operand:V2DI 0 "register_operand" "=w")
9010 (match_operand:V2DI 2 "register_operand" "w")
9012 (match_operand:V2DI 1 "register_operand" "w")))]
9014 "rax1\\t%0.2d, %1.2d, %2.2d"
9015 [(set_attr "type" "crypto_sha3")]
9018 (define_insn "aarch64_xarqv2di"
9019 [(set (match_operand:V2DI 0 "register_operand" "=w")
9022 (match_operand:V2DI 1 "register_operand" "%w")
9023 (match_operand:V2DI 2 "register_operand" "w"))
9024 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
9026 "xar\\t%0.2d, %1.2d, %2.2d, %3"
9027 [(set_attr "type" "crypto_sha3")]
9030 (define_insn "bcaxq<mode>4"
9031 [(set (match_operand:VQ_I 0 "register_operand" "=w")
9034 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
9035 (match_operand:VQ_I 2 "register_operand" "w"))
9036 (match_operand:VQ_I 1 "register_operand" "w")))]
9038 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
9039 [(set_attr "type" "crypto_sha3")]
9044 (define_insn "aarch64_sm3ss1qv4si"
9045 [(set (match_operand:V4SI 0 "register_operand" "=w")
9046 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9047 (match_operand:V4SI 2 "register_operand" "w")
9048 (match_operand:V4SI 3 "register_operand" "w")]
9051 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
9052 [(set_attr "type" "crypto_sm3")]
9056 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9057 [(set (match_operand:V4SI 0 "register_operand" "=w")
9058 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9059 (match_operand:V4SI 2 "register_operand" "w")
9060 (match_operand:V4SI 3 "register_operand" "w")
9061 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9064 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9065 [(set_attr "type" "crypto_sm3")]
9068 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9069 [(set (match_operand:V4SI 0 "register_operand" "=w")
9070 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9071 (match_operand:V4SI 2 "register_operand" "w")
9072 (match_operand:V4SI 3 "register_operand" "w")]
9075 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9076 [(set_attr "type" "crypto_sm3")]
9081 (define_insn "aarch64_sm4eqv4si"
9082 [(set (match_operand:V4SI 0 "register_operand" "=w")
9083 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9084 (match_operand:V4SI 2 "register_operand" "w")]
9087 "sm4e\\t%0.4s, %2.4s"
9088 [(set_attr "type" "crypto_sm4")]
9091 (define_insn "aarch64_sm4ekeyqv4si"
9092 [(set (match_operand:V4SI 0 "register_operand" "=w")
9093 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9094 (match_operand:V4SI 2 "register_operand" "w")]
9097 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9098 [(set_attr "type" "crypto_sm4")]
9103 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9104 [(set (match_operand:VDQSF 0 "register_operand")
9106 [(match_operand:VDQSF 1 "register_operand")
9107 (match_operand:<VFMLA_W> 2 "register_operand")
9108 (match_operand:<VFMLA_W> 3 "register_operand")]
9112 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9113 <nunits> * 2, false);
9114 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9115 <nunits> * 2, false);
9117 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9126 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9127 [(set (match_operand:VDQSF 0 "register_operand")
9129 [(match_operand:VDQSF 1 "register_operand")
9130 (match_operand:<VFMLA_W> 2 "register_operand")
9131 (match_operand:<VFMLA_W> 3 "register_operand")]
9135 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9136 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9138 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9146 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9147 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9150 (vec_select:<VFMLA_SEL_W>
9151 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9152 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9154 (vec_select:<VFMLA_SEL_W>
9155 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9156 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9157 (match_operand:VDQSF 1 "register_operand" "0")))]
9159 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9160 [(set_attr "type" "neon_fp_mul_s")]
9163 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9164 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9168 (vec_select:<VFMLA_SEL_W>
9169 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9170 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9172 (vec_select:<VFMLA_SEL_W>
9173 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9174 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9175 (match_operand:VDQSF 1 "register_operand" "0")))]
9177 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9178 [(set_attr "type" "neon_fp_mul_s")]
9181 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9182 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9185 (vec_select:<VFMLA_SEL_W>
9186 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9187 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9189 (vec_select:<VFMLA_SEL_W>
9190 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9191 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9192 (match_operand:VDQSF 1 "register_operand" "0")))]
9194 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9195 [(set_attr "type" "neon_fp_mul_s")]
9198 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9199 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9203 (vec_select:<VFMLA_SEL_W>
9204 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9205 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9207 (vec_select:<VFMLA_SEL_W>
9208 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9209 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9210 (match_operand:VDQSF 1 "register_operand" "0")))]
9212 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9213 [(set_attr "type" "neon_fp_mul_s")]
9216 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9217 [(set (match_operand:V2SF 0 "register_operand")
9218 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9219 (match_operand:V4HF 2 "register_operand")
9220 (match_operand:V4HF 3 "register_operand")
9221 (match_operand:SI 4 "aarch64_imm2")]
9225 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9226 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9228 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9237 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9238 [(set (match_operand:V2SF 0 "register_operand")
9239 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9240 (match_operand:V4HF 2 "register_operand")
9241 (match_operand:V4HF 3 "register_operand")
9242 (match_operand:SI 4 "aarch64_imm2")]
9246 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9247 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9249 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9257 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9258 [(set (match_operand:V2SF 0 "register_operand" "=w")
9262 (match_operand:V4HF 2 "register_operand" "w")
9263 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9267 (match_operand:V4HF 3 "register_operand" "x")
9268 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9269 (match_operand:V2SF 1 "register_operand" "0")))]
9271 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9272 [(set_attr "type" "neon_fp_mul_s")]
9275 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9276 [(set (match_operand:V2SF 0 "register_operand" "=w")
9281 (match_operand:V4HF 2 "register_operand" "w")
9282 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9286 (match_operand:V4HF 3 "register_operand" "x")
9287 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9288 (match_operand:V2SF 1 "register_operand" "0")))]
9290 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9291 [(set_attr "type" "neon_fp_mul_s")]
9294 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9295 [(set (match_operand:V2SF 0 "register_operand" "=w")
9299 (match_operand:V4HF 2 "register_operand" "w")
9300 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9304 (match_operand:V4HF 3 "register_operand" "x")
9305 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9306 (match_operand:V2SF 1 "register_operand" "0")))]
9308 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9309 [(set_attr "type" "neon_fp_mul_s")]
9312 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9313 [(set (match_operand:V2SF 0 "register_operand" "=w")
9318 (match_operand:V4HF 2 "register_operand" "w")
9319 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9323 (match_operand:V4HF 3 "register_operand" "x")
9324 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9325 (match_operand:V2SF 1 "register_operand" "0")))]
9327 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9328 [(set_attr "type" "neon_fp_mul_s")]
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V8HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V8HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
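;; PMULL/PMULL2 compute a 64x64->128-bit carryless (polynomial) multiply,
;; the operation behind the vmull_p64/vmull_high_p64 intrinsics used by
;; GHASH- and CRC-style kernels.  A small worked example of carryless
;; multiplication, where partial products combine with XOR instead of
;; addition:
;;   0b101 * 0b011 = (x^2 + 1)(x + 1) = x^3 + x^2 + x + 1 = 0b1111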
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn_and_split "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  "&& <CODE> == ZERO_EXTEND
   && aarch64_split_simd_shift_p (insn)"
  [(const_int 0)]
  {
    /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
       provided that the cost of the zero can be amortized over several
       operations.  We'll later recombine the zero and zip if there are
       not enough uses of the zero to make the split worthwhile.  */
    rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
				   <MODE>mode, 0);
    rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
    emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
    DONE;
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
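;; For example (little-endian, and assuming the zero ends up shared with
;; neighbouring extensions), the split turns
;;   uxtl  v0.8h, v1.8b
;; into
;;   movi  v31.16b, #0
;;   zip1  v0.16b, v1.16b, v31.16b
;; interleaving each byte with a zero byte to form zero-extended halfwords.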
(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)
;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; Expander for the intrinsics that take only one mode, unlike the
;; two-mode trunc optab.
(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
  "TARGET_SIMD"
)
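;; XTN keeps the low half of each source element; for example
;;   xtn  v0.8b, v1.8h
;; writes v1.h[i] & 0xff into v0.b[i] for i in 0..7.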
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)
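;; A sketch of the semantics (not the Arm ARM pseudocode): each 32-bit
;; lane accumulates the dot product of one pair of adjacent BF16 elements
;; from each source, i.e. for the V4SF variant, for i in 0..3:
;;   d.s[i] += (float) a.h[2*i]     * (float) b.h[2*i]
;;           + (float) a.h[2*i + 1] * (float) b.h[2*i + 1]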
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)
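;; The lane immediate indexes a *pair* of BF16 elements (the unit BFDOT
;; operates on), hence the endianness correction over nunits / 2 rather
;; than nunits.  For example, with a V8BF operand 3 the valid lanes are
;; 0..3, and lane 2 selects the pair b.h[4], b.h[5].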
;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})
;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
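;; A sketch of the semantics: the V4SF accumulator is viewed as a 2x2
;; row-major matrix C, operand 2 as a 2x4 BF16 matrix A and operand 3 as
;; a 4x2 BF16 matrix B, and the instruction performs C += A * B with the
;; products widened to single precision:
;;   C[i][j] += sum (k = 0..3) (float) A[i][k] * (float) B[k][j]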
;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:VBF 3 "register_operand" "x")
				 (match_operand:SI 4 "const_int_operand" "n")]
		    UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)
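;; As with BFMMLA above, a sketch of the semantics: the V4SI accumulator
;; is a 2x2 matrix C, operand 2 a 2x8 matrix A and operand 3 an 8x2
;; matrix B of bytes (signed, unsigned or mixed-sign according to the
;; MATMUL unspec):
;;   C[i][j] += sum (k = 0..7) A[i][k] * B[k][j]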
;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
			    UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)
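;; The shift trick works because BF16 is the high half of an IEEE binary32:
;; shifting the 16-bit pattern left by 16 yields the bit pattern of the
;; corresponding float.  A worked example:
;;   (bfloat16) 0x3f80 << 16 = 0x3f800000 = 1.0f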