Fortran: Unlimited polymorphic intrinsic function arguments [PR84006]
[official-gcc.git] / gcc / config / aarch64 / aarch64-simd.md
blobf8bb973a278c7964f3e3a4f7154a0ab62214b7cf
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The following define_subst rules are used to produce patterns representing
22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
23 ;; a vec_concat with zeroes.  The order of the vec_concat operands differs
24 ;; for big-endian so we have a separate define_subst rule for each endianness.
;; Little-endian variant: operand 1 keeps the low half, zeroes fill the high
;; half of the doubled-width (<VDBL>) destination.
25 (define_subst "add_vec_concat_subst_le"
26   [(set (match_operand:VDZ 0)
27         (match_operand:VDZ 1))]
28   "!BYTES_BIG_ENDIAN"
29   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
30         (vec_concat:<VDBL>
31          (match_dup 1)
32          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
;; Big-endian variant: the zero operand comes first in the vec_concat,
;; mirroring the reversed lane numbering on big-endian.
34 (define_subst "add_vec_concat_subst_be"
35   [(set (match_operand:VDZ 0)
36         (match_operand:VDZ 1))]
37   "BYTES_BIG_ENDIAN"
38   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
39         (vec_concat:<VDBL>
40          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
41          (match_dup 1)))])
43 ;; The subst_attr definitions used to annotate patterns further in the file.
44 ;; Patterns that need to have the above substitutions added to them should
45 ;; have <vczle><vczbe> added to their name.
;; Each attribute expands to the empty string in the base pattern and to the
;; given suffix in the substituted variant.
46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
;; Move expander for all Advanced SIMD vector modes (VALL_F16).  Legitimizes
;; stores of non-zero immediates by forcing them into a register first; an
;; all-zero immediate may be stored directly (via xzr) when the memory
;; destination is valid for the chosen store form.
49 (define_expand "mov<mode>"
50   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
51         (match_operand:VALL_F16 1 "general_operand"))]
52   "TARGET_FLOAT"
53   "
54   /* Force the operand into a register if it is not an
55      immediate whose use can be replaced with xzr.
56      If the mode is 16 bytes wide, then we will be doing
57      a stp in DI mode, so we check the validity of that.
58      If the mode is 8 bytes wide, then we will do doing a
59      normal str, so the check need not apply.  */
60   if (GET_CODE (operands[0]) == MEM
61       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
62            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
63                 && aarch64_mem_pair_operand (operands[0], DImode))
64                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
65       operands[1] = force_reg (<MODE>mode, operands[1]);
67   /* If a constant is too complex to force to memory (e.g. because it
68      contains CONST_POLY_INTs), build it up from individual elements instead.
69      We should only need to do this before RA; aarch64_legitimate_constant_p
70      should ensure that we don't try to rematerialize the constant later.  */
71   if (GET_CODE (operands[1]) == CONST_VECTOR
72       && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
73     {
74       aarch64_expand_vector_init (operands[0], operands[1]);
75       DONE;
76     }
77   "
;; Misaligned vector moves are only legal when strict alignment is off; the
;; RTL is identical to a normal move, so only operand legitimization is done.
80 (define_expand "movmisalign<mode>"
81   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
82         (match_operand:VALL_F16 1 "general_operand"))]
83   "TARGET_FLOAT && !STRICT_ALIGNMENT"
85   /* This pattern is not permitted to fail during expansion: if both arguments
86      are non-registers (e.g. memory := constant, which can be created by the
87      auto-vectorizer), force operand 1 into a register.  */
88   if (!register_operand (operands[0], <MODE>mode)
89       && !register_operand (operands[1], <MODE>mode))
90     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.  Two alternatives:
;; DUP from element 0 of a SIMD register, or DUP from a general-purpose
;; register (the latter discouraged with '?' on the GP-register alternative).
93 (define_insn "aarch64_simd_dup<mode>"
94   [(set (match_operand:VDQ_I 0 "register_operand")
95         (vec_duplicate:VDQ_I
96           (match_operand:<VEL> 1 "register_operand")))]
97   "TARGET_SIMD"
98   {@ [ cons: =0 , 1  ; attrs: type      ]
99      [ w        , w  ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
100      [ w        , ?r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
101   }
;; Floating-point variant of the duplicate above (VDQF_F16 modes); same two
;; DUP forms, without the '?' penalty on the GP-register source.
104 (define_insn "aarch64_simd_dup<mode>"
105   [(set (match_operand:VDQF_F16 0 "register_operand")
106         (vec_duplicate:VDQF_F16
107           (match_operand:<VEL> 1 "register_operand")))]
108   "TARGET_SIMD"
109   {@ [ cons: =0 , 1 ; attrs: type      ]
110      [ w        , w ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
111      [ w        , r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
112   }
;; Duplicate one selected lane of a vector across all lanes of the result.
;; The lane index is remapped for endianness before printing.
115 (define_insn "aarch64_dup_lane<mode>"
116   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
117         (vec_duplicate:VALL_F16
118           (vec_select:<VEL>
119             (match_operand:VALL_F16 1 "register_operand" "w")
120             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
121           )))]
122   "TARGET_SIMD"
123   {
124     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
125     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
126   }
127   [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the swapped (opposite) width of the
;; destination mode, so the lane index is remapped in <VSWAP_WIDTH> mode.
130 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
131   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
132         (vec_duplicate:VALL_F16_NO_V2Q
133           (vec_select:<VEL>
134             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
135             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
136           )))]
137   "TARGET_SIMD"
138   {
139     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
140     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
141   }
142   [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector moves between registers, memory and immediates.  After reload,
;; special integer constants destined for an FP register are split and
;; materialized via aarch64_maybe_generate_simd_constant.
145 (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
146   [(set (match_operand:VDMOV 0 "nonimmediate_operand")
147         (match_operand:VDMOV 1 "general_operand"))]
148   "TARGET_FLOAT
149    && (register_operand (operands[0], <MODE>mode)
150        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
151   {@ [cons: =0, 1; attrs: type, arch, length]
152      [w , m ; neon_load1_1reg<q> , *        , *] ldr\t%d0, %1
153      [r , m ; load_8             , *        , *] ldr\t%x0, %1
154      [m , Dz; store_8            , *        , *] str\txzr, %0
155      [m , w ; neon_store1_1reg<q>, *        , *] str\t%d1, %0
156      [m , r ; store_8            , *        , *] str\t%x1, %0
157      [w , w ; neon_logic<q>      , simd     , *] mov\t%0.<Vbtype>, %1.<Vbtype>
158      [w , w ; neon_logic<q>      , *        , *] fmov\t%d0, %d1
159      [?r, w ; neon_to_gp<q>      , base_simd, *] umov\t%0, %1.d[0]
160      [?r, w ; neon_to_gp<q>      , *        , *] fmov\t%x0, %d1
161      [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
162      [?r, r ; mov_reg            , *        , *] mov\t%0, %1
163      [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
164      [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
165      [w , Dx; neon_move          , simd     , 8] #
166   }
167   "CONST_INT_P (operands[1])
168    && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
169    && FP_REGNUM_P (REGNO (operands[0]))"
170   [(const_int 0)]
171   {
172     aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
173     DONE;
174   }
;; 128-bit vector moves.  Multi-instruction alternatives ('#', length 8) are
;; split after reload: GP<->GP moves become two DImode moves, special V2DI
;; constants are materialized directly into FP registers, and everything else
;; goes through aarch64_split_simd_move.
177 (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
178   [(set (match_operand:VQMOV 0 "nonimmediate_operand")
179         (match_operand:VQMOV 1 "general_operand"))]
180   "TARGET_FLOAT
181    && (register_operand (operands[0], <MODE>mode)
182        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
183   {@ [cons: =0, 1; attrs: type, arch, length]
184      [w  , m ; neon_load1_1reg<q> , *   , 4] ldr\t%q0, %1
185      [Umn, Dz; store_16           , *   , 4] stp\txzr, xzr, %0
186      [m  , w ; neon_store1_1reg<q>, *   , 4] str\t%q1, %0
187      [w  , w ; neon_logic<q>      , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
188      [w  , w ; *                  , sve , 4] mov\t%Z0.d, %Z1.d
189      [?r , w ; multiple           , *   , 8] #
190      [?w , r ; multiple           , *   , 8] #
191      [?r , r ; multiple           , *   , 8] #
192      [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
193      [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
194      [w  , Dx; neon_move          , simd, 8] #
195   }
196   "&& reload_completed
197    && ((REG_P (operands[0])
198         && REG_P (operands[1])
199         && !(FP_REGNUM_P (REGNO (operands[0]))
200              && FP_REGNUM_P (REGNO (operands[1]))))
201        || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
202            && FP_REGNUM_P (REGNO (operands[0]))))"
203   [(const_int 0)]
204   {
205     if (GP_REGNUM_P (REGNO (operands[0]))
206         && GP_REGNUM_P (REGNO (operands[1])))
207       aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
208     else
209       {
210         if (FP_REGNUM_P (REGNO (operands[0]))
211             && <MODE>mode == V2DImode
212             && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
213                                                      <MODE>mode))
214           ;
215         else
216           aarch64_split_simd_move (operands[0], operands[1]);
217       }
218     DONE;
219   }
222 ;; When storing lane zero we can use the normal STR and its more permissive
223 ;; addressing modes.
;; Only matches when the selected lane maps to architectural lane 0 after
;; endianness adjustment (ENDIAN_LANE_N check in the condition).
225 (define_insn "aarch64_store_lane0<mode>"
226   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
227         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
228                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
229   "TARGET_FLOAT
230    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
231   "str\\t%<Vetype>1, %0"
232   [(set_attr "type" "neon_store1_1reg<q>")]
;; Store a duplicated scalar as a register pair (STP of the same value twice),
;; from either a SIMD or a general-purpose register.
235 (define_insn "aarch64_simd_stp<mode>"
236   [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
237         (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
238   "TARGET_SIMD"
239   {@ [ cons: =0 , 1 ; attrs: type            ]
240      [ Umn      , w ; neon_stp               ] stp\t%<Vetype>1, %<Vetype>1, %y0
241      [ Umn      , r ; store_<ldpstp_vel_sz>  ] stp\t%<vwcore>1, %<vwcore>1, %y0
242   }
;; Split a 128-bit move into two half-width operations.  When the source is a
;; GP register pair, move the low half then combine with the high half; when
;; it is a SIMD register, extract both halves via aarch64_get_half.
245 (define_expand "@aarch64_split_simd_mov<mode>"
246   [(set (match_operand:VQMOV 0)
247         (match_operand:VQMOV 1))]
248   "TARGET_FLOAT"
249   {
250     rtx dst = operands[0];
251     rtx src = operands[1];
253     if (GP_REGNUM_P (REGNO (src)))
254       {
255         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
256         rtx src_high_part = gen_highpart (<VHALF>mode, src);
257         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
259         emit_move_insn (dst_low_part, src_low_part);
260         emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
261                                                src_high_part));
262       }
263     else
264       {
265         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
266         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
267         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
268         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
269         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
270         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
271       }
272     DONE;
273   }
;; Extract one half of a 128-bit vector, chosen by an ascending-lane parallel.
;; The low half needs no instruction: it is just a lowpart subreg move.
276 (define_expand "aarch64_get_half<mode>"
277   [(set (match_operand:<VHALF> 0 "register_operand")
278         (vec_select:<VHALF>
279           (match_operand:VQMOV 1 "register_operand")
280           (match_operand 2 "ascending_int_parallel")))]
281   "TARGET_FLOAT"
282   {
283     if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
284       {
285         emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
286         DONE;
287       }
288   }
;; Convenience wrapper: extract the low half (builds the lo-lane parallel
;; and defers to aarch64_get_half).
291 (define_expand "aarch64_get_low<mode>"
292   [(match_operand:<VHALF> 0 "register_operand")
293    (match_operand:VQMOV 1 "register_operand")]
294   "TARGET_FLOAT"
295   {
296     rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
297     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
298     DONE;
299   }
;; Convenience wrapper: extract the high half, analogous to aarch64_get_low.
302 (define_expand "aarch64_get_high<mode>"
303   [(match_operand:<VHALF> 0 "register_operand")
304    (match_operand:VQMOV 1 "register_operand")]
305   "TARGET_FLOAT"
306   {
307     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
308     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
309     DONE;
310   }
;; Move the low 64-bit half of a 128-bit vector.  SIMD-to-SIMD copies emit
;; nothing up front ('#') and are split after reload into a plain half-mode
;; register move; SIMD-to-GP uses UMOV or FMOV.
313 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
314   [(set (match_operand:<VHALF> 0 "register_operand")
315         (vec_select:<VHALF>
316           (match_operand:VQMOV_NO2E 1 "register_operand")
317           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
318   "TARGET_FLOAT"
319   {@ [ cons: =0 , 1 ; attrs: type   , arch      ]
320      [ w        , w ; mov_reg       , simd      ] #
321      [ ?r       , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
322      [ ?r       , w ; f_mrc         , *         ] fmov\t%0, %d1
323   }
324   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
325   [(set (match_dup 0) (match_dup 1))]
326   {
327     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
328   }
329   [(set_attr "length" "4")]
;; Move the high 64-bit half.  Uses DUP of d[1] (or SVE EXT by 8 bytes) for
;; SIMD destinations and UMOV/FMOV of d[1] for GP destinations.
332 (define_insn "aarch64_simd_mov_from_<mode>high"
333   [(set (match_operand:<VHALF> 0 "register_operand")
334         (vec_select:<VHALF>
335           (match_operand:VQMOV_NO2E 1 "register_operand")
336           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
337   "TARGET_FLOAT"
338   {@ [ cons: =0 , 1 ; attrs: type   , arch  ]
339      [ w        , w ; neon_dup<q>   , simd  ] dup\t%d0, %1.d[1]
340      [ w        , w ; *             , sve   ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
341      [ ?r       , w ; neon_to_gp<q> , simd  ] umov\t%0, %1.d[1]
342      [ ?r       , w ; f_mrc         , *     ] fmov\t%0, %1.d[1]
343   }
344   [(set_attr "length" "4")]
;; Basic integer vector operations.  Each carries <vczle><vczbe> so the
;; define_subst rules at the top of the file also generate variants whose
;; 64-bit result is implicitly concatenated with zeroes.
;; ORN: note operands 1 and 2 are swapped in the output relative to the RTL,
;; since ORN computes op2 | ~op1 here.
347 (define_insn "orn<mode>3<vczle><vczbe>"
348  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
349        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
350                 (match_operand:VDQ_I 2 "register_operand" "w")))]
351  "TARGET_SIMD"
352  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
353   [(set_attr "type" "neon_logic<q>")]
;; BIC: op2 & ~op1, same operand swap as ORN.
356 (define_insn "bic<mode>3<vczle><vczbe>"
357  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
358        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
359                 (match_operand:VDQ_I 2 "register_operand" "w")))]
360  "TARGET_SIMD"
361  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
362   [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
365 (define_insn "add<mode>3<vczle><vczbe>"
366   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
367         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
368                   (match_operand:VDQ_I 2 "register_operand" "w")))]
369   "TARGET_SIMD"
370   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
371   [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
374 (define_insn "sub<mode>3<vczle><vczbe>"
375   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
376         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
377                    (match_operand:VDQ_I 2 "register_operand" "w")))]
378   "TARGET_SIMD"
379   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
380   [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication (byte/half/single element sizes only:
;; VDQ_BHSI — there is no 64-bit element MUL instruction).
383 (define_insn "mul<mode>3<vczle><vczbe>"
384   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
385         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
386                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
387   "TARGET_SIMD"
388   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
389   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the REV instruction family (suffix chosen by
;; element width via <Vrevsuff>).
392 (define_insn "bswap<mode>2"
393   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
394         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
395   "TARGET_SIMD"
396   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
397   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (byte vectors only).
400 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
401   [(set (match_operand:VB 0 "register_operand" "=w")
402         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
403                    UNSPEC_RBIT))]
404   "TARGET_SIMD"
405   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
406   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as bswap + per-byte rbit + clz: the
;; combination reverses the full bit order of each element.
409 (define_expand "ctz<mode>2"
410   [(set (match_operand:VS 0 "register_operand")
411         (ctz:VS (match_operand:VS 1 "register_operand")))]
412   "TARGET_SIMD"
413   {
414      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
415      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
416                                              <MODE>mode, 0);
417      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
418      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
419      DONE;
420   }
;; xorsign (x, y) = x with its sign bit XORed by the sign of y.  Implemented
;; in the integer equivalent mode: mask y down to its sign bits, then XOR
;; into x.
423 (define_expand "@xorsign<mode>3"
424   [(match_operand:VHSDF 0 "register_operand")
425    (match_operand:VHSDF 1 "register_operand")
426    (match_operand:VHSDF 2 "register_operand")]
427   "TARGET_SIMD"
430   machine_mode imode = <V_INT_EQUIV>mode;
431   rtx v_bitmask = gen_reg_rtx (imode);
432   rtx op1x = gen_reg_rtx (imode);
433   rtx op2x = gen_reg_rtx (imode);
435   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
436   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
438   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
440   emit_move_insn (v_bitmask,
441                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
442                                                      HOST_WIDE_INT_M1U << bits));
444   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
445   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
446   emit_move_insn (operands[0],
447                   lowpart_subreg (<MODE>mode, op1x, imode));
448   DONE;
452 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
453 ;; fact that their usage need to guarantee that the source vectors are
454 ;; contiguous.  It would be wrong to describe the operation without being able
455 ;; to describe the permute that is also required, but even if that is done
456 ;; the permute would have been created as a LOAD_LANES which means the values
457 ;; in the registers are in the wrong order.
;; Complex add with rotation (#90 or #270, selected by the FCADD iterator).
458 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
459   [(set (match_operand:VHSDF 0 "register_operand" "=w")
460         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
461                        (match_operand:VHSDF 2 "register_operand" "w")]
462                        FCADD))]
463   "TARGET_COMPLEX"
464   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
465   [(set_attr "type" "neon_fcadd")]
;; Optab entry point for complex add; little-endian only — the lane pairing
;; assumed by FCADD does not hold under big-endian lane numbering.
468 (define_expand "cadd<rot><mode>3"
469   [(set (match_operand:VHSDF 0 "register_operand")
470         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
471                        (match_operand:VHSDF 2 "register_operand")]
472                        FCADD))]
473   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
;; Complex multiply-accumulate: operand 1 is the accumulator (tied to the
;; destination), operands 2 and 3 are multiplied with rotation #<rot>.
476 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
477   [(set (match_operand:VHSDF 0 "register_operand" "=w")
478         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
479                                    (match_operand:VHSDF 3 "register_operand" "w")]
480                                    FCMLA)
481                     (match_operand:VHSDF 1 "register_operand" "0")))]
482   "TARGET_COMPLEX"
483   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
484   [(set_attr "type" "neon_fcmla")]
;; Indexed (by-lane) FCMLA; the lane index (operand 4) is remapped for
;; endianness in <VHALF> mode before printing.
488 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
489   [(set (match_operand:VHSDF 0 "register_operand" "=w")
490         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
491                                    (match_operand:VHSDF 3 "register_operand" "w")
492                                    (match_operand:SI 4 "const_int_operand" "n")]
493                                    FCMLA)
494                     (match_operand:VHSDF 1 "register_operand" "0")))]
495   "TARGET_COMPLEX"
497   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
498   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
500   [(set_attr "type" "neon_fcmla")]
;; V4HF accumulator with a V8HF (128-bit) indexed multiplicand.
503 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
504   [(set (match_operand:V4HF 0 "register_operand" "=w")
505         (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
506                                  (match_operand:V8HF 3 "register_operand" "w")
507                                  (match_operand:SI 4 "const_int_operand" "n")]
508                                  FCMLA)
509                    (match_operand:V4HF 1 "register_operand" "0")))]
510   "TARGET_COMPLEX"
512   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
513   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
515   [(set_attr "type" "neon_fcmla")]
;; 128-bit accumulator with a 64-bit (<VHALF>) indexed multiplicand; the lane
;; count for endian remapping is halved because lanes index complex pairs.
518 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
519   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
520         (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
521                                      (match_operand:<VHALF> 3 "register_operand" "w")
522                                      (match_operand:SI 4 "const_int_operand" "n")]
523                                      FCMLA)
524                      (match_operand:VQ_HSF 1 "register_operand" "0")))]
525   "TARGET_COMPLEX"
527   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
528   operands[4]
529     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
530   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
532   [(set_attr "type" "neon_fcmla")]
535 ;; The complex mla/mls operations always need to expand to two instructions.
536 ;; The first operation does half the computation and the second does the
537 ;; remainder.  Because of this, expand early.
;; The two FCMLA rotations are supplied by <rotsplit1>/<rotsplit2>; the first
;; accumulates into operand 3, the second into the intermediate result.
538 (define_expand "cml<fcmac1><conj_op><mode>4"
539   [(set (match_operand:VHSDF 0 "register_operand")
540         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
541                                    (match_operand:VHSDF 2 "register_operand")]
542                                    FCMLA_OP)
543                     (match_operand:VHSDF 3 "register_operand")))]
544   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
546   rtx tmp = gen_reg_rtx (<MODE>mode);
547   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
548                                                  operands[2], operands[1]));
549   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
550                                                  operands[2], operands[1]));
551   DONE;
554 ;; The complex mul operations always need to expand to two instructions.
555 ;; The first operation does half the computation and the second does the
556 ;; remainder.  Because of this, expand early.
;; Same two-FCMLA scheme as above, but starting from a zero accumulator.
557 (define_expand "cmul<conj_op><mode>3"
558   [(set (match_operand:VHSDF 0 "register_operand")
559         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
560                        (match_operand:VHSDF 2 "register_operand")]
561                        FCMUL_OP))]
562   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
564   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
565   rtx res1 = gen_reg_rtx (<MODE>mode);
566   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
567                                                  operands[2], operands[1]));
568   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
569                                                  operands[2], operands[1]));
570   DONE;
573 ;; These expands map to the Dot Product optab the vectorizer checks for
574 ;; and to the intrinsics patttern.
575 ;; The auto-vectorizer expects a dot product builtin that also does an
576 ;; accumulation into the provided register.
577 ;; Given the following pattern
579 ;; for (i=0; i<len; i++) {
580 ;;     c = a[i] * b[i];
581 ;;     r += c;
582 ;; }
583 ;; return result;
585 ;; This can be auto-vectorized to
586 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
588 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
589 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
590 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
591 ;; ...
593 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Signed/unsigned dot product (SDOT/UDOT, selected by the DOTPROD iterator);
;; operand 3 is the accumulator, tied to the destination.
594 (define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
595   [(set (match_operand:VS 0 "register_operand" "=w")
596         (plus:VS
597           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
598                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
599                       DOTPROD)
600           (match_operand:VS 3 "register_operand" "0")))]
601   "TARGET_DOTPROD"
602   "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
603   [(set_attr "type" "neon_dot<q>")]
606 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
607 ;; (vector) Dot Product operation and the vectorized optab.
608 (define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
609   [(set (match_operand:VS 0 "register_operand" "=w")
610         (plus:VS
611           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
612                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
613           UNSPEC_USDOT)
614           (match_operand:VS 3 "register_operand" "0")))]
615   "TARGET_I8MM"
616   "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
617   [(set_attr "type" "neon_dot<q>")]
620 ;; These instructions map to the __builtins for the Dot Product
621 ;; indexed operations.
;; Indexed dot product against a 64-bit (V8QI) multiplicand; the lane index
;; is remapped for endianness in V8QI mode.
622 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
623   [(set (match_operand:VS 0 "register_operand" "=w")
624         (plus:VS
625           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
626                       (match_operand:V8QI 3 "register_operand" "<h_con>")
627                       (match_operand:SI 4 "immediate_operand" "i")]
628                       DOTPROD)
629           (match_operand:VS 1 "register_operand" "0")))]
630   "TARGET_DOTPROD"
631   {
632     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
633     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
634   }
635   [(set_attr "type" "neon_dot<q>")]
;; Same as above with a 128-bit (V16QI) indexed multiplicand.
638 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
639   [(set (match_operand:VS 0 "register_operand" "=w")
640         (plus:VS
641           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
642                       (match_operand:V16QI 3 "register_operand" "<h_con>")
643                       (match_operand:SI 4 "immediate_operand" "i")]
644                       DOTPROD)
645           (match_operand:VS 1 "register_operand" "0")))]
646   "TARGET_DOTPROD"
647   {
648     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
649     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
650   }
651   [(set_attr "type" "neon_dot<q>")]
654 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
655 ;; (by element) Dot Product operations.
;; Lanes address groups of four bytes, hence the nunits / 4 in the endian
;; lane remapping.
656 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
657   [(set (match_operand:VS 0 "register_operand" "=w")
658         (plus:VS
659           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
660                       (match_operand:VB 3 "register_operand" "w")
661                       (match_operand:SI 4 "immediate_operand" "i")]
662           DOTPROD_I8MM)
663           (match_operand:VS 1 "register_operand" "0")))]
664   "TARGET_I8MM"
665   {
666     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
667     int lane = INTVAL (operands[4]);
668     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
669     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
670   }
671   [(set_attr "type" "neon_dot<VS:q>")]
;; copysign (x, y): take the magnitude of x and the sign of y.  A constant
;; all-negative y collapses to a single ORR with the sign-bit mask; the
;; general case is a BSL selecting between sign and magnitude bits.
674 (define_expand "copysign<mode>3"
675   [(match_operand:VHSDF 0 "register_operand")
676    (match_operand:VHSDF 1 "register_operand")
677    (match_operand:VHSDF 2 "nonmemory_operand")]
678   "TARGET_SIMD"
680   machine_mode int_mode = <V_INT_EQUIV>mode;
681   rtx v_bitmask = gen_reg_rtx (int_mode);
682   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
684   emit_move_insn (v_bitmask,
685                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
686                                                      HOST_WIDE_INT_M1U << bits));
688   /* copysign (x, -1) should instead be expanded as orr with the sign
689      bit.  */
690   if (!REG_P (operands[2]))
691     {
692       rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
693       if (GET_CODE (op2_elt) == CONST_DOUBLE
694           && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
695         {
696           emit_insn (gen_ior<v_int_equiv>3 (
697             lowpart_subreg (int_mode, operands[0], <MODE>mode),
698             lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
699           DONE;
700         }
701     }
703   operands[2] = force_reg (<MODE>mode, operands[2]);
704   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
705                                          operands[2], operands[1]));
706   DONE;
;; Multiply a vector by one lane of another vector (same-width <VCOND>
;; source); the lane index is endian-remapped before printing.
710 (define_insn "mul_lane<mode>3"
711  [(set (match_operand:VMULD 0 "register_operand" "=w")
712        (mult:VMULD
713          (vec_duplicate:VMULD
714            (vec_select:<VEL>
715              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
716              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
717          (match_operand:VMULD 1 "register_operand" "w")))]
718   "TARGET_SIMD"
719   {
720     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
721     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
722   }
723   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane comes from the 128-bit (<VCONQ>) form of the
;; element mode.
726 (define_insn "mul_laneq<mode>3"
727   [(set (match_operand:VMUL 0 "register_operand" "=w")
728      (mult:VMUL
729        (vec_duplicate:VMUL
730           (vec_select:<VEL>
731             (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
732             (parallel [(match_operand:SI 3 "immediate_operand")])))
733       (match_operand:VMUL 1 "register_operand" "w")))]
734   "TARGET_SIMD"
735   {
736     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
737     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
738   }
739   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Multiply by a duplicated scalar register, emitted as a by-element MUL of
;; lane 0 — no endian remapping needed.
742 (define_insn "mul_n<mode>3"
743  [(set (match_operand:VMUL 0 "register_operand" "=w")
744        (mult:VMUL
745          (vec_duplicate:VMUL
746            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
747          (match_operand:VMUL 1 "register_operand" "w")))]
748   "TARGET_SIMD"
749   "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
750   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
753 (define_insn "@aarch64_rsqrte<mode>"
754   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
755         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
756                      UNSPEC_RSQRTE))]
757   "TARGET_SIMD"
758   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
759   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS), used to refine FRSQRTE estimates.
761 (define_insn "@aarch64_rsqrts<mode>"
762   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
763         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
764                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
765          UNSPEC_RSQRTS))]
766   "TARGET_SIMD"
767   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
768   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand 1/sqrt(x) through the approximate-sqrt helper (recip = true).
770 (define_expand "rsqrt<mode>2"
771   [(set (match_operand:VALLF 0 "register_operand")
772         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
773                      UNSPEC_RSQRT))]
774   "TARGET_SIMD"
776   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
777   DONE;
;; Unsigned integer reciprocal square-root estimate (URSQRTE).
780 (define_insn "aarch64_ursqrte<mode>"
781 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
782       (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
783                    UNSPEC_RSQRTE))]
784 "TARGET_SIMD"
785 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
786 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Multiply scalar DF operand 3 by a lane of V2DF operand 1 (lane
;; chosen by immediate operand 2, endian-corrected before output),
;; producing a scalar DF result.
;; NOTE(review): the ".2d" qualifiers in the output template print the
;; operands as full 2-element vectors although the SET destination is
;; scalar DF -- confirm this matches the intended FMUL (by element)
;; assembly form.
788 (define_insn "*aarch64_mul3_elt_to_64v2df"
789   [(set (match_operand:DF 0 "register_operand" "=w")
790      (mult:DF
791        (vec_select:DF
792          (match_operand:V2DF 1 "register_operand" "w")
793          (parallel [(match_operand:SI 2 "immediate_operand")]))
794        (match_operand:DF 3 "register_operand" "w")))]
795   "TARGET_SIMD"
796   {
797     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
798     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
799   }
800   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Integer vector negate.  The <vczle>/<vczbe> substs additionally
;; generate variants whose 64-bit result is concatenated with zeroes
;; (see the add_vec_concat_subst_* rules at the top of the file).
803 (define_insn "neg<mode>2<vczle><vczbe>"
804   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
805         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
806   "TARGET_SIMD"
807   "neg\t%0.<Vtype>, %1.<Vtype>"
808   [(set_attr "type" "neon_neg<q>")]
;; Integer vector absolute value, using the canonical ABS rtx code so
;; combine may fold it with surrounding arithmetic.
811 (define_insn "abs<mode>2<vczle><vczbe>"
812   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
813         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
814   "TARGET_SIMD"
815   "abs\t%0.<Vtype>, %1.<Vtype>"
816   [(set_attr "type" "neon_abs<q>")]
819 ;; The intrinsic version of integer ABS must not be allowed to
820 ;; combine with any operation with an integrated ABS step, such
821 ;; as SABD.  Hence the opaque UNSPEC_ABS rather than an ABS rtx.
822 (define_insn "aarch64_abs<mode><vczle><vczbe>"
823   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
824           (unspec:VSDQ_I_DI
825             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
826            UNSPEC_ABS))]
827   "TARGET_SIMD"
828   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
829   [(set_attr "type" "neon_abs<q>")]
832 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
833 ;; This isn't accurate as ABS always treats its input as a signed value.
834 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
835 ;; Whereas SABD would return 192 (-64 signed) on the above example.
836 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
837 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
838   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
839         (minus:VDQ_BHSI
840           (USMAX:VDQ_BHSI
841             (match_operand:VDQ_BHSI 1 "register_operand" "w")
842             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
843           (<max_opp>:VDQ_BHSI
844             (match_dup 1)
845             (match_dup 2))))]
846   "TARGET_SIMD"
847   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
848   [(set_attr "type" "neon_abd<q>")]
;; Standard-name expander for [su]abd: simply emits the max-minus-min
;; pattern above.
851 (define_expand "<su>abd<mode>3"
852   [(match_operand:VDQ_BHSI 0 "register_operand")
853    (USMAX:VDQ_BHSI
854      (match_operand:VDQ_BHSI 1 "register_operand")
855      (match_operand:VDQ_BHSI 2 "register_operand"))]
856   "TARGET_SIMD"
857   {
858     emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
859     DONE;
860   }
;; Widening absolute difference ([SU]ABDL) of 64-bit vectors.  The
;; max-minus-min form is always non-negative, so ZERO_EXTEND to the
;; wide mode is correct for both the signed and unsigned variants.
863 (define_insn "aarch64_<su>abdl<mode>"
864   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
865         (zero_extend:<VWIDE>
866           (minus:VD_BHSI
867             (USMAX:VD_BHSI
868               (match_operand:VD_BHSI 1 "register_operand" "w")
869               (match_operand:VD_BHSI 2 "register_operand" "w"))
870             (<max_opp>:VD_BHSI
871               (match_dup 1)
872               (match_dup 2)))))]
873   "TARGET_SIMD"
874   "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
875   [(set_attr "type" "neon_abd<q>")]
;; [SU]ABDL2: as above but operating on the high halves of 128-bit
;; vectors; operand 3 is the hi-half lane-selector parallel.
878 (define_insn "aarch64_<su>abdl2<mode>_insn"
879   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
880         (zero_extend:<VDBLW>
881           (minus:<VHALF>
882             (USMAX:<VHALF>
883               (vec_select:<VHALF>
884                 (match_operand:VQW 1 "register_operand" "w")
885                 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
886               (vec_select:<VHALF>
887                 (match_operand:VQW 2 "register_operand" "w")
888                 (match_dup 3)))
889             (<max_opp>:<VHALF>
890               (vec_select:<VHALF>
891                 (match_dup 1)
892                 (match_dup 3))
893               (vec_select:<VHALF>
894                 (match_dup 2)
895                 (match_dup 3))))))]
897   "TARGET_SIMD"
898   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
899   [(set_attr "type" "neon_abd<q>")]
;; Expander that builds the hi-half selector and emits the insn above.
902 (define_expand "aarch64_<su>abdl2<mode>"
903   [(match_operand:<VDBLW> 0 "register_operand")
904    (USMAX:VQW
905      (match_operand:VQW 1 "register_operand")
906      (match_operand:VQW 2 "register_operand"))]
907   "TARGET_SIMD"
908   {
909     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
910     emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
911                                                  operands[2], hi));
912     DONE;
913   }
;; Widening absolute difference of the HIGH halves, represented as
;; ABS (MINUS (extend hi(op1)) (extend hi(op2))); the extension to the
;; wide mode happens before the subtraction, so ABS is exact here.
916 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
917   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
918         (abs:<VWIDE>
919           (minus:<VWIDE>
920             (ANY_EXTEND:<VWIDE>
921               (vec_select:<VHALF>
922                 (match_operand:VQW 1 "register_operand" "w")
923                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
924             (ANY_EXTEND:<VWIDE>
925               (vec_select:<VHALF>
926                 (match_operand:VQW 2 "register_operand" "w")
927                 (match_dup 3))))))]
928   "TARGET_SIMD"
929   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
930   [(set_attr "type" "neon_abd_long")]
;; As above, for the LOW halves ([SU]ABDL on the D-register halves).
933 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
934   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
935         (abs:<VWIDE>
936           (minus:<VWIDE>
937             (ANY_EXTEND:<VWIDE>
938               (vec_select:<VHALF>
939                 (match_operand:VQW 1 "register_operand" "w")
940                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
941             (ANY_EXTEND:<VWIDE>
942               (vec_select:<VHALF>
943                 (match_operand:VQW 2 "register_operand" "w")
944                 (match_dup 3))))))]
945   "TARGET_SIMD"
946   "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
947   [(set_attr "type" "neon_abd_long")]
;; Vectorizer entry points: build the hi/lo selector parallels and emit
;; the corresponding internal pattern.
950 (define_expand "vec_widen_<su>abd_hi_<mode>"
951   [(match_operand:<VWIDE> 0 "register_operand")
952    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
953    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
954   "TARGET_SIMD"
955   {
956     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
957     emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
958                                                        operands[2], p));
959     DONE;
960   }
963 (define_expand "vec_widen_<su>abd_lo_<mode>"
964   [(match_operand:<VWIDE> 0 "register_operand")
965    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
966    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
967   "TARGET_SIMD"
968   {
969     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
970     emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
971                                                        operands[2], p));
972     DONE;
973   }
;; [SU]ABAL: widening absolute difference of operands 2 and 3,
;; accumulated into operand 1 (tied to the destination, "0").
976 (define_insn "aarch64_<su>abal<mode>"
977   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
978         (plus:<VWIDE>
979           (zero_extend:<VWIDE>
980             (minus:VD_BHSI
981               (USMAX:VD_BHSI
982                 (match_operand:VD_BHSI 2 "register_operand" "w")
983                 (match_operand:VD_BHSI 3 "register_operand" "w"))
984               (<max_opp>:VD_BHSI
985                 (match_dup 2)
986                 (match_dup 3))))
987           (match_operand:<VWIDE> 1 "register_operand" "0")))]
988   "TARGET_SIMD"
989   "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
990   [(set_attr "type" "neon_arith_acc<q>")]
;; [SU]ABAL2: same accumulation but over the high halves of 128-bit
;; inputs; operand 4 is the hi-half lane-selector parallel.
993 (define_insn "aarch64_<su>abal2<mode>_insn"
994   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
995         (plus:<VDBLW>
996           (zero_extend:<VDBLW>
997             (minus:<VHALF>
998               (USMAX:<VHALF>
999                 (vec_select:<VHALF>
1000                   (match_operand:VQW 2 "register_operand" "w")
1001                   (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
1002                 (vec_select:<VHALF>
1003                   (match_operand:VQW 3 "register_operand" "w")
1004                   (match_dup 4)))
1005               (<max_opp>:<VHALF>
1006                 (vec_select:<VHALF>
1007                   (match_dup 2)
1008                   (match_dup 4))
1009                 (vec_select:<VHALF>
1010                   (match_dup 3)
1011                   (match_dup 4)))))
1012           (match_operand:<VDBLW> 1 "register_operand" "0")))]
1013   "TARGET_SIMD"
1014   "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1015   [(set_attr "type" "neon_arith_acc<q>")]
;; Expander: materialize the hi-half selector and emit the insn above.
1018 (define_expand "aarch64_<su>abal2<mode>"
1019   [(match_operand:<VDBLW> 0 "register_operand")
1020    (match_operand:<VDBLW> 1 "register_operand")
1021    (USMAX:VQW
1022      (match_operand:VQW 2 "register_operand")
1023      (match_operand:VQW 3 "register_operand"))]
1024   "TARGET_SIMD"
1025   {
1026     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1027     emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
1028                                                  operands[2], operands[3], hi));
1029     DONE;
1030   }
;; [SU]ADALP: pairwise add-accumulate long.  Adjacent element pairs of
;; operand 2 are widened, summed, and added into accumulator operand 1.
;; The preparation statements build the even-lane (operand 3) and
;; odd-lane (operand 4) selector parallels used by the insn below.
1033 (define_expand "aarch64_<su>adalp<mode>"
1034   [(set (match_operand:<VDBLW> 0 "register_operand")
1035         (plus:<VDBLW>
1036           (plus:<VDBLW>
1037             (vec_select:<VDBLW>
1038               (ANY_EXTEND:<V2XWIDE>
1039                 (match_operand:VDQV_L 2 "register_operand"))
1040               (match_dup 3))
1041             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1042               (match_dup 4)))
1043           (match_operand:<VDBLW> 1 "register_operand")))]
1044  "TARGET_SIMD"
1046    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
1047    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
1048    operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
;; Matching insn: the condition requires the two selectors to differ,
;; i.e. one selects the even and the other the odd elements.
1052 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1053   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1054         (plus:<VDBLW>
1055           (plus:<VDBLW>
1056             (vec_select:<VDBLW>
1057               (ANY_EXTEND:<V2XWIDE>
1058                 (match_operand:VDQV_L 2 "register_operand" "w"))
1059               (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
1060             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1061               (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
1062         (match_operand:<VDBLW> 1 "register_operand" "0")))]
1063  "TARGET_SIMD
1064   && !rtx_equal_p (operands[3], operands[4])"
1065  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
1066   [(set_attr "type" "neon_reduc_add<q>")]
1069 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1070 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
1071 ;; reduction of the difference into a V4SI vector and accumulate that into
1072 ;; operand 3 before copying that into the result operand 0.
1073 ;; Perform that with a sequence of:
1074 ;; UABDL2       tmp.8h, op1.16b, op2.16b
1075 ;; UABAL        tmp.8h, op1.8b, op2.8b
1076 ;; UADALP       op3.4s, tmp.8h
1077 ;; MOV          op0, op3 // should be eliminated in later passes.
1079 ;; For TARGET_DOTPROD we do:
1080 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1081 ;; UABD tmp2.16b, op1.16b, op2.16b
1082 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1083 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
1085 ;; The signed version just uses the signed variants of the above instructions
1086 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
1087 ;; unsigned.
1089 (define_expand "<su>sadv16qi"
1090   [(use (match_operand:V4SI 0 "register_operand"))
1091    (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
1092                 (match_operand:V16QI 2 "register_operand"))
1093    (use (match_operand:V4SI 3 "register_operand"))]
1094   "TARGET_SIMD"
1095   {
1096     if (TARGET_DOTPROD)
1097       {
1098         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1099         rtx abd = gen_reg_rtx (V16QImode);
1100         emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
        /* Dot-product of the absolute differences against a vector of
           ones performs the widening byte reduction in one step.  */
1101         emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
1102         DONE;
1103       }
    /* Fallback: ABDL2 (high halves) into a V8HI temporary, ABAL in the
       low halves, then pairwise-accumulate into the V4SI result.  */
1104     rtx reduc = gen_reg_rtx (V8HImode);
1105     emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
1106                                             operands[2]));
1107     emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1108                                          gen_lowpart (V8QImode, operands[1]),
1109                                          gen_lowpart (V8QImode,
1110                                                       operands[2])));
1111     emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
1112     emit_move_insn (operands[0], operands[3]);
1113     DONE;
1114   }
;; [SU]ABA: absolute difference of operands 2 and 3 (max-minus-min
;; form, see the SABD comment above) accumulated into operand 1.
1117 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1118   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1119         (plus:VDQ_BHSI (minus:VDQ_BHSI
1120                          (USMAX:VDQ_BHSI
1121                            (match_operand:VDQ_BHSI 2 "register_operand" "w")
1122                            (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1123                          (<max_opp>:VDQ_BHSI
1124                            (match_dup 2)
1125                            (match_dup 3)))
1126                        (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1127   "TARGET_SIMD"
1128   "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1129   [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference.  Unlike the integer case,
;; ABS (MINUS ...) is an exact representation for FP.
1132 (define_insn "fabd<mode>3<vczle><vczbe>"
1133   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
1134         (abs:VHSDF_HSDF
1135           (minus:VHSDF_HSDF
1136             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
1137             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1138   "TARGET_SIMD"
1139   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
1140   [(set_attr "type" "neon_fp_abd_<stype><q>")]
1143 ;; For AND (vector, register) and BIC (vector, immediate)
;; The second alternative matches an inverted-bit immediate and emits
;; BIC via aarch64_output_simd_mov_immediate.
1144 (define_insn "and<mode>3<vczle><vczbe>"
1145   [(set (match_operand:VDQ_I 0 "register_operand")
1146         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1147                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm")))]
1148   "TARGET_SIMD"
1149   {@ [ cons: =0 , 1 , 2   ]
1150      [ w        , w , w   ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1151      [ w        , 0 , Db  ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_BIC);
1152   }
1153   [(set_attr "type" "neon_logic<q>")]
1156 ;; For ORR (vector, register) and ORR (vector, immediate)
;; The middle alternative uses the SVE predicated-immediate form when
;; SVE is available.
1157 (define_insn "ior<mode>3<vczle><vczbe>"
1158   [(set (match_operand:VDQ_I 0 "register_operand")
1159         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1160                    (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
1161   "TARGET_SIMD"
1162   {@ [ cons: =0 , 1 , 2; attrs: arch ]
1163      [ w        , w , w  ; simd      ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1164      [ w        , 0 , vsl; sve       ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
1165      [ w        , 0 , Do ; simd      ] \
1166        << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
1167                                              AARCH64_CHECK_ORR);
1168   }
1169   [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-or (EOR).
1172 (define_insn "xor<mode>3<vczle><vczbe>"
1173   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1175                  (match_operand:VDQ_I 2 "register_operand" "w")))]
1176   "TARGET_SIMD"
1177   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1178   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
1181 (define_insn "one_cmpl<mode>2<vczle><vczbe>"
1182   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1183         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1184   "TARGET_SIMD"
1185   "not\t%0.<Vbtype>, %1.<Vbtype>"
1186   [(set_attr "type" "neon_logic<q>")]
;; Insert a single element into a vector.  Operand 2 is a one-hot
;; vec_merge mask (hence the exact_log2 condition); it is converted to
;; an endian-corrected lane number and re-encoded before output.
;; Alternatives: element from a SIMD reg (INS), from a GP reg (INS),
;; or loaded from memory (LD1 to one lane).
1189 (define_insn "aarch64_simd_vec_set<mode>"
1190   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1191         (vec_merge:VALL_F16
1192             (vec_duplicate:VALL_F16
1193                 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1194             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1195             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1196   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1197   {
1198    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1199    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1200    switch (which_alternative)
1201      {
1202      case 0:
1203         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1204      case 1:
1205         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1206      case 2:
1207         return "ld1\\t{%0.<Vetype>}[%p2], %1";
1208      default:
1209         gcc_unreachable ();
1210      }
1211   }
1212   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Set one lane of a vector to zero by inserting from the zero register.
1215 (define_insn "aarch64_simd_vec_set_zero<mode>"
1216   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1217         (vec_merge:VALL_F16
1218             (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
1219             (match_operand:VALL_F16 3 "register_operand" "0")
1220             (match_operand:SI 2 "immediate_operand" "i")))]
1221   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1222   {
1223     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1224     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1225     return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
1226   }
;; Copy lane 4 of operand 3 into the lane of operand 0 selected by the
;; one-hot mask in operand 2 (INS element-to-element).
1229 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1230   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1231         (vec_merge:VALL_F16
1232             (vec_duplicate:VALL_F16
1233               (vec_select:<VEL>
1234                 (match_operand:VALL_F16 3 "register_operand" "w")
1235                 (parallel
1236                   [(match_operand:SI 4 "immediate_operand" "i")])))
1237             (match_operand:VALL_F16 1 "register_operand" "0")
1238             (match_operand:SI 2 "immediate_operand" "i")))]
1239   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1240   {
1241     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1242     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1243     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1245     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1246   }
1247   [(set_attr "type" "neon_ins<q>")]
;; As above, but the source lane comes from a vector of the other
;; (swapped) width, e.g. copying between 64-bit and 128-bit vectors.
1250 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1251   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1252         (vec_merge:VALL_F16_NO_V2Q
1253             (vec_duplicate:VALL_F16_NO_V2Q
1254               (vec_select:<VEL>
1255                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1256                 (parallel
1257                   [(match_operand:SI 4 "immediate_operand" "i")])))
1258             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1259             (match_operand:SI 2 "immediate_operand" "i")))]
1260   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1261   {
1262     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1263     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1264     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1265                                            INTVAL (operands[4]));
1267     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1268   }
1269   [(set_attr "type" "neon_ins<q>")]
;; Extract the sign bit of each FP element by viewing the vector as
;; integers and logically shifting right by (element width - 1).
1272 (define_expand "signbit<mode>2"
1273   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1274    (use (match_operand:VDQSF 1 "register_operand"))]
1275   "TARGET_SIMD"
1277   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1278   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1279                                                         shift_amount);
1280   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1282   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1283                                                  shift_vector));
1284   DONE;
;; USHR: logical (unsigned) shift right by an immediate vector.
1287 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1288  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1289        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1290                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1291  "TARGET_SIMD"
1292  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1293   [(set_attr "type" "neon_shift_imm<q>")]
;; SSHR: arithmetic shift right by an immediate vector.  A shift by
;; every-bit (D1) is emitted as CMLT against zero instead.
1296 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1297  [(set (match_operand:VDQ_I 0 "register_operand")
1298        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1299                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm")))]
1300  "TARGET_SIMD"
1301  {@ [ cons: =0 , 1 , 2  ; attrs: type        ]
1302     [ w        , w , D1 ; neon_compare<q>    ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1303     [ w        , w , Dr ; neon_shift_imm<q>  ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
1304   }
;; [US]SRA: shift right (logical or arithmetic, per SHIFTRT) and
;; accumulate into operand 1, which is tied to the destination.
1307 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1308  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1309         (plus:VDQ_I
1310            (SHIFTRT:VDQ_I
1311                 (match_operand:VDQ_I 2 "register_operand" "w")
1312                 (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
1313            (match_operand:VDQ_I 1 "register_operand" "0")))]
1314   "TARGET_SIMD"
1315   "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1316   [(set_attr "type" "neon_shift_acc<q>")]
;; [US]RSRA: rounding shift-right-accumulate.  Rounding is modelled by
;; extending to the double-width mode, adding the rounding constant
;; (operand 4, validated to equal 1 << (shift - 1)), shifting, and
;; truncating back before accumulating.
1319 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1320  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
1321         (plus:VSDQ_I_DI
1322           (truncate:VSDQ_I_DI
1323             (SHIFTRT:<V2XWIDE>
1324               (plus:<V2XWIDE>
1325                 (<SHIFTEXTEND>:<V2XWIDE>
1326                   (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
1327                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
1328               (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1329           (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
1330   "TARGET_SIMD
1331    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
1332   "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1333   [(set_attr "type" "neon_shift_acc<q>")]
;; Expander: broadcast the scalar shift amount to a vector immediate
;; for the insn above.
1336 (define_expand "aarch64_<sra_op>sra_n<mode>"
1337  [(set (match_operand:VDQ_I 0 "register_operand")
1338         (plus:VDQ_I
1339            (SHIFTRT:VDQ_I
1340                 (match_operand:VDQ_I 2 "register_operand")
1341                 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1342            (match_operand:VDQ_I 1 "register_operand")))]
1343   "TARGET_SIMD"
1344   {
1345     operands[3]
1346       = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
1347   }
1350 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1351   [(match_operand:VSDQ_I_DI 0 "register_operand")
1352    (match_operand:VSDQ_I_DI 1 "register_operand")
1353    (SHIFTRT:VSDQ_I_DI
1354      (match_operand:VSDQ_I_DI 2 "register_operand")
1355      (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1356   "TARGET_SIMD"
1357   {
1358     /* Use this expander to create the rounding constant vector, which is
1359        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
1360        RTL is generated when handling the DImode expanders.  */
1361     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1362     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
1363     rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
1364     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1365     if (VECTOR_MODE_P (<MODE>mode))
1366       {
1367         shft = gen_const_vec_duplicate (<MODE>mode, shft);
1368         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1369       }
1371     emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
1372                                                       operands[2], shft, rnd));
1373     DONE;
1374   }
;; SHL: shift left by an immediate vector (Dl constraint).
1377 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1378  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1379        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1380                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1381  "TARGET_SIMD"
1382   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1383   [(set_attr "type" "neon_shift_imm<q>")]
;; SSHL with a register shift-amount vector, used for left shifts
;; (positive counts).
1386 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1387  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1388        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1389                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1390  "TARGET_SIMD"
1391  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1392   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by a register: kept as an unspec because the element-wise
;; direction depends on the sign of each shift count.
1395 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1396  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1397        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1398                     (match_operand:VDQ_I 2 "register_operand" "w")]
1399                    UNSPEC_ASHIFT_UNSIGNED))]
1400  "TARGET_SIMD"
1401  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1402   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by a register, signed counterpart of the above.
1405 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1406  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1407        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1408                     (match_operand:VDQ_I 2 "register_operand" "w")]
1409                    UNSPEC_ASHIFT_SIGNED))]
1410  "TARGET_SIMD"
1411  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1412   [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name left shift.  Constant in-range amounts use the
;; immediate SHL pattern; otherwise the scalar amount is duplicated
;; into a vector and SSHL is used.
1415 (define_expand "ashl<mode>3"
1416   [(match_operand:VDQ_I 0 "register_operand")
1417    (match_operand:VDQ_I 1 "register_operand")
1418    (match_operand:SI  2 "general_operand")]
1419  "TARGET_SIMD"
1421   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1422   int shift_amount;
1424   if (CONST_INT_P (operands[2]))
1425     {
1426       shift_amount = INTVAL (operands[2]);
         /* Left-shift immediates are valid for 0 <= amount < bit_width.  */
1427       if (shift_amount >= 0 && shift_amount < bit_width)
1428         {
1429           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1430                                                        shift_amount);
1431           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1432                                                      operands[1],
1433                                                      tmp));
1434           DONE;
1435         }
1436     }
1438   operands[2] = force_reg (SImode, operands[2]);
1440   rtx tmp = gen_reg_rtx (<MODE>mode);
1441   emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1442                                                                operands[2],
1443                                                                0)));
1444   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1445   DONE;
;; Standard-name logical right shift.  Constant amounts use USHR;
;; otherwise the amount is NEGATED and duplicated, because USHL shifts
;; right for negative per-element counts.
1448 (define_expand "lshr<mode>3"
1449   [(match_operand:VDQ_I 0 "register_operand")
1450    (match_operand:VDQ_I 1 "register_operand")
1451    (match_operand:SI  2 "general_operand")]
1452  "TARGET_SIMD"
1454   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1455   int shift_amount;
1457   if (CONST_INT_P (operands[2]))
1458     {
1459       shift_amount = INTVAL (operands[2]);
         /* Right-shift immediates are valid for 1 <= amount <= bit_width.  */
1460       if (shift_amount > 0 && shift_amount <= bit_width)
1461         {
1462           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1463                                                        shift_amount);
1464           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1465                                                   operands[1],
1466                                                   tmp));
1467           DONE;
1468         }
1469     }
1471   operands[2] = force_reg (SImode, operands[2]);
1473   rtx tmp = gen_reg_rtx (SImode);
1474   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1475   emit_insn (gen_negsi2 (tmp, operands[2]));
1476   emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1477                                          convert_to_mode (<VEL>mode, tmp, 0)));
1478   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1479                                                       tmp1));
1480   DONE;
;; Standard-name arithmetic right shift.  Same scheme as lshr<mode>3,
;; using SSHR for immediates and SSHL with a negated count otherwise.
1483 (define_expand "ashr<mode>3"
1484   [(match_operand:VDQ_I 0 "register_operand")
1485    (match_operand:VDQ_I 1 "register_operand")
1486    (match_operand:SI  2 "general_operand")]
1487  "TARGET_SIMD"
1489   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1490   int shift_amount;
1492   if (CONST_INT_P (operands[2]))
1493     {
1494       shift_amount = INTVAL (operands[2]);
1495       if (shift_amount > 0 && shift_amount <= bit_width)
1496         {
1497           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1498                                                        shift_amount);
1499           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1500                                                   operands[1],
1501                                                   tmp));
1502           DONE;
1503         }
1504     }
1506   operands[2] = force_reg (SImode, operands[2]);
1508   rtx tmp = gen_reg_rtx (SImode);
1509   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1510   emit_insn (gen_negsi2 (tmp, operands[2]));
1511   emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1512                                                                 tmp, 0)));
1513   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1514                                                     tmp1));
1515   DONE;
;; Vector-by-vector left shift: each lane of operand 2 supplies the shift
;; amount for the corresponding lane of operand 1; maps to the signed
;; register-shift (SSHL) pattern.
1518 (define_expand "vashl<mode>3"
1519  [(match_operand:VDQ_I 0 "register_operand")
1520   (match_operand:VDQ_I 1 "register_operand")
1521   (match_operand:VDQ_I 2 "register_operand")]
1522  "TARGET_SIMD"
1524   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1525                                               operands[2]));
1526   DONE;
;; Vector-by-vector arithmetic right shift: negate the per-lane shift
;; amounts and use the signed register shift, which shifts right for
;; negative amounts.
1529 (define_expand "vashr<mode>3"
1530  [(match_operand:VDQ_I 0 "register_operand")
1531   (match_operand:VDQ_I 1 "register_operand")
1532   (match_operand:VDQ_I 2 "register_operand")]
1533  "TARGET_SIMD"
1535   rtx neg = gen_reg_rtx (<MODE>mode);
1536   emit (gen_neg<mode>2 (neg, operands[2]));
1537   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1538                                                     neg));
1539   DONE;
1542 ;; DI vector shift
;; Arithmetic shift right of a DI value; a shift amount of 64 is clamped
;; to 63 (same result — all copies of the sign bit) because the standard
;; ashrdi3 pattern cannot encode a shift of 64.
1543 (define_expand "aarch64_ashr_simddi"
1544   [(match_operand:DI 0 "register_operand")
1545    (match_operand:DI 1 "register_operand")
1546    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1547   "TARGET_SIMD"
1548   {
1549     /* An arithmetic shift right by 64 fills the result with copies of the sign
1550        bit, just like asr by 63 - however the standard pattern does not handle
1551        a shift by 64.  */
1552     if (INTVAL (operands[2]) == 64)
1553       operands[2] = GEN_INT (63);
1554     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1555     DONE;
1556   }
;; Vector-by-vector logical right shift: negate the per-lane amounts and
;; use the unsigned register shift (left shift by a negative amount
;; shifts right).
1559 (define_expand "vlshr<mode>3"
1560  [(match_operand:VDQ_I 0 "register_operand")
1561   (match_operand:VDQ_I 1 "register_operand")
1562   (match_operand:VDQ_I 2 "register_operand")]
1563  "TARGET_SIMD"
1565   rtx neg = gen_reg_rtx (<MODE>mode);
1566   emit (gen_neg<mode>2 (neg, operands[2]));
1567   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1568                                                       neg));
1569   DONE;
;; Logical shift right of a DI value; a shift by 64 yields zero and is
;; emitted as a move of zero, because the standard lshrdi3 pattern
;; cannot encode a shift of 64.
1572 (define_expand "aarch64_lshr_simddi"
1573   [(match_operand:DI 0 "register_operand")
1574    (match_operand:DI 1 "register_operand")
1575    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1576   "TARGET_SIMD"
1577   {
1578     if (INTVAL (operands[2]) == 64)
1579       emit_move_insn (operands[0], const0_rtx);
1580     else
1581       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1582     DONE;
1583   }
1586 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right by an immediate bit count (UNSPEC_VEC_SHR).
;; On big-endian the element significance within the 64-bit register is
;; reversed, so SHL rather than USHR gives the required element movement.
1587 (define_insn "vec_shr_<mode><vczle><vczbe>"
1588   [(set (match_operand:VD 0 "register_operand" "=w")
1589         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1590                     (match_operand:SI 2 "immediate_operand" "i")]
1591                    UNSPEC_VEC_SHR))]
1592   "TARGET_SIMD"
1593   {
1594     if (BYTES_BIG_ENDIAN)
1595       return "shl %d0, %d1, %2";
1596     else
1597       return "ushr %d0, %d1, %2";
1598   }
1599   [(set_attr "type" "neon_shift_imm")]
;; Insert scalar operand 1 into the lane of operand 0 given by operand 2.
;; The lane index is converted to the one-hot element mask expected by
;; the aarch64_simd_vec_set pattern.
1602 (define_expand "vec_set<mode>"
1603   [(match_operand:VALL_F16 0 "register_operand")
1604    (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1605    (match_operand:SI 2 "immediate_operand")]
1606   "TARGET_SIMD"
1607   {
1608     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1609     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1610                                           GEN_INT (elem), operands[0]));
1611     DONE;
1612   }
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
1616 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1617  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1618        (plus:VDQ_BHSI (mult:VDQ_BHSI
1619                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1620                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1621                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1622  "TARGET_SIMD"
1623  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1624   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand a selected lane of a same-width vector; the
;; lane index is remapped to the architectural (endian-adjusted) number.
1627 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1628  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1629        (plus:VDQHS
1630          (mult:VDQHS
1631            (vec_duplicate:VDQHS
1632               (vec_select:<VEL>
1633                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1634                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1635            (match_operand:VDQHS 3 "register_operand" "w"))
1636          (match_operand:VDQHS 4 "register_operand" "0")))]
1637  "TARGET_SIMD"
1638   {
1639     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1640     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1641   }
1642   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane is taken from a vector of the swapped width
;; (64-bit vs 128-bit) relative to the operation's mode.
1645 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1646  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1647        (plus:VDQHS
1648          (mult:VDQHS
1649            (vec_duplicate:VDQHS
1650               (vec_select:<VEL>
1651                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1652                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1653            (match_operand:VDQHS 3 "register_operand" "w"))
1654          (match_operand:VDQHS 4 "register_operand" "0")))]
1655  "TARGET_SIMD"
1656   {
1657     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1658     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1659   }
1660   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA with one multiplicand a duplicated scalar, emitted as lane 0 of
;; the scalar's register.
1663 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1664  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1665         (plus:VDQHS
1666           (mult:VDQHS
1667             (vec_duplicate:VDQHS
1668               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1669             (match_operand:VDQHS 2 "register_operand" "w"))
1670           (match_operand:VDQHS 1 "register_operand" "0")))]
1671  "TARGET_SIMD"
1672  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1673   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
1676 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1677  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1678        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1679                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1680                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1681  "TARGET_SIMD"
1682  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1683   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand a selected lane of a same-width vector; the
;; lane index is remapped to the architectural (endian-adjusted) number.
1686 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1687  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1688        (minus:VDQHS
1689          (match_operand:VDQHS 4 "register_operand" "0")
1690          (mult:VDQHS
1691            (vec_duplicate:VDQHS
1692               (vec_select:<VEL>
1693                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1694                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1695            (match_operand:VDQHS 3 "register_operand" "w"))))]
1696  "TARGET_SIMD"
1697   {
1698     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1699     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1700   }
1701   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from a vector of the swapped width
;; (64-bit vs 128-bit) relative to the operation's mode.
1704 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1705  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1706        (minus:VDQHS
1707          (match_operand:VDQHS 4 "register_operand" "0")
1708          (mult:VDQHS
1709            (vec_duplicate:VDQHS
1710               (vec_select:<VEL>
1711                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1713            (match_operand:VDQHS 3 "register_operand" "w"))))]
1714  "TARGET_SIMD"
1715   {
1716     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1717     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1718   }
1719   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with one multiplicand a duplicated scalar (lane 0 of operand 3).
1722 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1723   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1724         (minus:VDQHS
1725           (match_operand:VDQHS 1 "register_operand" "0")
1726           (mult:VDQHS
1727             (vec_duplicate:VDQHS
1728               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1729             (match_operand:VDQHS 2 "register_operand" "w"))))]
1730   "TARGET_SIMD"
1731   "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1732   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1735 ;; Max/Min operations.
;; Signed/unsigned element-wise max/min for byte/half/word elements.
1736 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1737  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1738        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1739                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1740  "TARGET_SIMD"
1741  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min is synthesised through a compare-and-select (vcond)
;; sequence: map the max/min code to the matching comparison operator
;; and let vcond pick operand 1 or operand 2 per lane.
1745 (define_expand "<su><maxmin>v2di3"
1746  [(set (match_operand:V2DI 0 "register_operand")
1747        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1748                     (match_operand:V2DI 2 "register_operand")))]
1749  "TARGET_SIMD"
1751   enum rtx_code cmp_operator;
1752   rtx cmp_fmt;
1754   switch (<CODE>)
1755     {
1756     case UMIN:
1757       cmp_operator = LTU;
1758       break;
1759     case SMIN:
1760       cmp_operator = LT;
1761       break;
1762     case UMAX:
1763       cmp_operator = GTU;
1764       break;
1765     case SMAX:
1766       cmp_operator = GT;
1767       break;
1768     default:
1769       gcc_unreachable ();
1770     }
1772   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1773   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1774               operands[2], cmp_fmt, operands[1], operands[2]));
1775   DONE;
1778 ;; Pairwise Integer Max/Min operations.
;; Pairwise max/min on integer vectors (SMAXP/UMINP etc., via MAXMINV).
1779 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1780  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1781        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1782                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1783                         MAXMINV))]
1784  "TARGET_SIMD"
1785  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786   [(set_attr "type" "neon_minmax<q>")]
1789 ;; Pairwise FP Max/Min operations.
;; Same pairwise form for floating-point vectors (FMAXMINV iterator).
1790 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1791  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1792        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1793                       (match_operand:VHSDF 2 "register_operand" "w")]
1794                       FMAXMINV))]
1795  "TARGET_SIMD"
1796  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1797   [(set_attr "type" "neon_minmax<q>")]
1800 ;; vec_concat gives a new vector with the low elements from operand 1, and
1801 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1802 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1803 ;; What that means, is that the RTL descriptions of the below patterns
1804 ;; need to change depending on endianness.
1806 ;; Narrowing operations.
;; XTN2: truncate operand 2 into the high half of the destination while
;; keeping the narrow operand 1 in the low half (little-endian form).
1808 (define_insn "aarch64_xtn2<mode>_insn_le"
1809   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1810         (vec_concat:<VNARROWQ2>
1811           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1812           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1813   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1814   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1815   [(set_attr "type" "neon_move_narrow_q")]
;; Big-endian form of the above: vec_concat operand order is swapped.
1818 (define_insn "aarch64_xtn2<mode>_insn_be"
1819   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1820         (vec_concat:<VNARROWQ2>
1821           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1822           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1823   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1824   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1825   [(set_attr "type" "neon_move_narrow_q")]
;; Dispatch to the correct endian-specific XTN2 insn at expand time.
1828 (define_expand "aarch64_xtn2<mode>"
1829   [(match_operand:<VNARROWQ2> 0 "register_operand")
1830    (match_operand:<VNARROWQ> 1 "register_operand")
1831    (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1832   "TARGET_SIMD"
1833   {
1834     if (BYTES_BIG_ENDIAN)
1835       emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1836                                                  operands[2]));
1837     else
1838       emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1839                                                  operands[2]));
1840     DONE;
1841   }
;; Truncate-and-concatenate of two full-width vectors, implemented as
;; UZP1 picking the even (low) sub-elements; operand order swaps for
;; big-endian.
1844 (define_insn "*aarch64_narrow_trunc<mode>"
1845   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1846         (vec_concat:<VNARROWQ2>
1847           (truncate:<VNARROWQ>
1848             (match_operand:VQN 1 "register_operand" "w"))
1849           (truncate:<VNARROWQ>
1850             (match_operand:VQN 2 "register_operand" "w"))))]
1851   "TARGET_SIMD"
1853   if (!BYTES_BIG_ENDIAN)
1854     return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1855   else
1856     return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1858   [(set_attr "type" "neon_permute<q>")]
1861 ;; Packing doubles.
;; Pack two 64-bit vectors: concatenate into a 128-bit temporary, then
;; truncate the whole double-width vector to the narrow result.
1863 (define_expand "vec_pack_trunc_<mode>"
1864  [(match_operand:<VNARROWD> 0 "register_operand")
1865   (match_operand:VDN 1 "general_operand")
1866   (match_operand:VDN 2 "general_operand")]
1867  "TARGET_SIMD"
1869   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1870   emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1871   emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1872   DONE;
1875 ;; Packing quads.
;; Pack two 128-bit vectors: truncate the "low" operand (endian-
;; dependent choice) into a narrow temporary, then XTN2 the other
;; operand into the high half.
1877 (define_expand "vec_pack_trunc_<mode>"
1878  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1879        (vec_concat:<VNARROWQ2>
1880          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1881          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1882  "TARGET_SIMD"
1884    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1885    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1886    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1888    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1890    if (BYTES_BIG_ENDIAN)
1891      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1892                                                 operands[hi]));
1893    else
1894      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1895                                                 operands[hi]));
1896    DONE;
;; Narrowing of two right-shifted vectors where the shift amount extracts
;; exactly the top half of each element (see the
;; aarch64_simd_shift_imm_vec_exact_top predicate): this is a UZP2
;; selecting the odd (high) sub-elements.  Little-endian form.
1900 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1901   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1902         (vec_concat:<VNARROWQ2>
1903           (truncate:<VNARROWQ>
1904             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1905               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1906           (truncate:<VNARROWQ>
1907             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1908               (match_dup 2)))))]
1909   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1910   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1911   [(set_attr "type" "neon_permute<q>")]
;; Big-endian form of the above: the vec_concat halves are swapped in
;; the RTL, but the emitted UZP2 operand order is unchanged.
1914 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1915   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1916         (vec_concat:<VNARROWQ2>
1917           (truncate:<VNARROWQ>
1918             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1919               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1920           (truncate:<VNARROWQ>
1921             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1922               (match_dup 2)))))]
1923   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1924   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1925   [(set_attr "type" "neon_permute<q>")]
1928 ;; Widening operations.
;; Sign/zero-extend the low half of a 128-bit vector (SXTL/UXTL).
1930 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1931   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1932         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1933                                (match_operand:VQW 1 "register_operand" "w")
1934                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1935                             )))]
1936   "TARGET_SIMD"
1937   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1938   [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high half (SXTL2/UXTL2).  The zero-extend case
;; may be split to ZIP2-with-zero when profitable (see body comment).
1941 (define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
1942   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1943         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1944                                (match_operand:VQW 1 "register_operand" "w")
1945                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1946                             )))]
1947   "TARGET_SIMD"
1948   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1949   "&& <CODE> == ZERO_EXTEND
1950    && aarch64_split_simd_shift_p (insn)"
1951   [(const_int 0)]
1952   {
1953     /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
1954        provided that the cost of the zero can be amortized over several
1955        operations.  We'll later recombine the zero and zip if there are
1956        not sufficient uses of the zero to make the split worthwhile.  */
1957     rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
1958     rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
1959     emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
1960     DONE;
1961   }
1962   [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expander: build the hi-half lane-selection parallel and
;; defer to the insn above.
1965 (define_expand "vec_unpack<su>_hi_<mode>"
1966   [(match_operand:<VWIDE> 0 "register_operand")
1967    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1968   "TARGET_SIMD"
1969   {
1970     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1971     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1972                                                           operands[1], p));
1973     DONE;
1974   }
;; Standard-name expander for the low half.
1977 (define_expand "vec_unpack<su>_lo_<mode>"
1978   [(match_operand:<VWIDE> 0 "register_operand")
1979    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1980   "TARGET_SIMD"
1981   {
1982     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1983     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1984                                                           operands[1], p));
1985     DONE;
1986   }
1989 ;; Widening arithmetic.
;; Widening multiply-accumulate of the low halves (SMLAL/UMLAL).
1991 (define_insn "*aarch64_<su>mlal_lo<mode>"
1992   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1993         (plus:<VWIDE>
1994           (mult:<VWIDE>
1995               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1996                  (match_operand:VQW 2 "register_operand" "w")
1997                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1998               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1999                  (match_operand:VQW 4 "register_operand" "w")
2000                  (match_dup 3))))
2001           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2002   "TARGET_SIMD"
2003   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2004   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate of the high halves (SMLAL2/UMLAL2).
2007 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2008   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2009         (plus:<VWIDE>
2010           (mult:<VWIDE>
2011               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2012                  (match_operand:VQW 2 "register_operand" "w")
2013                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2014               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2015                  (match_operand:VQW 4 "register_operand" "w")
2016                  (match_dup 3))))
2017           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2018   "TARGET_SIMD"
2019   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2020   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander: supply the hi-half selection parallel for the insn above.
2023 (define_expand "aarch64_<su>mlal_hi<mode>"
2024   [(match_operand:<VWIDE> 0 "register_operand")
2025    (match_operand:<VWIDE> 1 "register_operand")
2026    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2027    (match_operand:VQW 3 "register_operand")]
2028   "TARGET_SIMD"
2030   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2031   emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2032                                                  operands[2], p, operands[3]));
2033   DONE;
;; SMLAL2/UMLAL2 with the second multiplicand a duplicated scalar
;; (lane 0 of operand 4).
2037 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2038   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2039         (plus:<VWIDE>
2040           (mult:<VWIDE>
2041             (ANY_EXTEND:<VWIDE>
2042               (vec_select:<VHALF>
2043                 (match_operand:VQ_HSI 2 "register_operand" "w")
2044                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2045             (vec_duplicate:<VWIDE>
2046               (ANY_EXTEND:<VWIDE_S>
2047                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2048           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2049   "TARGET_SIMD"
2050   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2051   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander for the scalar-multiplicand hi MLAL form.
2054 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2055   [(match_operand:<VWIDE> 0 "register_operand")
2056    (match_operand:<VWIDE> 1 "register_operand")
2057    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2058    (match_operand:<VEL> 3 "register_operand")]
2059   "TARGET_SIMD"
2061   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2062   emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2063              operands[1], operands[2], p, operands[3]));
2064   DONE;
;; Widening multiply-subtract of the low halves (SMLSL/UMLSL).
2068 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2069   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2070         (minus:<VWIDE>
2071           (match_operand:<VWIDE> 1 "register_operand" "0")
2072           (mult:<VWIDE>
2073               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2074                  (match_operand:VQW 2 "register_operand" "w")
2075                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2076               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2077                  (match_operand:VQW 4 "register_operand" "w")
2078                  (match_dup 3))))))]
2079   "TARGET_SIMD"
2080   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2081   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract of the high halves (SMLSL2/UMLSL2).
2084 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2085   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086         (minus:<VWIDE>
2087           (match_operand:<VWIDE> 1 "register_operand" "0")
2088           (mult:<VWIDE>
2089               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2090                  (match_operand:VQW 2 "register_operand" "w")
2091                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2092               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2093                  (match_operand:VQW 4 "register_operand" "w")
2094                  (match_dup 3))))))]
2095   "TARGET_SIMD"
2096   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2097   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander: supply the hi-half selection parallel for the insn above.
2100 (define_expand "aarch64_<su>mlsl_hi<mode>"
2101   [(match_operand:<VWIDE> 0 "register_operand")
2102    (match_operand:<VWIDE> 1 "register_operand")
2103    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2104    (match_operand:VQW 3 "register_operand")]
2105   "TARGET_SIMD"
2107   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108   emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2109                                                  operands[2], p, operands[3]));
2110   DONE;
;; SMLSL2/UMLSL2 with the second multiplicand a duplicated scalar
;; (lane 0 of operand 4).
2114 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2115   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2116         (minus:<VWIDE>
2117           (match_operand:<VWIDE> 1 "register_operand" "0")
2118           (mult:<VWIDE>
2119             (ANY_EXTEND:<VWIDE>
2120               (vec_select:<VHALF>
2121                 (match_operand:VQ_HSI 2 "register_operand" "w")
2122                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2123             (vec_duplicate:<VWIDE>
2124               (ANY_EXTEND:<VWIDE_S>
2125                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2126   "TARGET_SIMD"
2127   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2128   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander for the scalar-multiplicand hi MLSL form.
2131 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2132   [(match_operand:<VWIDE> 0 "register_operand")
2133    (match_operand:<VWIDE> 1 "register_operand")
2134    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2135    (match_operand:<VEL> 3 "register_operand")]
2136   "TARGET_SIMD"
2138   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2139   emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2140              operands[1], operands[2], p, operands[3]));
2141   DONE;
;; Widening multiply-accumulate on whole 64-bit vectors (SMLAL/UMLAL).
2145 (define_insn "aarch64_<su>mlal<mode>"
2146   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2147         (plus:<VWIDE>
2148           (mult:<VWIDE>
2149             (ANY_EXTEND:<VWIDE>
2150               (match_operand:VD_BHSI 2 "register_operand" "w"))
2151             (ANY_EXTEND:<VWIDE>
2152               (match_operand:VD_BHSI 3 "register_operand" "w")))
2153           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2154   "TARGET_SIMD"
2155   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2156   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; As above, with the second multiplicand a duplicated scalar (lane 0).
2159 (define_insn "aarch64_<su>mlal_n<mode>"
2160   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2161         (plus:<VWIDE>
2162           (mult:<VWIDE>
2163             (ANY_EXTEND:<VWIDE>
2164               (match_operand:VD_HSI 2 "register_operand" "w"))
2165             (vec_duplicate:<VWIDE>
2166               (ANY_EXTEND:<VWIDE_S>
2167                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2168           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2169   "TARGET_SIMD"
2170   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2171   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on whole 64-bit vectors (SMLSL/UMLSL).
2174 (define_insn "aarch64_<su>mlsl<mode>"
2175   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2176         (minus:<VWIDE>
2177           (match_operand:<VWIDE> 1 "register_operand" "0")
2178           (mult:<VWIDE>
2179             (ANY_EXTEND:<VWIDE>
2180               (match_operand:VD_BHSI 2 "register_operand" "w"))
2181             (ANY_EXTEND:<VWIDE>
2182               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2183   "TARGET_SIMD"
2184   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2185   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; As above, with the second multiplicand a duplicated scalar (lane 0).
2188 (define_insn "aarch64_<su>mlsl_n<mode>"
2189   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2190         (minus:<VWIDE>
2191           (match_operand:<VWIDE> 1 "register_operand" "0")
2192           (mult:<VWIDE>
2193             (ANY_EXTEND:<VWIDE>
2194               (match_operand:VD_HSI 2 "register_operand" "w"))
2195             (vec_duplicate:<VWIDE>
2196               (ANY_EXTEND:<VWIDE_S>
2197                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2198   "TARGET_SIMD"
2199   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2200   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
2203 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2204  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2205        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2206                            (match_operand:VQW 1 "register_operand" "w")
2207                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2208                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2209                            (match_operand:VQW 2 "register_operand" "w")
2210                            (match_dup 3)))))]
2211   "TARGET_SIMD"
2212   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2213   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Widening multiply of whole 64-bit vectors, for the intrinsics.
2216 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2217   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2218         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2219                          (match_operand:VD_BHSI 1 "register_operand" "w"))
2220                       (ANY_EXTEND:<VWIDE>
2221                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2222   "TARGET_SIMD"
2223   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2224   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the lo-half selection parallel and
;; defer to the low-half multiply insn.
2227 (define_expand "vec_widen_<su>mult_lo_<mode>"
2228   [(match_operand:<VWIDE> 0 "register_operand")
2229    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2230    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2231  "TARGET_SIMD"
2233    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2234    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2235                                                        operands[1],
2236                                                        operands[2], p));
2237    DONE;
;; Widening multiply of the high halves (SMULL2/UMULL2).
2241 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2242  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2243       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2244                             (match_operand:VQW 1 "register_operand" "w")
2245                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2246                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2247                             (match_operand:VQW 2 "register_operand" "w")
2248                             (match_dup 3)))))]
2249   "TARGET_SIMD"
2250   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2251   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the high-half widening multiply.
2254 (define_expand "vec_widen_<su>mult_hi_<mode>"
2255   [(match_operand:<VWIDE> 0 "register_operand")
2256    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2257    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2258  "TARGET_SIMD"
2260    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2261    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2262                                                        operands[1],
2263                                                        operands[2], p));
2264    DONE;
2269 ;; vmull_lane_s16 intrinsics
2270 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2271   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2272         (mult:<VWIDE>
2273           (ANY_EXTEND:<VWIDE>
2274             (match_operand:<VCOND> 1 "register_operand" "w"))
2275           (vec_duplicate:<VWIDE>
2276             (ANY_EXTEND:<VWIDE_S>
2277               (vec_select:<VEL>
2278                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2279                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2280   "TARGET_SIMD"
2281   {
2282     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2283     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2284   }
2285   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; SMULL2/UMULL2 by lane: widening multiply of the high half of operand 1
;; by a selected lane of the 64-bit vector operand 3.
2288 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2289   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2290         (mult:<VWIDE>
2291           (ANY_EXTEND:<VWIDE>
2292             (vec_select:<VHALF>
2293               (match_operand:VQ_HSI 1 "register_operand" "w")
2294               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2295           (vec_duplicate:<VWIDE>
2296             (ANY_EXTEND:<VWIDE_S>
2297               (vec_select:<VEL>
2298                 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2299                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2300   "TARGET_SIMD"
2301   {
2302     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2303     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2304   }
2305   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander: materialise the endian-aware hi-half selector and hand off to
;; the *_insn pattern above.
2308 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2309   [(match_operand:<VWIDE> 0 "register_operand")
2310    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2311    (match_operand:<VCOND> 2 "register_operand")
2312    (match_operand:SI 3 "immediate_operand")]
2313   "TARGET_SIMD"
2315   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2316   emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2317              operands[1], p, operands[2], operands[3]));
2318   DONE;
;; As aarch64_<su>mull_hi_lane but the lane comes from a full 128-bit
;; vector (<VCONQ>) -- the "laneq" intrinsic forms.
2322 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2323   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2324         (mult:<VWIDE>
2325           (ANY_EXTEND:<VWIDE>
2326             (vec_select:<VHALF>
2327               (match_operand:VQ_HSI 1 "register_operand" "w")
2328               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2329           (vec_duplicate:<VWIDE>
2330             (ANY_EXTEND:<VWIDE_S>
2331               (vec_select:<VEL>
2332                 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2333                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2334   "TARGET_SIMD"
2335   {
2336     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2337     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2338   }
2339   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and emit the *_insn pattern.
2342 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2343   [(match_operand:<VWIDE> 0 "register_operand")
2344    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2345    (match_operand:<VCONQ> 2 "register_operand")
2346    (match_operand:SI 3 "immediate_operand")]
2347   "TARGET_SIMD"
2349   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2350   emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2351              operands[1], p, operands[2], operands[3]));
2352   DONE;
;; Widening multiply by a scalar in element 0 of a vector register
;; (vmull_n_* intrinsics); printed as lane [0] of operand 2.
2356 (define_insn "aarch64_<su>mull_n<mode>"
2357   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2358         (mult:<VWIDE>
2359           (ANY_EXTEND:<VWIDE>
2360             (match_operand:VD_HSI 1 "register_operand" "w"))
2361           (vec_duplicate:<VWIDE>
2362             (ANY_EXTEND:<VWIDE_S>
2363               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2364   "TARGET_SIMD"
2365   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2366   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; High-half variant: SMULL2/UMULL2 of the upper half of operand 1 by the
;; duplicated scalar operand 2.
2369 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2370   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2371         (mult:<VWIDE>
2372           (ANY_EXTEND:<VWIDE>
2373             (vec_select:<VHALF>
2374               (match_operand:VQ_HSI 1 "register_operand" "w")
2375               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2376           (vec_duplicate:<VWIDE>
2377             (ANY_EXTEND:<VWIDE_S>
2378               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2379   "TARGET_SIMD"
2380   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2381   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander: supply the endian-aware hi-half selector.
2384 (define_expand "aarch64_<su>mull_hi_n<mode>"
2385   [(match_operand:<VWIDE> 0 "register_operand")
2386    (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2387    (match_operand:<VEL> 2 "register_operand")]
2388  "TARGET_SIMD"
2390    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2391    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2392                                                     operands[2], p));
2393    DONE;
2397 ;; vmlal_lane_s16 intrinsics
;; Widening multiply-accumulate by lane (SMLAL/UMLAL): operand 1 is both
;; accumulator input and output (constraint "0").
2398 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2399   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2400         (plus:<VWIDE>
2401           (mult:<VWIDE>
2402             (ANY_EXTEND:<VWIDE>
2403               (match_operand:<VCOND> 2 "register_operand" "w"))
2404             (vec_duplicate:<VWIDE>
2405               (ANY_EXTEND:<VWIDE_S>
2406                 (vec_select:<VEL>
2407                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
2408                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2409           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2410   "TARGET_SIMD"
2411   {
2412     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2413     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2414   }
2415   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; SMLAL2/UMLAL2 by lane: accumulate the widening product of the high half
;; of operand 2 and a lane of the 64-bit vector operand 4 into operand 1.
2418 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2419   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2420         (plus:<VWIDE>
2421           (mult:<VWIDE>
2422             (ANY_EXTEND:<VWIDE>
2423               (vec_select:<VHALF>
2424                 (match_operand:VQ_HSI 2 "register_operand" "w")
2425                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2426             (vec_duplicate:<VWIDE>
2427               (ANY_EXTEND:<VWIDE_S>
2428                 (vec_select:<VEL>
2429                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2430                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2431           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2432   "TARGET_SIMD"
2433   {
2434     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2435     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2436   }
2437   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and emit the *_insn pattern.
2440 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2441   [(match_operand:<VWIDE> 0 "register_operand")
2442    (match_operand:<VWIDE> 1 "register_operand")
2443    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2444    (match_operand:<VCOND> 3 "register_operand")
2445    (match_operand:SI 4 "immediate_operand")]
2446   "TARGET_SIMD"
2448   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2449   emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2450              operands[1], operands[2], p, operands[3], operands[4]));
2451   DONE;
;; As aarch64_<su>mlal_hi_lane but the lane is taken from a full 128-bit
;; vector (<VCONQ>) -- the "laneq" forms.
2455 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2456   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2457         (plus:<VWIDE>
2458           (mult:<VWIDE>
2459             (ANY_EXTEND:<VWIDE>
2460               (vec_select:<VHALF>
2461                 (match_operand:VQ_HSI 2 "register_operand" "w")
2462                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2463             (vec_duplicate:<VWIDE>
2464               (ANY_EXTEND:<VWIDE_S>
2465                 (vec_select:<VEL>
2466                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2467                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2468           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2469   "TARGET_SIMD"
2470   {
2471     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2472     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2473   }
2474   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and emit the *_insn pattern.
2477 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2478   [(match_operand:<VWIDE> 0 "register_operand")
2479    (match_operand:<VWIDE> 1 "register_operand")
2480    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2481    (match_operand:<VCONQ> 3 "register_operand")
2482    (match_operand:SI 4 "immediate_operand")]
2483   "TARGET_SIMD"
2485   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2486   emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2487              operands[1], operands[2], p, operands[3], operands[4]));
2488   DONE;
;; Widening multiply-subtract by lane (SMLSL/UMLSL): subtract the widening
;; product from accumulator operand 1 (tied to the output).
2492 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2493   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2494    (minus:<VWIDE>
2495      (match_operand:<VWIDE> 1 "register_operand" "0")
2496      (mult:<VWIDE>
2497        (ANY_EXTEND:<VWIDE>
2498          (match_operand:<VCOND> 2 "register_operand" "w"))
2499        (vec_duplicate:<VWIDE>
2500          (ANY_EXTEND:<VWIDE_S>
2501            (vec_select:<VEL>
2502              (match_operand:VDQHS 3 "register_operand" "<vwx>")
2503              (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2504   "TARGET_SIMD"
2505   {
2506     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2507     return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2508   }
2509   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; SMLSL2/UMLSL2 by lane: subtract the widening product of the high half of
;; operand 2 and a lane of the 64-bit vector operand 4 from operand 1.
2512 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2513   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2514         (minus:<VWIDE>
2515           (match_operand:<VWIDE> 1 "register_operand" "0")
2516           (mult:<VWIDE>
2517             (ANY_EXTEND:<VWIDE>
2518               (vec_select:<VHALF>
2519                 (match_operand:VQ_HSI 2 "register_operand" "w")
2520                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2521             (vec_duplicate:<VWIDE>
2522               (ANY_EXTEND:<VWIDE_S>
2523                 (vec_select:<VEL>
2524                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2525                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2526           )))]
2527   "TARGET_SIMD"
2528   {
2529     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2530     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2531   }
2532   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and emit the *_insn pattern.
2535 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2536   [(match_operand:<VWIDE> 0 "register_operand")
2537    (match_operand:<VWIDE> 1 "register_operand")
2538    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2539    (match_operand:<VCOND> 3 "register_operand")
2540    (match_operand:SI 4 "immediate_operand")]
2541   "TARGET_SIMD"
2543   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2544   emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2545              operands[1], operands[2], p, operands[3], operands[4]));
2546   DONE;
;; As aarch64_<su>mlsl_hi_lane but with the lane taken from a full 128-bit
;; vector (<VCONQ>) -- the "laneq" forms.
2550 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2551   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2552         (minus:<VWIDE>
2553           (match_operand:<VWIDE> 1 "register_operand" "0")
2554           (mult:<VWIDE>
2555             (ANY_EXTEND:<VWIDE>
2556               (vec_select:<VHALF>
2557                 (match_operand:VQ_HSI 2 "register_operand" "w")
2558                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2559             (vec_duplicate:<VWIDE>
2560               (ANY_EXTEND:<VWIDE_S>
2561                 (vec_select:<VEL>
2562                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2563                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2564           )))]
2565   "TARGET_SIMD"
2566   {
2567     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2568     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2569   }
2570   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and emit the *_insn pattern.
2573 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2574   [(match_operand:<VWIDE> 0 "register_operand")
2575    (match_operand:<VWIDE> 1 "register_operand")
2576    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2577    (match_operand:<VCONQ> 3 "register_operand")
2578    (match_operand:SI 4 "immediate_operand")]
2579   "TARGET_SIMD"
2581   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2582   emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2583              operands[1], operands[2], p, operands[3], operands[4]));
2584   DONE;
2588 ;; FP vector operations.
2589 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
2590 ;; double-precision (64-bit) floating-point data types and arithmetic as
2591 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
2592 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2594 ;; Floating-point operations can raise an exception.  Vectorizing such
2595 ;; operations is safe because of the reasons explained below.
2597 ;; ARMv8 permits an extension to enable trapped floating-point
2598 ;; exception handling, however this is an optional feature.  In the
2599 ;; event of a floating-point exception being raised by vectorised
2600 ;; code then:
2601 ;; 1.  If trapped floating-point exceptions are available, then a trap
2602 ;;     will be taken when any lane raises an enabled exception.  A trap
2603 ;;     handler may determine which lane raised the exception.
2604 ;; 2.  Alternatively a sticky exception flag is set in the
2605 ;;     floating-point status register (FPSR).  Software may explicitly
2606 ;;     test the exception flags, in which case the tests will either
2607 ;;     prevent vectorisation, allowing precise identification of the
2608 ;;     failing operation, or if tested outside of vectorisable regions
2609 ;;     then the specific operation and lane are not of interest.
2611 ;; FP arithmetic operations.
;; Basic FP vector arithmetic.  The <vczle><vczbe> name suffixes expand
;; through the add_vec_concat_subst_{le,be} define_substs (top of file),
;; producing variants whose 64-bit result implicitly zeroes the high half.
2613 (define_insn "add<mode>3<vczle><vczbe>"
2614  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2615        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2616                    (match_operand:VHSDF 2 "register_operand" "w")))]
2617  "TARGET_SIMD"
2618  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2619   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtract (FSUB).
2622 (define_insn "sub<mode>3<vczle><vczbe>"
2623  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2624        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2625                     (match_operand:VHSDF 2 "register_operand" "w")))]
2626  "TARGET_SIMD"
2627  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2628   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiply (FMUL).
2631 (define_insn "mul<mode>3<vczle><vczbe>"
2632  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2633        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2634                    (match_operand:VHSDF 2 "register_operand" "w")))]
2635  "TARGET_SIMD"
2636  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2637   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP division expander: first try the Newton-Raphson style approximate
;; sequence (aarch64_emit_approx_div); otherwise fall through to the
;; *div<mode>3 FDIV insn below.
2640 (define_expand "div<mode>3"
2641  [(set (match_operand:VHSDF 0 "register_operand")
2642        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2643                   (match_operand:VHSDF 2 "register_operand")))]
2644  "TARGET_SIMD"
2646   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2647     DONE;
2649   operands[1] = force_reg (<MODE>mode, operands[1]);
;; The real FDIV instruction, matched only via the expander above.
2652 (define_insn "*div<mode>3<vczle><vczbe>"
2653  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2654        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2655                  (match_operand:VHSDF 2 "register_operand" "w")))]
2656  "TARGET_SIMD"
2657  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2658   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negate (FNEG).
2661 (define_insn "neg<mode>2<vczle><vczbe>"
2662  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2663        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2664  "TARGET_SIMD"
2665  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2666   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (FABS).
2669 (define_insn "abs<mode>2<vczle><vczbe>"
2670  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2671        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2672  "TARGET_SIMD"
2673  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2674   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; vmla_f* intrinsics: deliberately expanded as a separate multiply
;; followed by an add (NOT a fused FMLA), preserving the two-rounding
;; semantics of the non-fused intrinsic.
2677 (define_expand "aarch64_float_mla<mode>"
2678   [(set (match_operand:VDQF_DF 0 "register_operand")
2679         (plus:VDQF_DF
2680           (mult:VDQF_DF
2681             (match_operand:VDQF_DF 2 "register_operand")
2682             (match_operand:VDQF_DF 3 "register_operand"))
2683           (match_operand:VDQF_DF 1 "register_operand")))]
2684   "TARGET_SIMD"
2685   {
2686     rtx scratch = gen_reg_rtx (<MODE>mode);
2687     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2688     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2689     DONE;
2690   }
;; vmls_f* intrinsics: multiply then subtract, again unfused.
2693 (define_expand "aarch64_float_mls<mode>"
2694   [(set (match_operand:VDQF_DF 0 "register_operand")
2695         (minus:VDQF_DF
2696           (match_operand:VDQF_DF 1 "register_operand")
2697           (mult:VDQF_DF
2698             (match_operand:VDQF_DF 2 "register_operand")
2699             (match_operand:VDQF_DF 3 "register_operand"))))]
2700   "TARGET_SIMD"
2701   {
2702     rtx scratch = gen_reg_rtx (<MODE>mode);
2703     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2704     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2705     DONE;
2706   }
;; vmla_n_f32-style intrinsics: multiply by a duplicated scalar, then add.
;; Unfused on purpose (mul_n + add), matching the intrinsic semantics.
2709 (define_expand "aarch64_float_mla_n<mode>"
2710   [(set (match_operand:VDQSF 0 "register_operand")
2711         (plus:VDQSF
2712           (mult:VDQSF
2713             (vec_duplicate:VDQSF
2714               (match_operand:<VEL> 3 "register_operand"))
2715             (match_operand:VDQSF 2 "register_operand"))
2716           (match_operand:VDQSF 1 "register_operand")))]
2717   "TARGET_SIMD"
2718   {
2719     rtx scratch = gen_reg_rtx (<MODE>mode);
2720     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2721     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2722     DONE;
2723   }
;; vmls_n_f32-style intrinsics: multiply by duplicated scalar, subtract.
2726 (define_expand "aarch64_float_mls_n<mode>"
2727   [(set (match_operand:VDQSF 0 "register_operand")
2728         (minus:VDQSF
2729           (match_operand:VDQSF 1 "register_operand")
2730           (mult:VDQSF
2731             (vec_duplicate:VDQSF
2732               (match_operand:<VEL> 3 "register_operand"))
2733             (match_operand:VDQSF 2 "register_operand"))))]
2734   "TARGET_SIMD"
2735   {
2736     rtx scratch = gen_reg_rtx (<MODE>mode);
2737     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2738     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2739     DONE;
2740   }
;; vmla_lane_f32-style intrinsics: multiply by a selected V2SF lane, then
;; add.  Expanded as mul_lane + add (unfused).
2743 (define_expand "aarch64_float_mla_lane<mode>"
2744   [(set (match_operand:VDQSF 0 "register_operand")
2745         (plus:VDQSF
2746           (mult:VDQSF
2747             (vec_duplicate:VDQSF
2748               (vec_select:<VEL>
2749                 (match_operand:V2SF 3 "register_operand")
2750                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2751             (match_operand:VDQSF 2 "register_operand"))
2752           (match_operand:VDQSF 1 "register_operand")))]
2753   "TARGET_SIMD"
2754   {
2755     rtx scratch = gen_reg_rtx (<MODE>mode);
2756     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2757                                     operands[3], operands[4]));
2758     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2759     DONE;
2760   }
;; vmls_lane_f32-style intrinsics: multiply by a selected lane, subtract.
2763 (define_expand "aarch64_float_mls_lane<mode>"
2764   [(set (match_operand:VDQSF 0 "register_operand")
2765         (minus:VDQSF
2766           (match_operand:VDQSF 1 "register_operand")
2767           (mult:VDQSF
2768             (vec_duplicate:VDQSF
2769               (vec_select:<VEL>
2770                 (match_operand:V2SF 3 "register_operand")
2771                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2772             (match_operand:VDQSF 2 "register_operand"))))]
2773   "TARGET_SIMD"
2774   {
2775     rtx scratch = gen_reg_rtx (<MODE>mode);
2776     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2777                                     operands[3], operands[4]));
2778     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2779     DONE;
2780   }
;; "laneq" variants: the lane comes from a 128-bit V4SF vector instead of
;; V2SF.  Still expanded unfused as mul_laneq + add/sub.
2783 (define_expand "aarch64_float_mla_laneq<mode>"
2784   [(set (match_operand:VDQSF 0 "register_operand")
2785         (plus:VDQSF
2786           (mult:VDQSF
2787             (vec_duplicate:VDQSF
2788               (vec_select:<VEL>
2789                 (match_operand:V4SF 3 "register_operand")
2790                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2791             (match_operand:VDQSF 2 "register_operand"))
2792           (match_operand:VDQSF 1 "register_operand")))]
2793   "TARGET_SIMD"
2794   {
2795     rtx scratch = gen_reg_rtx (<MODE>mode);
2796     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2797                                      operands[3], operands[4]));
2798     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2799     DONE;
2800   }
;; Subtracting "laneq" counterpart.
2803 (define_expand "aarch64_float_mls_laneq<mode>"
2804   [(set (match_operand:VDQSF 0 "register_operand")
2805         (minus:VDQSF
2806           (match_operand:VDQSF 1 "register_operand")
2807           (mult:VDQSF
2808             (vec_duplicate:VDQSF
2809               (vec_select:<VEL>
2810                 (match_operand:V4SF 3 "register_operand")
2811                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2812             (match_operand:VDQSF 2 "register_operand"))))]
2813   "TARGET_SIMD"
2814   {
2815     rtx scratch = gen_reg_rtx (<MODE>mode);
2816     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2817                                      operands[3], operands[4]));
2818     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2819     DONE;
2820   }
;; Fused multiply-add (FMLA) -- standard fma pattern; operand 3 is the
;; accumulator, tied to the output.
2823 (define_insn "fma<mode>4<vczle><vczbe>"
2824   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2825        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2826                   (match_operand:VHSDF 2 "register_operand" "w")
2827                   (match_operand:VHSDF 3 "register_operand" "0")))]
2828   "TARGET_SIMD"
2829  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2830   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; Combine pattern: FMLA with one multiplicand broadcast from a lane of a
;; same-width vector.
2833 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2834   [(set (match_operand:VDQF 0 "register_operand" "=w")
2835     (fma:VDQF
2836       (vec_duplicate:VDQF
2837         (vec_select:<VEL>
2838           (match_operand:VDQF 1 "register_operand" "<h_con>")
2839           (parallel [(match_operand:SI 2 "immediate_operand")])))
2840       (match_operand:VDQF 3 "register_operand" "w")
2841       (match_operand:VDQF 4 "register_operand" "0")))]
2842   "TARGET_SIMD"
2843   {
2844     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2845     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2846   }
2847   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane source has the opposite width (V2SF<->V4SF),
;; via the VSWAP_WIDTH attribute.
2850 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2851   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2852     (fma:VDQSF
2853       (vec_duplicate:VDQSF
2854         (vec_select:<VEL>
2855           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2856           (parallel [(match_operand:SI 2 "immediate_operand")])))
2857       (match_operand:VDQSF 3 "register_operand" "w")
2858       (match_operand:VDQSF 4 "register_operand" "0")))]
2859   "TARGET_SIMD"
2860   {
2861     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2862     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2863   }
2864   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with the multiplicand duplicated from a scalar register; always
;; printed as lane [0].
2867 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2868   [(set (match_operand:VMUL 0 "register_operand" "=w")
2869     (fma:VMUL
2870       (vec_duplicate:VMUL
2871           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2872       (match_operand:VMUL 2 "register_operand" "w")
2873       (match_operand:VMUL 3 "register_operand" "0")))]
2874   "TARGET_SIMD"
2875   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2876   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed as an indexed FMLA from a V2DF lane.
2879 (define_insn "*aarch64_fma4_elt_to_64v2df"
2880   [(set (match_operand:DF 0 "register_operand" "=w")
2881     (fma:DF
2882         (vec_select:DF
2883           (match_operand:V2DF 1 "register_operand" "w")
2884           (parallel [(match_operand:SI 2 "immediate_operand")]))
2885       (match_operand:DF 3 "register_operand" "w")
2886       (match_operand:DF 4 "register_operand" "0")))]
2887   "TARGET_SIMD"
2888   {
2889     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2890     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2891   }
2892   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (FMLS): fma with the first multiplicand
;; negated -- the standard fnma pattern.
2895 (define_insn "fnma<mode>4<vczle><vczbe>"
2896   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2897         (fma:VHSDF
2898           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2899           (match_operand:VHSDF 2 "register_operand" "w")
2900           (match_operand:VHSDF 3 "register_operand" "0")))]
2901   "TARGET_SIMD"
2902   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2903   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; Combine pattern: FMLS with a lane-broadcast multiplicand (same-width
;; lane source).
2906 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2907   [(set (match_operand:VDQF 0 "register_operand" "=w")
2908     (fma:VDQF
2909       (neg:VDQF
2910         (match_operand:VDQF 3 "register_operand" "w"))
2911       (vec_duplicate:VDQF
2912         (vec_select:<VEL>
2913           (match_operand:VDQF 1 "register_operand" "<h_con>")
2914           (parallel [(match_operand:SI 2 "immediate_operand")])))
2915       (match_operand:VDQF 4 "register_operand" "0")))]
2916   "TARGET_SIMD"
2917   {
2918     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2919     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2920   }
2921   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by lane with the opposite-width lane source (VSWAP_WIDTH).
2924 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2925   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2926     (fma:VDQSF
2927       (neg:VDQSF
2928         (match_operand:VDQSF 3 "register_operand" "w"))
2929       (vec_duplicate:VDQSF
2930         (vec_select:<VEL>
2931           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2932           (parallel [(match_operand:SI 2 "immediate_operand")])))
2933       (match_operand:VDQSF 4 "register_operand" "0")))]
2934   "TARGET_SIMD"
2935   {
2936     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2937     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2938   }
2939   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with the multiplicand duplicated from a scalar register (lane [0]).
2942 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
2943   [(set (match_operand:VMUL 0 "register_operand" "=w")
2944     (fma:VMUL
2945       (neg:VMUL
2946         (match_operand:VMUL 2 "register_operand" "w"))
2947       (vec_duplicate:VMUL
2948         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2949       (match_operand:VMUL 3 "register_operand" "0")))]
2950   "TARGET_SIMD"
2951   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2952   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result as indexed FMLS from a V2DF lane; note the neg is on
;; operand 3 here, which is commutatively equivalent.
2955 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2956   [(set (match_operand:DF 0 "register_operand" "=w")
2957     (fma:DF
2958       (vec_select:DF
2959         (match_operand:V2DF 1 "register_operand" "w")
2960         (parallel [(match_operand:SI 2 "immediate_operand")]))
2961       (neg:DF
2962         (match_operand:DF 3 "register_operand" "w"))
2963       (match_operand:DF 4 "register_operand" "0")))]
2964   "TARGET_SIMD"
2965   {
2966     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2967     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2968   }
2969   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2972 ;; Vector versions of the floating-point frint patterns.
2973 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2974 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
2975   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2976         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2977                        FRINT))]
2978   "TARGET_SIMD"
2979   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2980   [(set_attr "type" "neon_fp_round_<stype><q>")]
2983 ;; Vector versions of the fcvt standard patterns.
2984 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT unspec) then convert to signed/unsigned integer; the
;; rounding mode is encoded in <frint_suffix> of the FCVT mnemonic.
2985 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2986   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2987         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2988                                [(match_operand:VHSDF 1 "register_operand" "w")]
2989                                FCVT)))]
2990   "TARGET_SIMD"
2991   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2992   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2995 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI convert with explicit rounding; requires the
;; half-precision FP instruction extension.
2996 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2997   [(set (match_operand:HI 0 "register_operand" "=w")
2998         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2999                       FCVT)))]
3000   "TARGET_SIMD_F16INST"
3001   "fcvt<frint_suffix><su>\t%h0, %h1"
3002   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating convert (FCVTZS/FCVTZU).
3005 (define_insn "<optab>_trunchfhi2"
3006   [(set (match_operand:HI 0 "register_operand" "=w")
3007         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3008   "TARGET_SIMD_F16INST"
3009   "fcvtz<su>\t%h0, %h1"
3010   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF convert (SCVTF/UCVTF).
3013 (define_insn "<optab>hihf2"
3014   [(set (match_operand:HF 0 "register_operand" "=w")
3015         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3016   "TARGET_SIMD_F16INST"
3017   "<su_optab>cvtf\t%h0, %h1"
3018   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (multiply by power of 2) + truncating convert into a single
;; fixed-point FCVTZS/FCVTZU with an immediate #fbits.  The condition
;; limits fbits to [1, element width].
3021 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3022   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3023         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3024                                [(mult:VDQF
3025          (match_operand:VDQF 1 "register_operand" "w")
3026          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3027                                UNSPEC_FRINTZ)))]
3028   "TARGET_SIMD
3029    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3030                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3031   {
3032     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3033     char buf[64];
3034     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3035     output_asm_insn (buf, operands);
3036     return "";
3037   }
3038   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard fix/fixuns pattern: FP -> integer with truncation
;; (UNSPEC_FRINTZ); matched by the l*<...> insn above.
3041 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3042   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3043         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3044                                [(match_operand:VHSDF 1 "register_operand")]
3045                                 UNSPEC_FRINTZ)))]
3046   "TARGET_SIMD"
3047   {})
;; Same RTL shape under the fix_trunc optab name.
3049 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3050   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3051         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3052                                [(match_operand:VHSDF 1 "register_operand")]
3053                                 UNSPEC_FRINTZ)))]
3054   "TARGET_SIMD"
3055   {})
;; ftrunc: round toward zero, result stays floating-point.
3057 (define_expand "ftrunc<VHSDF:mode>2"
3058   [(set (match_operand:VHSDF 0 "register_operand")
3059         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3060                        UNSPEC_FRINTZ))]
3061   "TARGET_SIMD"
3062   {})
;; Integer -> FP convert (SCVTF/UCVTF), vector form.
3064 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3065   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3066         (FLOATUORS:VHSDF
3067           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3068   "TARGET_SIMD"
3069   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3070   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3073 ;; Conversions between vectors of floats and doubles.
3074 ;; Contains a mix of patterns to match standard pattern names
3075 ;; and those for intrinsics.
3077 ;; Float widening operations.
;; FCVTL: float-extend the low half of a full-width HF/SF vector; the
;; lo-half selector (operand 2) is supplied by the caller/expander.
3079 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3080   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3081         (float_extend:<VWIDE> (vec_select:<VHALF>
3082                                (match_operand:VQ_HSF 1 "register_operand" "w")
3083                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3084                             )))]
3085   "TARGET_SIMD"
3086   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3087   [(set_attr "type" "neon_fp_cvt_widen_s")]
3090 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #%2 fractional bits (FCVTZS/FCVTZU immediate
;; form, selected by the FCVT_F2FIXED iterator).
3092 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3093   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3094         (unspec:<VHSDF:FCVT_TARGET>
3095           [(match_operand:VHSDF 1 "register_operand" "w")
3096            (match_operand:SI 2 "immediate_operand" "i")]
3097          FCVT_F2FIXED))]
3098   "TARGET_SIMD"
3099   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3100   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #%2 fractional bits (SCVTF/UCVTF immediate
;; form, selected by the FCVT_FIXED2F iterator).
3103 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3104   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3105         (unspec:<VDQ_HSDI:FCVT_TARGET>
3106           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3107            (match_operand:SI 2 "immediate_operand" "i")]
3108          FCVT_FIXED2F))]
3109   "TARGET_SIMD"
3110   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3111   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3114 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3115 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3116 ;; the meaning of HI and LO changes depending on the target endianness.
3117 ;; While elsewhere we map the higher numbered elements of a vector to
3118 ;; the lower architectural lanes of the vector, for these patterns we want
3119 ;; to always treat "hi" as referring to the higher architectural lanes.
3120 ;; Consequently, while the patterns below look inconsistent with our
3121 ;; other big-endian patterns their behavior is as required.
3123 (define_expand "vec_unpacks_lo_<mode>"
3124   [(match_operand:<VWIDE> 0 "register_operand")
3125    (match_operand:VQ_HSF 1 "register_operand")]
3126   "TARGET_SIMD"
3127   {
3128     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3129     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3130                                                        operands[1], p));
3131     DONE;
3132   }
3135 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3136   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3137         (float_extend:<VWIDE> (vec_select:<VHALF>
3138                                (match_operand:VQ_HSF 1 "register_operand" "w")
3139                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3140                             )))]
3141   "TARGET_SIMD"
3142   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3143   [(set_attr "type" "neon_fp_cvt_widen_s")]
3146 (define_expand "vec_unpacks_hi_<mode>"
3147   [(match_operand:<VWIDE> 0 "register_operand")
3148    (match_operand:VQ_HSF 1 "register_operand")]
3149   "TARGET_SIMD"
3150   {
3151     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3152     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3153                                                        operands[1], p));
3154     DONE;
3155   }
3157 (define_insn "aarch64_float_extend_lo_<Vwide>"
3158   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159         (float_extend:<VWIDE>
3160           (match_operand:VDF 1 "register_operand" "w")))]
3161   "TARGET_SIMD"
3162   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3163   [(set_attr "type" "neon_fp_cvt_widen_s")]
3166 ;; Float narrowing operations.
3168 (define_insn "aarch64_float_trunc_rodd_df"
3169   [(set (match_operand:SF 0 "register_operand" "=w")
3170         (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3171                 UNSPEC_FCVTXN))]
3172   "TARGET_SIMD"
3173   "fcvtxn\\t%s0, %d1"
3174   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3177 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3178   [(set (match_operand:V2SF 0 "register_operand" "=w")
3179         (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3180                 UNSPEC_FCVTXN))]
3181   "TARGET_SIMD"
3182   "fcvtxn\\t%0.2s, %1.2d"
3183   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3186 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3187   [(set (match_operand:V4SF 0 "register_operand" "=w")
3188         (vec_concat:V4SF
3189           (match_operand:V2SF 1 "register_operand" "0")
3190           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3191                 UNSPEC_FCVTXN)))]
3192   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3193   "fcvtxn2\\t%0.4s, %2.2d"
3194   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3197 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3198   [(set (match_operand:V4SF 0 "register_operand" "=w")
3199         (vec_concat:V4SF
3200           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3201                 UNSPEC_FCVTXN)
3202           (match_operand:V2SF 1 "register_operand" "0")))]
3203   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3204   "fcvtxn2\\t%0.4s, %2.2d"
3205   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3208 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3209   [(match_operand:V4SF 0 "register_operand")
3210    (match_operand:V2SF 1 "register_operand")
3211    (match_operand:V2DF 2 "register_operand")]
3212   "TARGET_SIMD"
3214   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3215                              ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3216                              : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3217   emit_insn (gen (operands[0], operands[1], operands[2]));
3218   DONE;
3222 (define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
3223   [(set (match_operand:VDF 0 "register_operand" "=w")
3224       (float_truncate:VDF
3225         (match_operand:<VWIDE> 1 "register_operand" "w")))]
3226   "TARGET_SIMD"
3227   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3228   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3231 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3232   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3233     (vec_concat:<VDBL>
3234       (match_operand:VDF 1 "register_operand" "0")
3235       (float_truncate:VDF
3236         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3237   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3238   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3239   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3242 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3243   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3244     (vec_concat:<VDBL>
3245       (float_truncate:VDF
3246         (match_operand:<VWIDE> 2 "register_operand" "w"))
3247       (match_operand:VDF 1 "register_operand" "0")))]
3248   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3249   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3250   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3253 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3254   [(match_operand:<VDBL> 0 "register_operand")
3255    (match_operand:VDF 1 "register_operand")
3256    (match_operand:<VWIDE> 2 "register_operand")]
3257   "TARGET_SIMD"
3259   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3260                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3261                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3262   emit_insn (gen (operands[0], operands[1], operands[2]));
3263   DONE;
3267 (define_expand "vec_pack_trunc_v2df"
3268   [(set (match_operand:V4SF 0 "register_operand")
3269       (vec_concat:V4SF
3270         (float_truncate:V2SF
3271             (match_operand:V2DF 1 "register_operand"))
3272         (float_truncate:V2SF
3273             (match_operand:V2DF 2 "register_operand"))
3274           ))]
3275   "TARGET_SIMD"
3276   {
3277     rtx tmp = gen_reg_rtx (V2SFmode);
3278     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3279     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3281     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3282     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3283                                                    tmp, operands[hi]));
3284     DONE;
3285   }
3288 (define_expand "vec_pack_trunc_df"
3289   [(set (match_operand:V2SF 0 "register_operand")
3290         (vec_concat:V2SF
3291           (float_truncate:SF (match_operand:DF 1 "general_operand"))
3292           (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3293   "TARGET_SIMD"
3294   {
3295     rtx tmp = gen_reg_rtx (V2SFmode);
3296     emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3297     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3298     DONE;
3299   }
3302 ;; FP Max/Min
3303 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
3304 ;; expression like:
3305 ;;      a = (b < c) ? b : c;
3306 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3307 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3308 ;; -ffast-math.
3310 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3311 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3312 ;; operand will be returned when both operands are zero (i.e. they may not
3313 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
3314 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3315 ;; NaNs.
3317 (define_insn "<su><maxmin><mode>3"
3318   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3319         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3320                        (match_operand:VHSDF 2 "register_operand" "w")))]
3321   "TARGET_SIMD"
3322   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3323   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3326 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3327 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3328 ;; which implement the IEEE fmax ()/fmin () functions.
3329 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3330   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3331        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3332                       (match_operand:VHSDF 2 "register_operand" "w")]
3333                       FMAXMIN_UNS))]
3334   "TARGET_SIMD"
3335   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3336   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3339 ;; 'across lanes' add.
3341 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3342  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3343        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3344                       (match_operand:VHSDF 2 "register_operand" "w")]
3345         UNSPEC_FADDV))]
3346  "TARGET_SIMD"
3347  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3348   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3351 (define_insn "reduc_plus_scal_<mode>"
3352  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3353        (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3354                     UNSPEC_ADDV))]
3355  "TARGET_SIMD"
3356  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3357   [(set_attr "type" "neon_reduc_add<q>")]
3360 (define_insn "reduc_plus_scal_v2si"
3361  [(set (match_operand:SI 0 "register_operand" "=w")
3362        (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3363                     UNSPEC_ADDV))]
3364  "TARGET_SIMD"
3365  "addp\\t%0.2s, %1.2s, %1.2s"
3366   [(set_attr "type" "neon_reduc_add")]
3369 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3370 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3371  [(set (match_operand:GPI 0 "register_operand" "=w")
3372        (zero_extend:GPI
3373         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3374                              UNSPEC_ADDV)))]
3375  "TARGET_SIMD"
3376  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3377   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3380 (define_insn "reduc_plus_scal_<mode>"
3381  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3382        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3383                    UNSPEC_FADDV))]
3384  "TARGET_SIMD"
3385  "faddp\\t%<Vetype>0, %1.<Vtype>"
3386   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3389 (define_expand "reduc_plus_scal_v4sf"
3390  [(set (match_operand:SF 0 "register_operand")
3391        (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3392                     UNSPEC_FADDV))]
3393  "TARGET_SIMD"
3395   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3396   rtx scratch = gen_reg_rtx (V4SFmode);
3397   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3398   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3399   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3400   DONE;
3403 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3404 ;; sign or zero-extends its elements.
3405 (define_insn "aarch64_<su>addlv<mode>"
3406  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3407        (unspec:<VWIDE_S>
3408          [(ANY_EXTEND:<V2XWIDE>
3409             (match_operand:VDQV_L 1 "register_operand" "w"))]
3410          UNSPEC_ADDV))]
3411  "TARGET_SIMD"
3412  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3413   [(set_attr "type" "neon_reduc_add<q>")]
3416 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3417 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3418 ;; of that vector are used.  We can greatly simplify the RTL expression using
3419 ;; this splitter.
3420 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3421  [(set (match_operand:<VWIDE_S> 0 "register_operand")
3422        (unspec:<VWIDE_S>
3423          [(plus:<VDBLW>
3424             (vec_select:<VDBLW>
3425               (ANY_EXTEND:<V2XWIDE>
3426                 (match_operand:VDQV_L 1 "register_operand"))
3427               (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3428             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3429               (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3430          UNSPEC_ADDV))]
3431  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3432  "#"
3433  "&& 1"
3434   [(set (match_dup 0)
3435        (unspec:<VWIDE_S>
3436          [(ANY_EXTEND:<V2XWIDE>
3437             (match_dup 1))]
3438          UNSPEC_ADDV))]
3439   {}
3442 ;; Similar to the above but for two-step zero-widening reductions.
3443 ;; We can push the outer zero_extend outside the ADDV unspec and make
3444 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3445 ;; in a single instruction.
3446 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3447  [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3448        (unspec:<VWIDE2X_S>
3449          [(zero_extend:<VQUADW>
3450             (plus:<VDBLW>
3451               (vec_select:<VDBLW>
3452                 (zero_extend:<V2XWIDE>
3453                   (match_operand:VDQQH 1 "register_operand" "w"))
3454                 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3455               (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3456                 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3457          UNSPEC_ADDV))]
3458  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3459  "#"
3460  "&& 1"
3461   [(set (match_dup 0)
3462         (zero_extend:<VWIDE2X_S>
3463           (unspec:<VWIDE_S>
3464             [(zero_extend:<V2XWIDE>
3465                (match_dup 1))]
3466             UNSPEC_ADDV)))]
3467   {}
3470 ;; Zero-extending version of the above.  As these intrinsics produce a scalar
3471 ;; value that may be used by further intrinsics we want to avoid moving the
3472 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3474 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3475  [(set (match_operand:GPI 0 "register_operand" "=w")
3476        (zero_extend:GPI
3477          (unspec:<VWIDE_S>
3478            [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3479               (match_operand:VDQV_L 1 "register_operand" "w"))]
3480          UNSPEC_ADDV)))]
3481  "TARGET_SIMD
3482   && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3483  "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3484   [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3487 (define_expand "aarch64_<su>addlp<mode>"
3488   [(set (match_operand:<VDBLW> 0 "register_operand")
3489         (plus:<VDBLW>
3490           (vec_select:<VDBLW>
3491             (ANY_EXTEND:<V2XWIDE>
3492               (match_operand:VDQV_L 1 "register_operand"))
3493             (match_dup 2))
3494           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3495             (match_dup 3))))]
3496  "TARGET_SIMD"
3498    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3499    operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3500    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
3504 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3505   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3506         (plus:<VDBLW>
3507           (vec_select:<VDBLW>
3508             (ANY_EXTEND:<V2XWIDE>
3509               (match_operand:VDQV_L 1 "register_operand" "w"))
3510             (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3511           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3512             (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3513  "TARGET_SIMD
3514   && !rtx_equal_p (operands[2], operands[3])"
3515  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3516   [(set_attr "type" "neon_reduc_add<q>")]
3519 (define_insn "clrsb<mode>2<vczle><vczbe>"
3520   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3521         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3522   "TARGET_SIMD"
3523   "cls\\t%0.<Vtype>, %1.<Vtype>"
3524   [(set_attr "type" "neon_cls<q>")]
3527 (define_insn "clz<mode>2<vczle><vczbe>"
3528  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3529        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3530  "TARGET_SIMD"
3531  "clz\\t%0.<Vtype>, %1.<Vtype>"
3532   [(set_attr "type" "neon_cls<q>")]
3535 (define_insn "popcount<mode>2<vczle><vczbe>"
3536   [(set (match_operand:VB 0 "register_operand" "=w")
3537         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3538   "TARGET_SIMD"
3539   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3540   [(set_attr "type" "neon_cnt<q>")]
3543 ;; 'across lanes' max and min ops.
3545 ;; Template for outputting a scalar, so we can create __builtins which can be
3546 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
3547 (define_expand "reduc_<optab>_scal_<mode>"
3548   [(match_operand:<VEL> 0 "register_operand")
3549    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3550                  FMAXMINV)]
3551   "TARGET_SIMD"
3552   {
3553     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3554     rtx scratch = gen_reg_rtx (<MODE>mode);
3555     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3556                                                          operands[1]));
3557     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3558     DONE;
3559   }
3562 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3563   [(match_operand:<VEL> 0 "register_operand")
3564    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3565                  FMAXMINNMV)]
3566   "TARGET_SIMD"
3567   {
3568     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3569     DONE;
3570   }
3573 ;; Likewise for integer cases, signed and unsigned.
3574 (define_expand "reduc_<optab>_scal_<mode>"
3575   [(match_operand:<VEL> 0 "register_operand")
3576    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3577                     MAXMINV)]
3578   "TARGET_SIMD"
3579   {
3580     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3581     rtx scratch = gen_reg_rtx (<MODE>mode);
3582     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3583                                                          operands[1]));
3584     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3585     DONE;
3586   }
3589 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3590  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3591        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3592                     MAXMINV))]
3593  "TARGET_SIMD"
3594  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3595   [(set_attr "type" "neon_reduc_minmax<q>")]
3598 (define_insn "aarch64_reduc_<optab>_internalv2si"
3599  [(set (match_operand:V2SI 0 "register_operand" "=w")
3600        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3601                     MAXMINV))]
3602  "TARGET_SIMD"
3603  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3604   [(set_attr "type" "neon_reduc_minmax")]
3607 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3608  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3609        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3610                       FMAXMINV))]
3611  "TARGET_SIMD"
3612  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3613   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3616 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3617 ;; allocation.
3618 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3619 ;; to select.
3621 ;; Thus our BSL is of the form:
3622 ;;   op0 = bsl (mask, op2, op3)
3623 ;; We can use any of:
3625 ;;   if (op0 = mask)
3626 ;;     bsl mask, op1, op2
3627 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3628 ;;     bit op0, op2, mask
3629 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3630 ;;     bif op0, op1, mask
3632 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3633 ;; Some forms of straight-line code may generate the equivalent form
3634 ;; in *aarch64_simd_bsl<mode>_alt.
3636 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3637   [(set (match_operand:VDQ_I 0 "register_operand")
3638         (xor:VDQ_I
3639            (and:VDQ_I
3640              (xor:VDQ_I
3641                (match_operand:<V_INT_EQUIV> 3 "register_operand")
3642                (match_operand:VDQ_I 2 "register_operand"))
3643              (match_operand:VDQ_I 1 "register_operand"))
3644           (match_dup:<V_INT_EQUIV> 3)
3645         ))]
3646   "TARGET_SIMD"
3647   {@ [ cons: =0 , 1 , 2 , 3  ]
3648      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3649      [ w        , w , w , 0  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3650      [ w        , w , 0 , w  ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3651   }
3652   [(set_attr "type" "neon_bsl<q>")]
3655 ;; We need this form in addition to the above pattern to match the case
3656 ;; when combine tries merging three insns such that the second operand of
3657 ;; the outer XOR matches the second operand of the inner XOR rather than
3658 ;; the first.  The two are equivalent but since recog doesn't try all
3659 ;; permutations of commutative operations, we have to have a separate pattern.
3661 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3662   [(set (match_operand:VDQ_I 0 "register_operand")
3663         (xor:VDQ_I
3664            (and:VDQ_I
3665              (xor:VDQ_I
3666                (match_operand:VDQ_I 3 "register_operand")
3667                (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3668               (match_operand:VDQ_I 1 "register_operand"))
3669           (match_dup:<V_INT_EQUIV> 2)))]
3670   "TARGET_SIMD"
3671   {@ [ cons: =0 , 1 , 2 , 3  ]
3672      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3673      [ w        , w , 0 , w  ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3674      [ w        , w , w , 0  ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3675   }
3676   [(set_attr "type" "neon_bsl<q>")]
3679 ;; DImode is special, we want to avoid computing operations which are
3680 ;; more naturally computed in general purpose registers in the vector
3681 ;; registers.  If we do that, we need to move all three operands from general
3682 ;; purpose registers to vector registers, then back again.  However, we
3683 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3684 ;; optimizations based on the component operations of a BSL.
3686 ;; That means we need a splitter back to the individual operations, if they
3687 ;; would be better calculated on the integer side.
3689 (define_insn_and_split "aarch64_simd_bsldi_internal"
3690   [(set (match_operand:DI 0 "register_operand")
3691         (xor:DI
3692            (and:DI
3693              (xor:DI
3694                (match_operand:DI 3 "register_operand")
3695                (match_operand:DI 2 "register_operand"))
3696              (match_operand:DI 1 "register_operand"))
3697           (match_dup:DI 3)
3698         ))]
3699   "TARGET_SIMD"
3700   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3701      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %2.8b, %3.8b
3702      [ w        , w , w , 0 ; neon_bsl    , 4      ] bit\t%0.8b, %2.8b, %1.8b
3703      [ w        , w , 0 , w ; neon_bsl    , 4      ] bif\t%0.8b, %3.8b, %1.8b
3704      [ &r       , r , r , r ; multiple    , 12     ] #
3705   }
3706   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3707   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3709   /* Split back to individual operations.  If we're before reload, and
3710      able to create a temporary register, do so.  If we're after reload,
3711      we've got an early-clobber destination register, so use that.
3712      Otherwise, we can't create pseudos and we can't yet guarantee that
3713      operands[0] is safe to write, so FAIL to split.  */
3715   rtx scratch;
3716   if (reload_completed)
3717     scratch = operands[0];
3718   else if (can_create_pseudo_p ())
3719     scratch = gen_reg_rtx (DImode);
3720   else
3721     FAIL;
3723   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3724   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3725   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3726   DONE;
3730 (define_insn_and_split "aarch64_simd_bsldi_alt"
3731   [(set (match_operand:DI 0 "register_operand")
3732         (xor:DI
3733            (and:DI
3734              (xor:DI
3735                (match_operand:DI 3 "register_operand")
3736                (match_operand:DI 2 "register_operand"))
3737              (match_operand:DI 1 "register_operand"))
3738           (match_dup:DI 2)
3739         ))]
3740   "TARGET_SIMD"
3741   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3742      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %3.8b, %2.8b
3743      [ w        , w , 0 , w ; neon_bsl    , 4      ] bit\t%0.8b, %3.8b, %1.8b
3744      [ w        , w , w , 0 ; neon_bsl    , 4      ] bif\t%0.8b, %2.8b, %1.8b
3745      [ &r       , r , r , r ; multiple    , 12     ] #
3746   }
3747   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3748   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3750   /* Split back to individual operations.  If we're before reload, and
3751      able to create a temporary register, do so.  If we're after reload,
3752      we've got an early-clobber destination register, so use that.
3753      Otherwise, we can't create pseudos and we can't yet guarantee that
3754      operands[0] is safe to write, so FAIL to split.  */
3756   rtx scratch;
3757   if (reload_completed)
3758     scratch = operands[0];
3759   else if (can_create_pseudo_p ())
3760     scratch = gen_reg_rtx (DImode);
3761   else
3762     FAIL;
3764   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3765   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3766   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3767   DONE;
3771 (define_expand "aarch64_simd_bsl<mode>"
3772   [(match_operand:VALLDIF 0 "register_operand")
3773    (match_operand:<V_INT_EQUIV> 1 "register_operand")
3774    (match_operand:VALLDIF 2 "register_operand")
3775    (match_operand:VALLDIF 3 "register_operand")]
3776  "TARGET_SIMD"
3778   /* We can't alias operands together if they have different modes.  */
3779   rtx tmp = operands[0];
3780   if (FLOAT_MODE_P (<MODE>mode))
3781     {
3782       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3783       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3784       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3785     }
3786   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3787   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3788                                                          operands[1],
3789                                                          operands[2],
3790                                                          operands[3]));
3791   if (tmp != operands[0])
3792     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3794   DONE;
3797 (define_expand "vcond_mask_<mode><v_int_equiv>"
3798   [(match_operand:VALLDI 0 "register_operand")
3799    (match_operand:VALLDI 1 "nonmemory_operand")
3800    (match_operand:VALLDI 2 "nonmemory_operand")
3801    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3802   "TARGET_SIMD"
3804   /* If we have (a = (P) ? -1 : 0);
3805      Then we can simply move the generated mask (result must be int).  */
3806   if (operands[1] == CONSTM1_RTX (<MODE>mode)
3807       && operands[2] == CONST0_RTX (<MODE>mode))
3808     emit_move_insn (operands[0], operands[3]);
3809   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
3810   else if (operands[1] == CONST0_RTX (<MODE>mode)
3811            && operands[2] == CONSTM1_RTX (<MODE>mode))
3812     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3813   else
3814     {
3815       if (!REG_P (operands[1]))
3816         operands[1] = force_reg (<MODE>mode, operands[1]);
3817       if (!REG_P (operands[2]))
3818         operands[2] = force_reg (<MODE>mode, operands[2]);
3819       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3820                                              operands[1], operands[2]));
3821     }
3823   DONE;
3826 ;; Patterns comparing two vectors and conditionally jump
;; Compare two integer vectors and branch on (in)equality (the predicate
;; only admits EQ/NE).  A non-zero RHS is folded into the LHS with XOR so
;; the test becomes "tmp != 0"; the vector is then viewed as a DImode
;; scalar and compared against zero for the conditional jump.
3828 (define_expand "cbranch<mode>4"
3829   [(set (pc)
3830         (if_then_else
3831           (match_operator 0 "aarch64_equality_operator"
3832             [(match_operand:VDQ_I 1 "register_operand")
3833              (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3834           (label_ref (match_operand 3 ""))
3835           (pc)))]
3836   "TARGET_SIMD"
3838   auto code = GET_CODE (operands[0]);
3839   rtx tmp = operands[1];
3841   /* If comparing against a non-zero vector we have to do a comparison first
3842      so we can have a != 0 comparison with the result.  */
3843   if (operands[2] != CONST0_RTX (<MODE>mode))
3844     {
3845       tmp = gen_reg_rtx (<MODE>mode);
3846       emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3847     }
3849   /* Reduce 128-bit vectors to 64 bits via UMAXP; 64-bit vectors need no reduction.  */
3850   if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3851     {
3852       /* Always reduce using a V4SI.  */
3853       rtx reduc = gen_lowpart (V4SImode, tmp);
3854       rtx res = gen_reg_rtx (V4SImode);
3855       emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3856       emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3857     }
3859   rtx val = gen_reg_rtx (DImode);
3860   emit_move_insn (val, gen_lowpart (DImode, tmp));
3862   rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3863   rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3864   emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
3865   DONE;
3868 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing a per-element all-ones/all-zeros mask.
;; Compares against literal zero keep the zero operand (immediate CM<cc>
;; forms exist for the signed codes); unsigned LTU/LEU have no direct
;; instruction and are emitted as the reverse CMGTU/CMGEU with operands
;; swapped; NE is emitted as NOT (EQ).
3870 (define_expand "vec_cmp<mode><mode>"
3871   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3872           (match_operator 1 "comparison_operator"
3873             [(match_operand:VSDQ_I_DI 2 "register_operand")
3874              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3875   "TARGET_SIMD"
3877   rtx mask = operands[0];
3878   enum rtx_code code = GET_CODE (operands[1]);
3880   switch (code)
3881     {
3882     case NE:
3883     case LE:
3884     case LT:
3885     case GE:
3886     case GT:
3887     case EQ:
3888       if (operands[3] == CONST0_RTX (<MODE>mode))
3889         break;
3891       /* Fall through.  */
3892     default:
3893       if (!REG_P (operands[3]))
3894         operands[3] = force_reg (<MODE>mode, operands[3]);
3896       break;
3897     }
3899   switch (code)
3900     {
3901     case LT:
3902       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3903       break;
3905     case GE:
3906       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3907       break;
3909     case LE:
3910       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3911       break;
3913     case GT:
3914       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3915       break;
3917     case LTU: /* a LTU b  ==>  b GTU a.  */
3918       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3919       break;
3921     case GEU:
3922       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3923       break;
3925     case LEU: /* a LEU b  ==>  b GEU a.  */
3926       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3927       break;
3929     case GTU:
3930       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3931       break;
3933     case NE:
3934       /* Handle NE as !EQ.  */
3935       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3936       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3937       break;
3939     case EQ:
3940       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3941       break;
3943     default:
3944       gcc_unreachable ();
3945     }
3947   DONE;
;; Floating-point vector compare producing an integer element mask.
;; Quiet (UN*) compares must not raise FP exceptions, so NaN lanes of each
;; input are forced to zero before the ordered compare and the unordered
;; lanes are merged back in with ORN.  LTGT may raise, so it takes the
;; fast (a > b) | (b > a) path.
3950 (define_expand "vec_cmp<mode><v_int_equiv>"
3951   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3952         (match_operator 1 "comparison_operator"
3953             [(match_operand:VDQF 2 "register_operand")
3954              (match_operand:VDQF 3 "nonmemory_operand")]))]
3955   "TARGET_SIMD"
3957   int use_zero_form = 0;
3958   enum rtx_code code = GET_CODE (operands[1]);
3959   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3961   rtx (*comparison) (rtx, rtx, rtx) = NULL;
3963   switch (code)
3964     {
3965     case LE:
3966     case LT:
3967     case GE:
3968     case GT:
3969     case EQ:
3970       if (operands[3] == CONST0_RTX (<MODE>mode))
3971         {
3972           use_zero_form = 1;
3973           break;
3974         }
3975       /* Fall through.  */
3976     default:
3977       if (!REG_P (operands[3]))
3978         operands[3] = force_reg (<MODE>mode, operands[3]);
3980       break;
3981     }
3983   switch (code)
3984     {
3985     case LT:
3986       if (use_zero_form)
3987         {
3988           comparison = gen_aarch64_cmlt<mode>;
3989           break;
3990         }
3991       /* Fall through.  */
3992     case UNLT:
3993       std::swap (operands[2], operands[3]);
3994       /* Fall through.  */
3995     case UNGT:
3996     case GT:
3997       comparison = gen_aarch64_cmgt<mode>;
3998       break;
3999     case LE:
4000       if (use_zero_form)
4001         {
4002           comparison = gen_aarch64_cmle<mode>;
4003           break;
4004         }
4005       /* Fall through.  */
4006     case UNLE:
4007       std::swap (operands[2], operands[3]);
4008       /* Fall through.  */
4009     case UNGE:
4010     case GE:
4011       comparison = gen_aarch64_cmge<mode>;
4012       break;
4013     case NE:
4014     case EQ:
4015       comparison = gen_aarch64_cmeq<mode>;
4016       break;
4017     case UNEQ:
4018     case ORDERED:
4019     case UNORDERED:
4020     case LTGT:
4021       break;
4022     default:
4023       gcc_unreachable ();
4024     }
4026   switch (code)
4027     {
4028     case UNGE:
4029     case UNGT:
4030     case UNLE:
4031     case UNLT:
4032       {
4033         /* All of the above must not raise any FP exceptions.  Thus we first
4034            check each operand for NaNs and force any elements containing NaN to
4035            zero before using them in the compare.
4036            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4037                                      (cm<cc> (isnan (a) ? 0.0 : a,
4038                                               isnan (b) ? 0.0 : b))
4039            We use the following transformations for doing the comparisons:
4040            a UNGE b -> a GE b
4041            a UNGT b -> a GT b
4042            a UNLE b -> b GE a
4043            a UNLT b -> b GT a.  */
4045         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4046         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4047         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4048         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4049         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4050         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4051         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4052                                           lowpart_subreg (<V_INT_EQUIV>mode,
4053                                                           operands[2],
4054                                                           <MODE>mode)));
4055         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4056                                           lowpart_subreg (<V_INT_EQUIV>mode,
4057                                                           operands[3],
4058                                                           <MODE>mode)));
4059         gcc_assert (comparison != NULL);
4060         emit_insn (comparison (operands[0],
4061                                lowpart_subreg (<MODE>mode,
4062                                                tmp0, <V_INT_EQUIV>mode),
4063                                lowpart_subreg (<MODE>mode,
4064                                                tmp1, <V_INT_EQUIV>mode)));
4065         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4066       }
4067       break;
4069     case LT:
4070     case LE:
4071     case GT:
4072     case GE:
4073     case EQ:
4074     case NE:
4075       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
4076          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
4077          a GE b -> a GE b
4078          a GT b -> a GT b
4079          a LE b -> b GE a
4080          a LT b -> b GT a
4081          a EQ b -> a EQ b
4082          a NE b -> ~(a EQ b)  */
4083       gcc_assert (comparison != NULL);
4084       emit_insn (comparison (operands[0], operands[2], operands[3]));
4085       if (code == NE)
4086         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4087       break;
4089     case LTGT:
4090       /* LTGT is not guaranteed not to generate a FP exception.  So let's
4091          go the faster way : ((a > b) || (b > a)).  */
4092       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4093                                          operands[2], operands[3]));
4094       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4095       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4096       break;
4098     case ORDERED:
4099     case UNORDERED:
4100     case UNEQ:
4101       /* cmeq (a, a) & cmeq (b, b).  */
4102       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4103                                          operands[2], operands[2]));
4104       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4105       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp))
4107       if (code == UNORDERED)
4108         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4109       else if (code == UNEQ)
4110         {
4111           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4112           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4113         }
4114       break;
4116     default:
4117       gcc_unreachable ();
4118     }
4120   DONE;
;; Unsigned integer vector compare.  The rtx code in operands[1] already
;; encodes signedness (GTU/GEU/...), so simply delegate to vec_cmp.
4123 (define_expand "vec_cmpu<mode><mode>"
4124   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4125           (match_operator 1 "comparison_operator"
4126             [(match_operand:VSDQ_I_DI 2 "register_operand")
4127              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4128   "TARGET_SIMD"
4130   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4131                                       operands[2], operands[3]));
4132   DONE;
;; vcond: result = (op4 <cmp> op5) ? op1 : op2.  Build the element mask
;; with vec_cmp, then select with vcond_mask (BSL).
4135 (define_expand "vcond<mode><mode>"
4136   [(set (match_operand:VALLDI 0 "register_operand")
4137         (if_then_else:VALLDI
4138           (match_operator 3 "comparison_operator"
4139             [(match_operand:VALLDI 4 "register_operand")
4140              (match_operand:VALLDI 5 "nonmemory_operand")])
4141           (match_operand:VALLDI 1 "nonmemory_operand")
4142           (match_operand:VALLDI 2 "nonmemory_operand")))]
4143   "TARGET_SIMD"
4145   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4146   enum rtx_code code = GET_CODE (operands[3]);
4148   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4149      it as well as switch operands 1/2 in order to avoid the additional
4150      NOT instruction.  */
4151   if (code == NE)
4152     {
4153       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4154                                     operands[4], operands[5]);
4155       std::swap (operands[1], operands[2]);
4156     }
4157   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4158                                              operands[4], operands[5]));
4159   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4160                                                  operands[2], mask));
4162   DONE;
;; Mixed-mode vcond: FP comparison (op4/op5) selecting between vectors of
;; the equal-sized integer mode <V_cmp_mixed>.  Same NE -> EQ + swap trick
;; as vcond<mode><mode>.
4165 (define_expand "vcond<v_cmp_mixed><mode>"
4166   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4167         (if_then_else:<V_cmp_mixed>
4168           (match_operator 3 "comparison_operator"
4169             [(match_operand:VDQF_COND 4 "register_operand")
4170              (match_operand:VDQF_COND 5 "nonmemory_operand")])
4171           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4172           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4173   "TARGET_SIMD"
4175   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4176   enum rtx_code code = GET_CODE (operands[3]);
4178   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4179      it as well as switch operands 1/2 in order to avoid the additional
4180      NOT instruction.  */
4181   if (code == NE)
4182     {
4183       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4184                                     operands[4], operands[5]);
4185       std::swap (operands[1], operands[2]);
4186     }
4187   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4188                                              operands[4], operands[5]));
4189   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4190                                                 operands[0], operands[1],
4191                                                 operands[2], mask));
4193   DONE;
;; Unsigned vcond: like vcond<mode><mode> but the mask is built with the
;; integer vec_cmp, which honours the unsigned rtx codes itself.
4196 (define_expand "vcondu<mode><mode>"
4197   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4198         (if_then_else:VSDQ_I_DI
4199           (match_operator 3 "comparison_operator"
4200             [(match_operand:VSDQ_I_DI 4 "register_operand")
4201              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4202           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4203           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4204   "TARGET_SIMD"
4206   rtx mask = gen_reg_rtx (<MODE>mode);
4207   enum rtx_code code = GET_CODE (operands[3]);
4209   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4210      it as well as switch operands 1/2 in order to avoid the additional
4211      NOT instruction.  */
4212   if (code == NE)
4213     {
4214       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4215                                     operands[4], operands[5]);
4216       std::swap (operands[1], operands[2]);
4217     }
4218   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4219                                       operands[4], operands[5]));
4220   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4221                                                  operands[2], mask));
4222   DONE;
;; Mixed-mode unsigned vcond: integer comparison of <V_cmp_mixed> vectors
;; selecting between FP vectors of mode VDQF.
4225 (define_expand "vcondu<mode><v_cmp_mixed>"
4226   [(set (match_operand:VDQF 0 "register_operand")
4227         (if_then_else:VDQF
4228           (match_operator 3 "comparison_operator"
4229             [(match_operand:<V_cmp_mixed> 4 "register_operand")
4230              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4231           (match_operand:VDQF 1 "nonmemory_operand")
4232           (match_operand:VDQF 2 "nonmemory_operand")))]
4233   "TARGET_SIMD"
4235   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4236   enum rtx_code code = GET_CODE (operands[3]);
4238   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4239      it as well as switch operands 1/2 in order to avoid the additional
4240      NOT instruction.  */
4241   if (code == NE)
4242     {
4243       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244                                     operands[4], operands[5]);
4245       std::swap (operands[1], operands[2]);
4246     }
4247   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4248                                                   mask, operands[3],
4249                                                   operands[4], operands[5]));
4250   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4251                                                  operands[2], mask));
4252   DONE;
4255 ;; Patterns for AArch64 SIMD Intrinsics.
4257 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract one lane of a B/H vector and sign-extend it into a W or X
;; general register.  The lane index is endian-corrected at output time.
4258 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4259   [(set (match_operand:GPI 0 "register_operand" "=r")
4260         (sign_extend:GPI
4261           (vec_select:<VDQQH:VEL>
4262             (match_operand:VDQQH 1 "register_operand" "w")
4263             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4264   "TARGET_SIMD"
4265   {
4266     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4267                                            INTVAL (operands[2]));
4268     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4269   }
4270   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; UMOV: extract one lane and zero-extend it into a general register.
;; %w0 is used even for a DImode destination: a write to the W register
;; zeroes the upper 64 bits, giving the zero-extension for free.
4273 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4274   [(set (match_operand:GPI 0 "register_operand" "=r")
4275         (zero_extend:GPI
4276           (vec_select:<VDQQH:VEL>
4277             (match_operand:VDQQH 1 "register_operand" "w")
4278             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4279   "TARGET_SIMD"
4280   {
4281     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4282                                            INTVAL (operands[2]));
4283     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4284   }
4285   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4288 ;; Lane extraction of a value, neither sign nor zero extension
4289 ;; is guaranteed so upper bits should be considered undefined.
4290 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4291 ;; Extracting lane zero is split into a simple move when it is between SIMD
4292 ;; registers or a store.
;; Extract one lane into a GPR (UMOV), an FPR (DUP) or memory (ST1).
;; After reload, extracting (endian-corrected) lane 0 is split into a
;; simple mode-punned register/store move.
4293 (define_insn_and_split "aarch64_get_lane<mode>"
4294   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4295         (vec_select:<VEL>
4296           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4297           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4298   "TARGET_SIMD"
4299   {
4300     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4301     switch (which_alternative)
4302       {
4303         case 0:
4304           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4305         case 1:
4306           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4307         case 2:
4308           return "st1\\t{%1.<Vetype>}[%2], %0";
4309         default:
4310           gcc_unreachable ();
4311       }
4312   }
4313  "&& reload_completed
4314   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4315  [(set (match_dup 0) (match_dup 1))]
4317    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4319   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Extract the most significant 64-bit half of a two-element 128-bit
;; vector into a general register with FMOV.
4322 (define_insn "*aarch64_get_high<mode>"
4323   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4324         (vec_select:<VEL>
4325           (match_operand:VQ_2E 1 "register_operand" "w")
4326           (parallel [(match_operand:SI 2 "immediate_operand")])))]
4327   "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4328   "fmov\t%0, %1.d[1]"
4329   [(set_attr "type" "f_mrc")]
;; Merge two adjacent memory halves of a vec_concat into a single
;; double-width LDR; aarch64_mergeable_load_pair_p proves the two
;; addresses form one contiguous, correctly ordered pair.
4332 (define_insn "load_pair_lanes<mode>"
4333   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4334         (vec_concat:<VDBL>
4335            (match_operand:VDCSIF 1 "memory_operand" "Utq")
4336            (match_operand:VDCSIF 2 "memory_operand" "m")))]
4337   "TARGET_FLOAT
4338    && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4339   "ldr\\t%<single_dtype>0, %1"
4340   [(set_attr "type" "neon_load1_1reg<dblq>")]
4343 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4344 ;; below.  The reason for having both of them is that the alternatives of
4345 ;; the later patterns do not have consistent register preferences: the STP
4346 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4347 ;; the GPR form is more natural for scalar integers) whereas the other
4348 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4350 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4351 ;; which the destination was always memory.  On the other hand, expressing
4352 ;; the true preferences makes GPRs seem more palatable than they really are
4353 ;; for register destinations.
4355 ;; Despite that, we do still want the general form to have STP alternatives,
4356 ;; in order to handle cases where a register destination is spilled.
4358 ;; The best compromise therefore seemed to be to have a dedicated STP
4359 ;; pattern to catch cases in which the destination was always memory.
4360 ;; This dedicated pattern must come first.
;; STP of the two vec_concat halves straight to memory.  See the comment
;; above for why this dedicated pattern must precede the general
;; vec_concat patterns.
4362 (define_insn "store_pair_lanes<mode>"
4363   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4364         (vec_concat:<VDBL>
4365            (match_operand:VDCSIF 1 "register_operand")
4366            (match_operand:VDCSIF 2 "register_operand")))]
4367   "TARGET_FLOAT"
4368   {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4369      [ Umn      , w , w ; neon_stp    ] stp\t%<single_type>1, %<single_type>2, %y0
4370      [ Umn      , r , r ; store_16    ] stp\t%<single_wx>1, %<single_wx>2, %y0
4371   }
4374 ;; Form a vector whose least significant half comes from operand 1 and whose
4375 ;; most significant half comes from operand 2.  The register alternatives
4376 ;; tie the least significant half to the same register as the destination,
4377 ;; so that only the other half needs to be handled explicitly.  For the
4378 ;; reasons given above, the STP alternatives use ? for constraints that
4379 ;; the register alternatives either don't accept or themselves disparage.
;; Little-endian combine: operand 1 (low half) is tied to the destination;
;; the high half is inserted via INS/FMOV/LD1, or the whole pair is
;; stored with STP when the destination is memory.
4381 (define_insn "*aarch64_combine_internal<mode>"
4382   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4383         (vec_concat:<VDBL>
4384           (match_operand:VDCSIF 1 "register_operand")
4385           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4386   "TARGET_FLOAT
4387    && !BYTES_BIG_ENDIAN
4388    && (register_operand (operands[0], <VDBL>mode)
4389        || register_operand (operands[2], <MODE>mode))"
4390   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4391      [ w        , 0  , w   ; neon_ins<dblq>            , simd  ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4392      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4393      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4394      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4395      [ Umn      , ?w , w   ; neon_stp                  , *     ] stp\t%<single_type>1, %<single_type>2, %y0
4396      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>1, %<single_wx>2, %y0
4397   }
;; Big-endian counterpart of *aarch64_combine_internal: the vec_concat
;; operand order is reversed (operand 2 first), and the STP alternatives
;; store the halves in swapped order.
4400 (define_insn "*aarch64_combine_internal_be<mode>"
4401   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4402         (vec_concat:<VDBL>
4403           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4404           (match_operand:VDCSIF 1 "register_operand")))]
4405   "TARGET_FLOAT
4406    && BYTES_BIG_ENDIAN
4407    && (register_operand (operands[0], <VDBL>mode)
4408        || register_operand (operands[2], <MODE>mode))"
4409   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4410      [ w        , 0  , w   ; neon_ins<dblq>            , simd  ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4411      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4412      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4413      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4414      [ Umn      , ?w , ?w  ; neon_stp                  , *     ] stp\t%<single_type>2, %<single_type>1, %y0
4415      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>2, %<single_wx>1, %y0
4416   }
4419 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4420 ;; dest vector.
;; Little-endian combine with a zero high half: a scalar FMOV/LDR to the
;; low S/D register implicitly zeroes the rest of the vector, so no
;; explicit zeroing instruction is needed.
4422 (define_insn "*aarch64_combinez<mode>"
4423   [(set (match_operand:<VDBL> 0 "register_operand")
4424         (vec_concat:<VDBL>
4425           (match_operand:VDCSIF 1 "nonimmediate_operand")
4426           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4427   "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4428   {@ [ cons: =0 , 1  ; attrs: type      ]
4429      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4430      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4431      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4432   }
;; Big-endian counterpart of *aarch64_combinez: only the vec_concat
;; operand order differs, the emitted instructions are identical.
4435 (define_insn "*aarch64_combinez_be<mode>"
4436   [(set (match_operand:<VDBL> 0 "register_operand")
4437         (vec_concat:<VDBL>
4438           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4439           (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4440   "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4441   {@ [ cons: =0 , 1  ; attrs: type      ]
4442      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4443      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4444      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4445   }
4448 ;; Form a vector whose first half (in array order) comes from operand 1
4449 ;; and whose second half (in array order) comes from operand 2.
4450 ;; This operand order follows the RTL vec_concat operation.
;; Expander that routes a general vec_concat to one of the insn patterns
;; above (load_pair_lanes, *aarch64_combinez or *aarch64_combine_internal),
;; legitimizing the operands to match the chosen pattern's predicates.
;; lo/hi index the memory-order halves, which swap for big-endian.
4451 (define_expand "@aarch64_vec_concat<mode>"
4452   [(set (match_operand:<VDBL> 0 "register_operand")
4453         (vec_concat:<VDBL>
4454           (match_operand:VDCSIF 1 "general_operand")
4455           (match_operand:VDCSIF 2 "general_operand")))]
4456   "TARGET_FLOAT"
4458   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4459   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4461   if (MEM_P (operands[1])
4462       && MEM_P (operands[2])
4463       && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4464     /* Use load_pair_lanes<mode>.  */
4465     ;
4466   else if (operands[hi] == CONST0_RTX (<MODE>mode))
4467     {
4468       /* Use *aarch64_combinez<mode>.  */
4469       if (!nonimmediate_operand (operands[lo], <MODE>mode))
4470         operands[lo] = force_reg (<MODE>mode, operands[lo]);
4471     }
4472   else
4473     {
4474       /* Use *aarch64_combine_internal<mode>.  */
4475       operands[lo] = force_reg (<MODE>mode, operands[lo]);
4476       if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4477         {
4478           if (MEM_P (operands[hi]))
4479             {
4480               rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4481               operands[hi] = replace_equiv_address (operands[hi], addr);
4482             }
4483           else
4484             operands[hi] = force_reg (<MODE>mode, operands[hi]);
4485         }
4486     }
4489 ;; Form a vector whose least significant half comes from operand 1 and whose
4490 ;; most significant half comes from operand 2.  This operand order follows
4491 ;; arm_neon.h vcombine* intrinsics.
;; vcombine*: operand 1 is always the least significant half, so swap the
;; operands for big-endian before delegating to the RTL-ordered
;; @aarch64_vec_concat expander.
4492 (define_expand "aarch64_combine<mode>"
4493   [(match_operand:<VDBL> 0 "register_operand")
4494    (match_operand:VDC 1 "general_operand")
4495    (match_operand:VDC 2 "general_operand")]
4496   "TARGET_FLOAT"
4498   if (BYTES_BIG_ENDIAN)
4499     std::swap (operands[1], operands[2]);
4500   emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4501                                            operands[2]));
4502   DONE;
4506 ;; <su><addsub>l<q>.
;; S/UADDL2, S/USUBL2: widening add/sub of the high halves of two
;; 128-bit vectors.
4508 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4509  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4510        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4511                            (match_operand:VQW 1 "register_operand" "w")
4512                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4513                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4514                            (match_operand:VQW 2 "register_operand" "w")
4515                            (match_dup 3)))))]
4516   "TARGET_SIMD"
4517   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4518   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; S/UADDL, S/USUBL: widening add/sub of the low halves of two
;; 128-bit vectors.
4521 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4522  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4523        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4524                            (match_operand:VQW 1 "register_operand" "w")
4525                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4526                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4527                            (match_operand:VQW 2 "register_operand" "w")
4528                            (match_dup 3)))))]
4529   "TARGET_SIMD"
4530   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4531   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Standard-name wrapper: widening add of the low halves.
4534 (define_expand "vec_widen_<su>add_lo_<mode>"
4535   [(match_operand:<VWIDE> 0 "register_operand")
4536    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4537    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4538   "TARGET_SIMD"
4540   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4541   emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4542                                                      operands[2], p));
4543   DONE;
;; Standard-name wrapper: widening add of the high halves.
4546 (define_expand "vec_widen_<su>add_hi_<mode>"
4547   [(match_operand:<VWIDE> 0 "register_operand")
4548    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4549    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4550   "TARGET_SIMD"
4552   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4553   emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4554                                                      operands[2], p));
4555   DONE;
;; Standard-name wrapper: widening subtract of the low halves.
4558 (define_expand "vec_widen_<su>sub_lo_<mode>"
4559   [(match_operand:<VWIDE> 0 "register_operand")
4560    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4561    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4562   "TARGET_SIMD"
4564   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4565   emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4566                                                      operands[2], p));
4567   DONE;
;; Standard-name wrapper: widening subtract of the high halves.
4570 (define_expand "vec_widen_<su>sub_hi_<mode>"
4571   [(match_operand:<VWIDE> 0 "register_operand")
4572    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4573    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4574   "TARGET_SIMD"
4576   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4577   emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4578                                                      operands[2], p));
4579   DONE;
;; Intrinsic expander for SADDL2 (signed widening add, high halves).
4582 (define_expand "aarch64_saddl2<mode>"
4583   [(match_operand:<VWIDE> 0 "register_operand")
4584    (match_operand:VQW 1 "register_operand")
4585    (match_operand:VQW 2 "register_operand")]
4586   "TARGET_SIMD"
4588   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4589   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4590                                                   operands[2], p));
4591   DONE;
;; Intrinsic expander for UADDL2 (unsigned widening add, high halves).
4594 (define_expand "aarch64_uaddl2<mode>"
4595   [(match_operand:<VWIDE> 0 "register_operand")
4596    (match_operand:VQW 1 "register_operand")
4597    (match_operand:VQW 2 "register_operand")]
4598   "TARGET_SIMD"
4600   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4601   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4602                                                   operands[2], p));
4603   DONE;
;; Intrinsic expander for SSUBL2 (signed widening subtract, high halves).
4606 (define_expand "aarch64_ssubl2<mode>"
4607   [(match_operand:<VWIDE> 0 "register_operand")
4608    (match_operand:VQW 1 "register_operand")
4609    (match_operand:VQW 2 "register_operand")]
4610   "TARGET_SIMD"
4612   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4613   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4614                                                 operands[2], p));
4615   DONE;
;; Intrinsic expander for USUBL2 (unsigned widening subtract, high halves).
4618 (define_expand "aarch64_usubl2<mode>"
4619   [(match_operand:<VWIDE> 0 "register_operand")
4620    (match_operand:VQW 1 "register_operand")
4621    (match_operand:VQW 2 "register_operand")]
4622   "TARGET_SIMD"
4624   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4625   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4626                                                 operands[2], p));
4627   DONE;
;; S/UADDL, S/USUBL on whole 64-bit vectors (no half selection needed).
4630 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4631  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4632        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4633                            (match_operand:VD_BHSI 1 "register_operand" "w"))
4634                        (ANY_EXTEND:<VWIDE>
4635                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4636   "TARGET_SIMD"
4637   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4638   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4641 ;; <su><addsub>w<q>.
;; Standard-named widening-sum expander, Q-register variant: widen operand 1
;; to twice the element width and accumulate into operand 2.  Implemented as
;; saddw on the lo half (via _internal with a lo-half PARALLEL) followed by
;; saddw2 on the hi half.
4643 (define_expand "widen_ssum<mode>3"
4644   [(set (match_operand:<VDBLW> 0 "register_operand")
4645         (plus:<VDBLW> (sign_extend:<VDBLW> 
4646                         (match_operand:VQW 1 "register_operand"))
4647                       (match_operand:<VDBLW> 2 "register_operand")))]
4648   "TARGET_SIMD"
4649   {
4650     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4651     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4653     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4654                                                 operands[1], p));
4655     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4656     DONE;
4657   }
;; D-register variant: the whole 64-bit input widens in one saddw.
4660 (define_expand "widen_ssum<mode>3"
4661   [(set (match_operand:<VWIDE> 0 "register_operand")
4662         (plus:<VWIDE> (sign_extend:<VWIDE>
4663                         (match_operand:VD_BHSI 1 "register_operand"))
4664                       (match_operand:<VWIDE> 2 "register_operand")))]
4665   "TARGET_SIMD"
4667   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4668   DONE;
;; Unsigned counterpart of the Q-register widen_ssum above: uaddw lo half,
;; then uaddw2 hi half.
4671 (define_expand "widen_usum<mode>3"
4672   [(set (match_operand:<VDBLW> 0 "register_operand")
4673         (plus:<VDBLW> (zero_extend:<VDBLW> 
4674                         (match_operand:VQW 1 "register_operand"))
4675                       (match_operand:<VDBLW> 2 "register_operand")))]
4676   "TARGET_SIMD"
4677   {
4678     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4681     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4682                                                  operands[1], p));
4683     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4684     DONE;
4685   }
;; Unsigned D-register variant: single uaddw.
4688 (define_expand "widen_usum<mode>3"
4689   [(set (match_operand:<VWIDE> 0 "register_operand")
4690         (plus:<VWIDE> (zero_extend:<VWIDE>
4691                         (match_operand:VD_BHSI 1 "register_operand"))
4692                       (match_operand:<VWIDE> 2 "register_operand")))]
4693   "TARGET_SIMD"
4695   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4696   DONE;
;; Widening subtract-wide: wide operand 1 minus the extended 64-bit operand 2
;; ([su]subw).
4699 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4700   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4701         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4702           (ANY_EXTEND:<VWIDE>
4703             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4704   "TARGET_SIMD"
4705   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4706   [(set_attr "type" "neon_sub_widen")]
;; As above, but operand 2 is the LO half of a Q register, selected via a
;; vect_par_cnst_lo_half PARALLEL; still emits [su]subw.
4709 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4710   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4711         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4712           (ANY_EXTEND:<VWIDE>
4713             (vec_select:<VHALF>
4714               (match_operand:VQW 2 "register_operand" "w")
4715               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4716   "TARGET_SIMD"
4717   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718   [(set_attr "type" "neon_sub_widen")]
;; HI-half counterpart: emits [su]subw2.
4721 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4722   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4723         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4724           (ANY_EXTEND:<VWIDE>
4725             (vec_select:<VHALF>
4726               (match_operand:VQW 2 "register_operand" "w")
4727               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4728   "TARGET_SIMD"
4729   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4730   [(set_attr "type" "neon_sub_widen")]
;; Widening add-wide ([su]addw).  Note the extended operand is operand 2 in
;; the template but first in the canonical PLUS order.
4733 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4734   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4735         (plus:<VWIDE>
4736           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4737           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4738   "TARGET_SIMD"
4739   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4740   [(set_attr "type" "neon_add_widen")]
;; LO-half add-wide: [su]addw on the lo half of a Q-register operand.
4743 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4744   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4745         (plus:<VWIDE>
4746           (ANY_EXTEND:<VWIDE>
4747             (vec_select:<VHALF>
4748               (match_operand:VQW 2 "register_operand" "w")
4749               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4750           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4751   "TARGET_SIMD"
4752   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4753   [(set_attr "type" "neon_add_widen")]
;; HI-half add-wide: [su]addw2.
4756 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4757   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4758         (plus:<VWIDE>
4759           (ANY_EXTEND:<VWIDE>
4760             (vec_select:<VHALF>
4761               (match_operand:VQW 2 "register_operand" "w")
4762               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4763           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4764   "TARGET_SIMD"
4765   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4766   [(set_attr "type" "neon_add_widen")]
;; Generic [su]{add,sub}w2 expander: fills in the hi-half PARALLEL
;; (operand 3) and emits the matching *_internal pattern.
4769 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4770   [(set (match_operand:<VWIDE> 0 "register_operand")
4771         (ADDSUB:<VWIDE>
4772           (ANY_EXTEND:<VWIDE>
4773             (vec_select:<VHALF>
4774               (match_operand:VQW 2 "register_operand")
4775               (match_dup 3)))
4776           (match_operand:<VWIDE> 1 "register_operand")))]
4777   "TARGET_SIMD"
4779   /* We still do an emit_insn rather than relying on the pattern above
4780      because for the MINUS case the operands would need to be swapped
4781      around.  */
4782   operands[3]
4783     = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4784   emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4785                                                        operands[0],
4786                                                        operands[1],
4787                                                        operands[2],
4788                                                        operands[3]));
4789   DONE;
4792 ;; <su><r>h<addsub>.
;; Standard-named halving-average (floor) expander, modelled as
;; truncate ((ext(a) + ext(b)) >> 1) in the double-width mode; operand 3
;; is the all-ones shift-amount vector filled in by the preparation code.
4794 (define_expand "<su_optab>avg<mode>3_floor"
4795   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4796         (truncate:VDQ_BHSI
4797           (ashiftrt:<V2XWIDE>
4798             (plus:<V2XWIDE>
4799               (ANY_EXTEND:<V2XWIDE>
4800                 (match_operand:VDQ_BHSI 1 "register_operand"))
4801               (ANY_EXTEND:<V2XWIDE>
4802                 (match_operand:VDQ_BHSI 2 "register_operand")))
4803             (match_dup 3))))]
4804   "TARGET_SIMD"
4805   {
4806     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4807   }
;; Rounding (ceil) variant: adds 1 (match_dup 3) before the shift so the
;; result rounds up — truncate ((ext(a) + ext(b) + 1) >> 1).
4810 (define_expand "<su_optab>avg<mode>3_ceil"
4811   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4812         (truncate:VDQ_BHSI
4813           (ashiftrt:<V2XWIDE>
4814             (plus:<V2XWIDE>
4815               (plus:<V2XWIDE>
4816                 (ANY_EXTEND:<V2XWIDE>
4817                   (match_operand:VDQ_BHSI 1 "register_operand"))
4818                 (ANY_EXTEND:<V2XWIDE>
4819                   (match_operand:VDQ_BHSI 2 "register_operand")))
4820                (match_dup 3))
4821             (match_dup 3))))]
4822   "TARGET_SIMD"
4823   {
4824     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4825   }
;; Halving subtract: truncate ((ext(a) - ext(b)) >> 1), for [su]hsub.
4828 (define_expand "aarch64_<su>hsub<mode>"
4829   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4830         (truncate:VDQ_BHSI
4831           (ashiftrt:<V2XWIDE>
4832             (minus:<V2XWIDE>
4833               (ANY_EXTEND:<V2XWIDE>
4834                 (match_operand:VDQ_BHSI 1 "register_operand"))
4835               (ANY_EXTEND:<V2XWIDE>
4836                 (match_operand:VDQ_BHSI 2 "register_operand")))
4837             (match_dup 3))))]
4838   "TARGET_SIMD"
4839   {
4840     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4841   }
;; Matching insn for the halving add/sub expanders above: emits
;; [su]h{add,sub}.  The <vczle><vczbe> substs add the implicit-zeroing
;; 64-bit forms described at the top of the file.
4844 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4845   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4846         (truncate:VDQ_BHSI
4847           (ashiftrt:<V2XWIDE>
4848             (ADDSUB:<V2XWIDE>
4849               (ANY_EXTEND:<V2XWIDE>
4850                 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4851               (ANY_EXTEND:<V2XWIDE>
4852                 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4853             (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4854   "TARGET_SIMD"
4855   "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4856   [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
;; Rounding halving add insn (extra +1 before the shift): [su]rhadd.
4859 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4860   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4861         (truncate:VDQ_BHSI
4862           (ashiftrt:<V2XWIDE>
4863             (plus:<V2XWIDE>
4864               (plus:<V2XWIDE>
4865                 (ANY_EXTEND:<V2XWIDE>
4866                   (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4867                 (ANY_EXTEND:<V2XWIDE>
4868                   (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4869                (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4870             (match_dup 3))))]
4871   "TARGET_SIMD"
4872   "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4873   [(set_attr "type" "neon_add_halve<q>")]
4876 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: truncate ((a OP b) >> half-elt-width),
;; emitting {add,sub}hn.  Operand 3 must be the exact half-width shift.
4878 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4879   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4880         (truncate:<VNARROWQ>
4881           (ashiftrt:VQN
4882             (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4883                         (match_operand:VQN 2 "register_operand" "w"))
4884             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4885   "TARGET_SIMD"
4886   "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4887   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Rounding variant: a rounding constant (operand 3, 1 << (bits/2 - 1)) is
;; added before the shift — r{add,sub}hn.
4890 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4891   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4892         (truncate:<VNARROWQ>
4893           (ashiftrt:VQN
4894             (plus:VQN
4895               (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4896                           (match_operand:VQN 2 "register_operand" "w"))
4897               (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4898             (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4899   "TARGET_SIMD"
4900   "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4901   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Expander: materialises the half-element-width shift vector and emits the
;; insn above.
4904 (define_expand "aarch64_<optab>hn<mode>"
4905   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4906         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4907                     (match_operand:VQN 2 "register_operand")))]
4908   "TARGET_SIMD"
4909   {
4910     rtx shft
4911       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4912                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4913     emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4914                                                  operands[2], shft));
4915     DONE;
4916   }
;; Rounding expander: also builds the rounding addend 1 << (bits/2 - 1).
4919 (define_expand "aarch64_r<optab>hn<mode>"
4920   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4921         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4922                     (match_operand:VQN 2 "register_operand")))]
4923   "TARGET_SIMD"
4924   {
4925     rtx shft
4926       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4927                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4928     rtx rnd
4929       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4930         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4931     emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4932                                                   operands[2], rnd, shft));
4933     DONE;
4934   }
;; {add,sub}hn2: narrow into the HIGH half of the destination while keeping
;; operand 1 (tied to the output, "0") as the low half.  Little-endian
;; form — vec_concat order is (old-lo, new-narrowed).
4937 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4938   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4939         (vec_concat:<VNARROWQ2>
4940           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4941           (truncate:<VNARROWQ>
4942             (ashiftrt:VQN
4943               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4944                           (match_operand:VQN 3 "register_operand" "w"))
4945               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4946   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4947   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4948   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Rounding little-endian hn2 (r{add,sub}hn2) with rounding addend op 4.
4951 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4952   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4953         (vec_concat:<VNARROWQ2>
4954           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4955           (truncate:<VNARROWQ>
4956             (ashiftrt:VQN
4957               (plus:VQN
4958                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4959                             (match_operand:VQN 3 "register_operand" "w"))
4960                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4961               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4962   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4963   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4964   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Big-endian hn2: identical semantics, vec_concat operands swapped.
4967 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4968   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4969         (vec_concat:<VNARROWQ2>
4970           (truncate:<VNARROWQ>
4971             (ashiftrt:VQN
4972               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4973                           (match_operand:VQN 3 "register_operand" "w"))
4974               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4975           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4976   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4977   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4978   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Big-endian rounding hn2.
4981 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4982   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4983         (vec_concat:<VNARROWQ2>
4984           (truncate:<VNARROWQ>
4985             (ashiftrt:VQN
4986               (plus:VQN
4987                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4988                             (match_operand:VQN 3 "register_operand" "w"))
4989                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4990               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4991           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4992   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4993   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4994   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
;; Expander choosing the LE or BE hn2 insn at expand time; builds the
;; half-width shift-amount vector shared by both.
4997 (define_expand "aarch64_<optab>hn2<mode>"
4998   [(match_operand:<VNARROWQ2> 0 "register_operand")
4999    (match_operand:<VNARROWQ> 1 "register_operand")
5000    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5001                (match_operand:VQN 3 "register_operand"))]
5002   "TARGET_SIMD"
5003   {
5004     rtx shft
5005       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5006                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5007     if (BYTES_BIG_ENDIAN)
5008       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
5009                                 operands[1], operands[2], operands[3], shft));
5010     else
5011       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
5012                                 operands[1], operands[2], operands[3], shft));
5013     DONE;
5014   }
;; Rounding variant: additionally builds the 1 << (bits/2 - 1) rounding
;; addend before selecting the endian-specific insn.
5017 (define_expand "aarch64_r<optab>hn2<mode>"
5018   [(match_operand:<VNARROWQ2> 0 "register_operand")
5019    (match_operand:<VNARROWQ> 1 "register_operand")
5020    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5021                (match_operand:VQN 3 "register_operand"))]
5022   "TARGET_SIMD"
5023   {
5024     rtx shft
5025       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5026                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5027     rtx rnd
5028       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5029         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
5030     if (BYTES_BIG_ENDIAN)
5031       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
5032                                 operands[1], operands[2], operands[3], rnd, shft));
5033     else
5034       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5035                                 operands[1], operands[2], operands[3], rnd, shft));
5036     DONE;
5037   }
5040 ;; Optimize ((a + b) >> n) a+ c where n is half the bitsize of the vector
;; Combiner pattern split into addhn (narrowing high-half add) followed by
;; uaddw (widening add of c).  The earlyclobber "=&w" keeps the destination
;; distinct from the inputs; when pseudos are unavailable the narrow temp
;; reuses the destination register's low half.
5041 (define_insn_and_split "*bitmask_shift_plus<mode>"
5042   [(set (match_operand:VQN 0 "register_operand" "=&w")
5043         (plus:VQN
5044           (lshiftrt:VQN
5045             (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5046                       (match_operand:VQN 2 "register_operand" "w"))
5047             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5048           (match_operand:VQN 4 "register_operand" "w")))]
5049   "TARGET_SIMD"
5050   "#"
5051   "&& true"
5052   [(const_int 0)]
5054   rtx tmp;
5055   if (can_create_pseudo_p ())
5056     tmp = gen_reg_rtx (<VNARROWQ>mode);
5057   else
5058     tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5059   emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5060   emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5061   DONE;
5064 ;; pmul.
;; Polynomial (carry-less) multiply on byte vectors, via UNSPEC_PMUL.
5066 (define_insn "aarch64_pmul<mode>"
5067   [(set (match_operand:VB 0 "register_operand" "=w")
5068         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5069                     (match_operand:VB 2 "register_operand" "w")]
5070                    UNSPEC_PMUL))]
5071  "TARGET_SIMD"
5072  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5073   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Widening polynomial multiply long of two 8-byte vectors: pmull.
5076 (define_insn "aarch64_pmullv8qi"
5077   [(set (match_operand:V8HI 0 "register_operand" "=w")
5078         (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5079                       (match_operand:V8QI 2 "register_operand" "w")]
5080                      UNSPEC_PMULL))]
5081  "TARGET_SIMD"
5082  "pmull\\t%0.8h, %1.8b, %2.8b"
5083   [(set_attr "type" "neon_mul_b_long")]
;; pmull2: same operation on the HIGH halves of two 16-byte vectors,
;; both selected via the shared hi-half PARALLEL (operand 3).
5086 (define_insn "aarch64_pmull_hiv16qi_insn"
5087   [(set (match_operand:V8HI 0 "register_operand" "=w")
5088         (unspec:V8HI
5089           [(vec_select:V8QI
5090              (match_operand:V16QI 1 "register_operand" "w")
5091              (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5092            (vec_select:V8QI
5093              (match_operand:V16QI 2 "register_operand" "w")
5094              (match_dup 3))]
5095           UNSPEC_PMULL))]
5096  "TARGET_SIMD"
5097  "pmull2\\t%0.8h, %1.16b, %2.16b"
5098   [(set_attr "type" "neon_mul_b_long")]
;; Expander supplying the hi-half PARALLEL for the insn above.
5101 (define_expand "aarch64_pmull_hiv16qi"
5102   [(match_operand:V8HI 0 "register_operand")
5103    (match_operand:V16QI 1 "register_operand")
5104    (match_operand:V16QI 2 "register_operand")]
5105  "TARGET_SIMD"
5107    rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5108    emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5109                                               operands[2], p));
5110    DONE;
5114 ;; fmulx.
;; Extended FP multiply (fmulx), vector and scalar modes, via UNSPEC_FMULX.
5116 (define_insn "aarch64_fmulx<mode>"
5117   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5118         (unspec:VHSDF_HSDF
5119           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5120            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5121            UNSPEC_FMULX))]
5122  "TARGET_SIMD"
5123  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5124  [(set_attr "type" "neon_fp_mul_<stype>")]
5127 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; fmulx by a duplicated lane taken from the opposite-width vector mode;
;; the lane index is re-numbered for endianness at output time.
5129 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5130   [(set (match_operand:VDQSF 0 "register_operand" "=w")
5131         (unspec:VDQSF
5132          [(match_operand:VDQSF 1 "register_operand" "w")
5133           (vec_duplicate:VDQSF
5134            (vec_select:<VEL>
5135             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5136             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5137          UNSPEC_FMULX))]
5138   "TARGET_SIMD"
5139   {
5140     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5141     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5142   }
5143   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5146 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width lane variant of fmulx-by-element.
5148 (define_insn "*aarch64_mulx_elt<mode>"
5149   [(set (match_operand:VDQF 0 "register_operand" "=w")
5150         (unspec:VDQF
5151          [(match_operand:VDQF 1 "register_operand" "w")
5152           (vec_duplicate:VDQF
5153            (vec_select:<VEL>
5154             (match_operand:VDQF 2 "register_operand" "w")
5155             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5156          UNSPEC_FMULX))]
5157   "TARGET_SIMD"
5158   {
5159     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5160     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5161   }
5162   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5165 ;; vmulxq_lane
;; fmulx where the second operand is a scalar broadcast — printed as
;; lane [0] of the scalar register.
5167 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5168   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5169         (unspec:VHSDF
5170          [(match_operand:VHSDF 1 "register_operand" "w")
5171           (vec_duplicate:VHSDF
5172             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5173          UNSPEC_FMULX))]
5174   "TARGET_SIMD"
5175   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5176   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5179 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5180 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
5181 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar fmulx of a scalar with one extracted vector lane.
5183 (define_insn "*aarch64_vgetfmulx<mode>"
5184   [(set (match_operand:<VEL> 0 "register_operand" "=w")
5185         (unspec:<VEL>
5186          [(match_operand:<VEL> 1 "register_operand" "w")
5187           (vec_select:<VEL>
5188            (match_operand:VDQF 2 "register_operand" "w")
5189             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5190          UNSPEC_FMULX))]
5191   "TARGET_SIMD"
5192   {
5193     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5194     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5195   }
5196   [(set_attr "type" "fmul<Vetype>")]
5198 ;; <su>q<addsub>
;; Saturating add/subtract, signed or unsigned: [su]q{add,sub}.
5200 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5201   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5202         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5203                         (match_operand:VSDQ_I 2 "register_operand" "w")))]
5204   "TARGET_SIMD"
5205   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5206   [(set_attr "type" "neon_q<addsub><q>")]
5209 ;; suqadd and usqadd
;; Mixed-signedness saturating accumulate; operand 1 is tied to the
;; destination ("0") since the instruction is two-operand.
5211 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5212   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5213         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5214                         (match_operand:VSDQ_I 2 "register_operand" "w")]
5215                        USSUQADD))]
5216   "TARGET_SIMD"
5217   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5218   [(set_attr "type" "neon_qadd<q>")]
5221 ;; sqmovn and uqmovn
;; Saturating narrow (scalar source modes): [su]qxtn.
5223 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5224   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5225         (SAT_TRUNC:<VNARROWQ>
5226           (match_operand:SD_HSDI 1 "register_operand" "w")))]
5227   "TARGET_SIMD"
5228   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5229   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrow, vector source modes.
5232 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5233   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5234         (SAT_TRUNC:<VNARROWQ>
5235           (match_operand:VQN 1 "register_operand" "w")))]
5236   "TARGET_SIMD"
5237   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5238   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; [su]qxtn2, little-endian: narrow into the high half, keeping operand 1
;; (tied to the output) as the low half.
5241 (define_insn "aarch64_<su>qxtn2<mode>_le"
5242   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5243         (vec_concat:<VNARROWQ2>
5244           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5245           (SAT_TRUNC:<VNARROWQ>
5246             (match_operand:VQN 2 "register_operand" "w"))))]
5247   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5248   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5249    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian qxtn2: vec_concat operand order swapped.
5252 (define_insn "aarch64_<su>qxtn2<mode>_be"
5253   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5254         (vec_concat:<VNARROWQ2>
5255           (SAT_TRUNC:<VNARROWQ>
5256             (match_operand:VQN 2 "register_operand" "w"))
5257           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5258   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5259   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5260    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander dispatching on byte order to the LE/BE qxtn2 insns.
5263 (define_expand "aarch64_<su>qxtn2<mode>"
5264   [(match_operand:<VNARROWQ2> 0 "register_operand")
5265    (match_operand:<VNARROWQ> 1 "register_operand")
5266    (SAT_TRUNC:<VNARROWQ>
5267      (match_operand:VQN 2 "register_operand"))]
5268   "TARGET_SIMD"
5269   {
5270     if (BYTES_BIG_ENDIAN)
5271       emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5272                                                  operands[2]));
5273     else
5274       emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5275                                                  operands[2]));
5276     DONE;
5277   }
5280 ;; sqmovun
;; Signed-to-unsigned saturating narrow (sqxtun), scalar source: modelled as
;; clamp to [0, <half_mask>] followed by truncation.
5282 (define_insn "aarch64_sqmovun<mode>"
5283   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5284         (truncate:<VNARROWQ>
5285           (smin:SD_HSDI
5286             (smax:SD_HSDI
5287               (match_operand:SD_HSDI 1 "register_operand" "w")
5288               (const_int 0))
5289             (const_int <half_mask>))))]
5290    "TARGET_SIMD"
5291    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5292    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Vector sqxtun: clamp each lane to [0, narrow-unsigned-max], then truncate.
5295 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5296   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5297         (truncate:<VNARROWQ>
5298           (smin:VQN
5299             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5300                       (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5301             (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5302   "TARGET_SIMD"
5303   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5304   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander materialising the zero and narrow-max clamp constants.
5307 (define_expand "aarch64_sqmovun<mode>"
5308   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5309         (truncate:<VNARROWQ>
5310           (smin:VQN
5311             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5312                       (match_dup 2))
5313             (match_dup 3))))]
5314   "TARGET_SIMD"
5315   {
5316     operands[2] = CONST0_RTX (<MODE>mode);
5317     operands[3]
5318       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5319                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5320   }
;; sqxtun2, little-endian: narrow-unsigned into the high half, operand 1
;; (tied) remains the low half.
5323 (define_insn "aarch64_sqxtun2<mode>_le"
5324   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5325         (vec_concat:<VNARROWQ2>
5326           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5327           (truncate:<VNARROWQ>
5328             (smin:VQN
5329               (smax:VQN
5330                 (match_operand:VQN 2 "register_operand" "w")
5331                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5332               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5333   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5334   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5335    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian sqxtun2: vec_concat order swapped.
5338 (define_insn "aarch64_sqxtun2<mode>_be"
5339   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5340         (vec_concat:<VNARROWQ2>
5341           (truncate:<VNARROWQ>
5342             (smin:VQN
5343               (smax:VQN
5344                 (match_operand:VQN 2 "register_operand" "w")
5345                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5346               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5347           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5348   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5349   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5350    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander: builds clamp constants and dispatches on byte order.
5353 (define_expand "aarch64_sqxtun2<mode>"
5354   [(match_operand:<VNARROWQ2> 0 "register_operand")
5355    (match_operand:<VNARROWQ> 1 "register_operand")
5356    (match_operand:VQN 2 "register_operand")]
5357   "TARGET_SIMD"
5358   {
5359     rtx zeros = CONST0_RTX (<MODE>mode);
5360     rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5361                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5362     if (BYTES_BIG_ENDIAN)
5363       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5364                                                operands[2], zeros, half_umax));
5365     else
5366       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5367                                                operands[2], zeros, half_umax));
5368     DONE;
5369   }
5372 ;; <su>q<absneg>
;; Saturating unary abs/neg (UNQOPS iterator): sqabs / sqneg.
5374 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5375   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5376         (UNQOPS:VSDQ_I
5377           (match_operand:VSDQ_I 1 "register_operand" "w")))]
5378   "TARGET_SIMD"
5379   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5380   [(set_attr "type" "neon_<optab><q>")]
5383 ;; sq<r>dmulh.
;; Saturating doubling multiply high, plain and rounding (VQDMULH iterator).
5385 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5386   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5387         (unspec:VSDQ_HSI
5388           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5389            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5390          VQDMULH))]
5391   "TARGET_SIMD"
5392   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5393   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; _n form: second operand is a broadcast scalar, printed as lane [0].
5396 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5397   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5398         (unspec:VDQHS
5399           [(match_operand:VDQHS 1 "register_operand" "w")
5400            (vec_duplicate:VDQHS
5401              (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5402          VQDMULH))]
5403   "TARGET_SIMD"
5404   "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5405   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5408 ;; sq<r>dmulh_lane
;; Lane form, 64-bit lane container (<VCOND>); lane index endian-adjusted
;; in the output C fragment.
5410 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5411   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5412         (unspec:VDQHS
5413           [(match_operand:VDQHS 1 "register_operand" "w")
5414            (vec_select:<VEL>
5415              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5416              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5417          VQDMULH))]
5418   "TARGET_SIMD"
5419   "*
5420    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5421    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5422   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; laneq form: 128-bit lane container (<VCONQ>).
5425 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5426   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5427         (unspec:VDQHS
5428           [(match_operand:VDQHS 1 "register_operand" "w")
5429            (vec_select:<VEL>
5430              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5431              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5432          VQDMULH))]
5433   "TARGET_SIMD"
5434   "*
5435    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5436    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5437   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar destination lane form (SD_HSI modes).
5440 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5441   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5442         (unspec:SD_HSI
5443           [(match_operand:SD_HSI 1 "register_operand" "w")
5444            (vec_select:<VEL>
5445              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5446              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5447          VQDMULH))]
5448   "TARGET_SIMD"
5449   "*
5450    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5451    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5452   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5455 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5456   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5457         (unspec:SD_HSI
5458           [(match_operand:SD_HSI 1 "register_operand" "w")
5459            (vec_select:<VEL>
5460              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5461              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5462          VQDMULH))]
5463   "TARGET_SIMD"
5464   "*
5465    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5466    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5467   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5470 ;; sqrdml[as]h.
5472 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5473   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5474         (unspec:VSDQ_HSI
5475           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5476            (match_operand:VSDQ_HSI 2 "register_operand" "w")
5477            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5478           SQRDMLH_AS))]
5479    "TARGET_SIMD_RDMA"
5480    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5481    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5484 ;; sqrdml[as]h_lane.
5486 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5487   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5488         (unspec:VDQHS
5489           [(match_operand:VDQHS 1 "register_operand" "0")
5490            (match_operand:VDQHS 2 "register_operand" "w")
5491            (vec_select:<VEL>
5492              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5493              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5494           SQRDMLH_AS))]
5495    "TARGET_SIMD_RDMA"
5496    {
5497      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5498      return
5499       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5500    }
5501    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5504 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5505   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5506         (unspec:SD_HSI
5507           [(match_operand:SD_HSI 1 "register_operand" "0")
5508            (match_operand:SD_HSI 2 "register_operand" "w")
5509            (vec_select:<VEL>
5510              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5511              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5512           SQRDMLH_AS))]
5513    "TARGET_SIMD_RDMA"
5514    {
5515      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5516      return
5517       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5518    }
5519    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5522 ;; sqrdml[as]h_laneq.
5524 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5525   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5526         (unspec:VDQHS
5527           [(match_operand:VDQHS 1 "register_operand" "0")
5528            (match_operand:VDQHS 2 "register_operand" "w")
5529            (vec_select:<VEL>
5530              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5531              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5532           SQRDMLH_AS))]
5533    "TARGET_SIMD_RDMA"
5534    {
5535      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5536      return
5537       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5538    }
5539    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5542 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5543   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5544         (unspec:SD_HSI
5545           [(match_operand:SD_HSI 1 "register_operand" "0")
5546            (match_operand:SD_HSI 2 "register_operand" "w")
5547            (vec_select:<VEL>
5548              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5549              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5550           SQRDMLH_AS))]
5551    "TARGET_SIMD_RDMA"
5552    {
5553      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5554      return
5555       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5556    }
5557    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5560 ;; vqdml[sa]l
5562 (define_insn "aarch64_sqdmlal<mode>"
5563   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5564         (ss_plus:<VWIDE>
5565           (ss_ashift:<VWIDE>
5566               (mult:<VWIDE>
5567                 (sign_extend:<VWIDE>
5568                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5569                 (sign_extend:<VWIDE>
5570                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5571               (const_int 1))
5572           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5573   "TARGET_SIMD"
5574   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5575   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5578 (define_insn "aarch64_sqdmlsl<mode>"
5579   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5580         (ss_minus:<VWIDE>
5581           (match_operand:<VWIDE> 1 "register_operand" "0")
5582           (ss_ashift:<VWIDE>
5583               (mult:<VWIDE>
5584                 (sign_extend:<VWIDE>
5585                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5586                 (sign_extend:<VWIDE>
5587                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5588               (const_int 1))))]
5589   "TARGET_SIMD"
5590   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5591   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5594 ;; vqdml[sa]l_lane
5596 (define_insn "aarch64_sqdmlal_lane<mode>"
5597   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5598         (ss_plus:<VWIDE>
5599           (ss_ashift:<VWIDE>
5600             (mult:<VWIDE>
5601               (sign_extend:<VWIDE>
5602                 (match_operand:VD_HSI 2 "register_operand" "w"))
5603               (vec_duplicate:<VWIDE>
5604                 (sign_extend:<VWIDE_S>
5605                   (vec_select:<VEL>
5606                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5607                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5608               ))
5609             (const_int 1))
5610           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5611   "TARGET_SIMD"
5612   {
5613     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5614     return
5615       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5616   }
5617   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5620 (define_insn "aarch64_sqdmlsl_lane<mode>"
5621   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5622         (ss_minus:<VWIDE>
5623           (match_operand:<VWIDE> 1 "register_operand" "0")
5624           (ss_ashift:<VWIDE>
5625             (mult:<VWIDE>
5626               (sign_extend:<VWIDE>
5627                 (match_operand:VD_HSI 2 "register_operand" "w"))
5628               (vec_duplicate:<VWIDE>
5629                 (sign_extend:<VWIDE_S>
5630                   (vec_select:<VEL>
5631                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5632                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5633               ))
5634             (const_int 1))))]
5635   "TARGET_SIMD"
5636   {
5637     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5638     return
5639       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5640   }
5641   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5645 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5646   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5647         (ss_minus:<VWIDE>
5648           (match_operand:<VWIDE> 1 "register_operand" "0")
5649           (ss_ashift:<VWIDE>
5650             (mult:<VWIDE>
5651               (sign_extend:<VWIDE>
5652                 (match_operand:VD_HSI 2 "register_operand" "w"))
5653               (vec_duplicate:<VWIDE>
5654                 (sign_extend:<VWIDE_S>
5655                   (vec_select:<VEL>
5656                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5657                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5658               ))
5659             (const_int 1))))]
5660   "TARGET_SIMD"
5661   {
5662     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5663     return
5664       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5665   }
5666   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5669 (define_insn "aarch64_sqdmlal_laneq<mode>"
5670   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5671         (ss_plus:<VWIDE>
5672           (ss_ashift:<VWIDE>
5673             (mult:<VWIDE>
5674               (sign_extend:<VWIDE>
5675                 (match_operand:VD_HSI 2 "register_operand" "w"))
5676               (vec_duplicate:<VWIDE>
5677                 (sign_extend:<VWIDE_S>
5678                   (vec_select:<VEL>
5679                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5680                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5681               ))
5682             (const_int 1))
5683           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5684   "TARGET_SIMD"
5685   {
5686     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5687     return
5688       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5689   }
5690   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5694 (define_insn "aarch64_sqdmlal_lane<mode>"
5695   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5696         (ss_plus:<VWIDE>
5697           (ss_ashift:<VWIDE>
5698             (mult:<VWIDE>
5699               (sign_extend:<VWIDE>
5700                 (match_operand:SD_HSI 2 "register_operand" "w"))
5701               (sign_extend:<VWIDE>
5702                 (vec_select:<VEL>
5703                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5704                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5705               )
5706             (const_int 1))
5707           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5708   "TARGET_SIMD"
5709   {
5710     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5711     return
5712       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5713   }
5714   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5717 (define_insn "aarch64_sqdmlsl_lane<mode>"
5718   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5719         (ss_minus:<VWIDE>
5720           (match_operand:<VWIDE> 1 "register_operand" "0")
5721           (ss_ashift:<VWIDE>
5722             (mult:<VWIDE>
5723               (sign_extend:<VWIDE>
5724                 (match_operand:SD_HSI 2 "register_operand" "w"))
5725               (sign_extend:<VWIDE>
5726                 (vec_select:<VEL>
5727                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5728                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5729               )
5730             (const_int 1))))]
5731   "TARGET_SIMD"
5732   {
5733     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5734     return
5735       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5736   }
5737   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5741 (define_insn "aarch64_sqdmlal_laneq<mode>"
5742   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5743         (ss_plus:<VWIDE>
5744           (ss_ashift:<VWIDE>
5745             (mult:<VWIDE>
5746               (sign_extend:<VWIDE>
5747                 (match_operand:SD_HSI 2 "register_operand" "w"))
5748               (sign_extend:<VWIDE>
5749                 (vec_select:<VEL>
5750                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5751                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5752               )
5753             (const_int 1))
5754           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5755   "TARGET_SIMD"
5756   {
5757     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5758     return
5759       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5760   }
5761   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5764 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5765   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5766         (ss_minus:<VWIDE>
5767           (match_operand:<VWIDE> 1 "register_operand" "0")
5768           (ss_ashift:<VWIDE>
5769             (mult:<VWIDE>
5770               (sign_extend:<VWIDE>
5771                 (match_operand:SD_HSI 2 "register_operand" "w"))
5772               (sign_extend:<VWIDE>
5773                 (vec_select:<VEL>
5774                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5775                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5776               )
5777             (const_int 1))))]
5778   "TARGET_SIMD"
5779   {
5780     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5781     return
5782       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5783   }
5784   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5787 ;; vqdml[sa]l_n
5789 (define_insn "aarch64_sqdmlsl_n<mode>"
5790   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5791         (ss_minus:<VWIDE>
5792           (match_operand:<VWIDE> 1 "register_operand" "0")
5793           (ss_ashift:<VWIDE>
5794               (mult:<VWIDE>
5795                 (sign_extend:<VWIDE>
5796                       (match_operand:VD_HSI 2 "register_operand" "w"))
5797                 (vec_duplicate:<VWIDE>
5798                   (sign_extend:<VWIDE_S>
5799                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5800               (const_int 1))))]
5801   "TARGET_SIMD"
5802   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5803   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5806 (define_insn "aarch64_sqdmlal_n<mode>"
5807   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5808         (ss_plus:<VWIDE>
5809           (ss_ashift:<VWIDE>
5810               (mult:<VWIDE>
5811                 (sign_extend:<VWIDE>
5812                       (match_operand:VD_HSI 2 "register_operand" "w"))
5813                 (vec_duplicate:<VWIDE>
5814                   (sign_extend:<VWIDE_S>
5815                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5816               (const_int 1))
5817           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5818   "TARGET_SIMD"
5819   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5820   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5824 ;; sqdml[as]l2
5826 (define_insn "aarch64_sqdmlal2<mode>_internal"
5827   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5828         (ss_plus:<VWIDE>
5829          (ss_ashift:<VWIDE>
5830              (mult:<VWIDE>
5831                (sign_extend:<VWIDE>
5832                  (vec_select:<VHALF>
5833                      (match_operand:VQ_HSI 2 "register_operand" "w")
5834                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5835                (sign_extend:<VWIDE>
5836                  (vec_select:<VHALF>
5837                      (match_operand:VQ_HSI 3 "register_operand" "w")
5838                      (match_dup 4))))
5839              (const_int 1))
5840           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5841   "TARGET_SIMD"
5842   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5843   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5846 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5847   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5848         (ss_minus:<VWIDE>
5849          (match_operand:<VWIDE> 1 "register_operand" "0")
5850          (ss_ashift:<VWIDE>
5851              (mult:<VWIDE>
5852                (sign_extend:<VWIDE>
5853                  (vec_select:<VHALF>
5854                      (match_operand:VQ_HSI 2 "register_operand" "w")
5855                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5856                (sign_extend:<VWIDE>
5857                  (vec_select:<VHALF>
5858                      (match_operand:VQ_HSI 3 "register_operand" "w")
5859                      (match_dup 4))))
5860              (const_int 1))))]
5861   "TARGET_SIMD"
5862   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5863   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5866 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5867   [(match_operand:<VWIDE> 0 "register_operand")
5868    (SBINQOPS:<VWIDE>
5869      (match_operand:<VWIDE> 1 "register_operand")
5870      (match_dup 1))
5871    (match_operand:VQ_HSI 2 "register_operand")
5872    (match_operand:VQ_HSI 3 "register_operand")]
5873   "TARGET_SIMD"
5875   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5876   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5877                                                 operands[1], operands[2],
5878                                                 operands[3], p));
5879   DONE;
5882 ;; vqdml[sa]l2_lane
5884 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5885   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5886         (ss_minus:<VWIDE>
5887           (match_operand:<VWIDE> 1 "register_operand" "0")
5888           (ss_ashift:<VWIDE>
5889               (mult:<VWIDE>
5890                 (sign_extend:<VWIDE>
5891                   (vec_select:<VHALF>
5892                     (match_operand:VQ_HSI 2 "register_operand" "w")
5893                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5894                 (vec_duplicate:<VWIDE>
5895                   (sign_extend:<VWIDE_S>
5896                     (vec_select:<VEL>
5897                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5898                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5899                     ))))
5900               (const_int 1))))]
5901   "TARGET_SIMD"
5902   {
5903     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5904     return
5905      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5906   }
5907   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5910 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5911   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5912         (ss_plus:<VWIDE>
5913           (ss_ashift:<VWIDE>
5914               (mult:<VWIDE>
5915                 (sign_extend:<VWIDE>
5916                   (vec_select:<VHALF>
5917                     (match_operand:VQ_HSI 2 "register_operand" "w")
5918                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5919                 (vec_duplicate:<VWIDE>
5920                   (sign_extend:<VWIDE_S>
5921                     (vec_select:<VEL>
5922                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5923                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5924                     ))))
5925               (const_int 1))
5926           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5927   "TARGET_SIMD"
5928   {
5929     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5930     return
5931      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5932   }
5933   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5936 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5937   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5938         (ss_minus:<VWIDE>
5939           (match_operand:<VWIDE> 1 "register_operand" "0")
5940           (ss_ashift:<VWIDE>
5941               (mult:<VWIDE>
5942                 (sign_extend:<VWIDE>
5943                   (vec_select:<VHALF>
5944                     (match_operand:VQ_HSI 2 "register_operand" "w")
5945                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5946                 (vec_duplicate:<VWIDE>
5947                   (sign_extend:<VWIDE_S>
5948                     (vec_select:<VEL>
5949                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5950                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5951                     ))))
5952               (const_int 1))))]
5953   "TARGET_SIMD"
5954   {
5955     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5956     return
5957      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5958   }
5959   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5962 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5963   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5964         (ss_plus:<VWIDE>
5965           (ss_ashift:<VWIDE>
5966               (mult:<VWIDE>
5967                 (sign_extend:<VWIDE>
5968                   (vec_select:<VHALF>
5969                     (match_operand:VQ_HSI 2 "register_operand" "w")
5970                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5971                 (vec_duplicate:<VWIDE>
5972                   (sign_extend:<VWIDE_S>
5973                     (vec_select:<VEL>
5974                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5975                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5976                     ))))
5977               (const_int 1))
5978           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5979   "TARGET_SIMD"
5980   {
5981     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5982     return
5983      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5984   }
5985   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5988 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
5989   [(match_operand:<VWIDE> 0 "register_operand")
5990    (SBINQOPS:<VWIDE>
5991      (match_operand:<VWIDE> 1 "register_operand")
5992      (match_dup 1))
5993    (match_operand:VQ_HSI 2 "register_operand")
5994    (match_operand:<VCOND> 3 "register_operand")
5995    (match_operand:SI 4 "immediate_operand")]
5996   "TARGET_SIMD"
5998   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5999   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6000                                                 operands[1], operands[2],
6001                                                 operands[3], operands[4], p));
6002   DONE;
6005 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6006   [(match_operand:<VWIDE> 0 "register_operand")
6007    (SBINQOPS:<VWIDE>
6008      (match_operand:<VWIDE> 1 "register_operand")
6009      (match_dup 1))
6010    (match_operand:VQ_HSI 2 "register_operand")
6011    (match_operand:<VCONQ> 3 "register_operand")
6012    (match_operand:SI 4 "immediate_operand")]
6013   "TARGET_SIMD"
6015   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6016   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6017                                                 operands[1], operands[2],
6018                                                 operands[3], operands[4], p));
6019   DONE;
6022 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6023   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6024         (ss_minus:<VWIDE>
6025           (match_operand:<VWIDE> 1 "register_operand" "0")
6026           (ss_ashift:<VWIDE>
6027             (mult:<VWIDE>
6028               (sign_extend:<VWIDE>
6029                 (vec_select:<VHALF>
6030                   (match_operand:VQ_HSI 2 "register_operand" "w")
6031                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6032               (vec_duplicate:<VWIDE>
6033                 (sign_extend:<VWIDE_S>
6034                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6035             (const_int 1))))]
6036   "TARGET_SIMD"
6037   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6038   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6041 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6042   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6043         (ss_plus:<VWIDE>
6044           (ss_ashift:<VWIDE>
6045             (mult:<VWIDE>
6046               (sign_extend:<VWIDE>
6047                 (vec_select:<VHALF>
6048                   (match_operand:VQ_HSI 2 "register_operand" "w")
6049                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6050               (vec_duplicate:<VWIDE>
6051                 (sign_extend:<VWIDE_S>
6052                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6053             (const_int 1))
6054           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6055   "TARGET_SIMD"
6056   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6057   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6060 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6061   [(match_operand:<VWIDE> 0 "register_operand")
6062    (SBINQOPS:<VWIDE>
6063      (match_operand:<VWIDE> 1 "register_operand")
6064      (match_dup 1))
6065    (match_operand:VQ_HSI 2 "register_operand")
6066    (match_operand:<VEL> 3 "register_operand")]
6067   "TARGET_SIMD"
6069   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6070   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6071                                                 operands[1], operands[2],
6072                                                 operands[3], p));
6073   DONE;
6076 ;; vqdmull
6078 (define_insn "aarch64_sqdmull<mode>"
6079   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6080         (ss_ashift:<VWIDE>
6081              (mult:<VWIDE>
6082                (sign_extend:<VWIDE>
6083                      (match_operand:VSD_HSI 1 "register_operand" "w"))
6084                (sign_extend:<VWIDE>
6085                      (match_operand:VSD_HSI 2 "register_operand" "w")))
6086              (const_int 1)))]
6087   "TARGET_SIMD"
6088   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6089   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6092 ;; vqdmull_lane
6094 (define_insn "aarch64_sqdmull_lane<mode>"
6095   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6096         (ss_ashift:<VWIDE>
6097              (mult:<VWIDE>
6098                (sign_extend:<VWIDE>
6099                  (match_operand:VD_HSI 1 "register_operand" "w"))
6100                (vec_duplicate:<VWIDE>
6101                  (sign_extend:<VWIDE_S>
6102                    (vec_select:<VEL>
6103                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6104                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6105                ))
6106              (const_int 1)))]
6107   "TARGET_SIMD"
6108   {
6109     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6110     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6111   }
6112   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6115 (define_insn "aarch64_sqdmull_laneq<mode>"
6116   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6117         (ss_ashift:<VWIDE>
6118              (mult:<VWIDE>
6119                (sign_extend:<VWIDE>
6120                  (match_operand:VD_HSI 1 "register_operand" "w"))
6121                (vec_duplicate:<VWIDE>
6122                  (sign_extend:<VWIDE_S>
6123                    (vec_select:<VEL>
6124                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6125                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6126                ))
6127              (const_int 1)))]
6128   "TARGET_SIMD"
6129   {
6130     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6131     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6132   }
6133   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6136 (define_insn "aarch64_sqdmull_lane<mode>"
6137   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6138         (ss_ashift:<VWIDE>
6139              (mult:<VWIDE>
6140                (sign_extend:<VWIDE>
6141                  (match_operand:SD_HSI 1 "register_operand" "w"))
6142                (sign_extend:<VWIDE>
6143                  (vec_select:<VEL>
6144                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6145                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6146                ))
6147              (const_int 1)))]
6148   "TARGET_SIMD"
6149   {
6150     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6151     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6152   }
6153   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6156 (define_insn "aarch64_sqdmull_laneq<mode>"
6157   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6158         (ss_ashift:<VWIDE>
6159              (mult:<VWIDE>
6160                (sign_extend:<VWIDE>
6161                  (match_operand:SD_HSI 1 "register_operand" "w"))
6162                (sign_extend:<VWIDE>
6163                  (vec_select:<VEL>
6164                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6165                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6166                ))
6167              (const_int 1)))]
6168   "TARGET_SIMD"
6169   {
6170     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6171     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6172   }
6173   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6176 ;; vqdmull_n
;; SQDMULL by scalar element: multiply each lane of a VD_HSI vector by
;; a duplicated scalar (<VEL>), always using element [0] of the scalar
;; register in the assembly output.
6178 (define_insn "aarch64_sqdmull_n<mode>"
6179   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6180         (ss_ashift:<VWIDE>
6181              (mult:<VWIDE>
6182                (sign_extend:<VWIDE>
6183                  (match_operand:VD_HSI 1 "register_operand" "w"))
6184                (vec_duplicate:<VWIDE>
6185                  (sign_extend:<VWIDE_S>
6186                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6187                )
6188              (const_int 1)))]
6189   "TARGET_SIMD"
6190   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6191   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6194 ;; vqdmull2
;; SQDMULL2: operate on the high halves (vect_par_cnst_hi_half selection)
;; of two full-width VQ_HSI vectors.
6196 (define_insn "aarch64_sqdmull2<mode>_internal"
6197   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6198         (ss_ashift:<VWIDE>
6199              (mult:<VWIDE>
6200                (sign_extend:<VWIDE>
6201                  (vec_select:<VHALF>
6202                    (match_operand:VQ_HSI 1 "register_operand" "w")
6203                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6204                (sign_extend:<VWIDE>
6205                  (vec_select:<VHALF>
6206                    (match_operand:VQ_HSI 2 "register_operand" "w")
6207                    (match_dup 3)))
6208                )
6209              (const_int 1)))]
6210   "TARGET_SIMD"
6211   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6212   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander that materialises the hi-half lane-selection parallel and
;; forwards to the _internal insn above.
6215 (define_expand "aarch64_sqdmull2<mode>"
6216   [(match_operand:<VWIDE> 0 "register_operand")
6217    (match_operand:VQ_HSI 1 "register_operand")
6218    (match_operand:VQ_HSI 2 "register_operand")]
6219   "TARGET_SIMD"
6221   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6222   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6223                                                   operands[2], p));
6224   DONE;
6227 ;; vqdmull2_lane
;; SQDMULL2 by lane: high half of operand 1 multiplied by one lane of a
;; 64-bit vector (<VCOND>); the lane index is endian-remapped at output.
6229 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6230   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6231         (ss_ashift:<VWIDE>
6232              (mult:<VWIDE>
6233                (sign_extend:<VWIDE>
6234                  (vec_select:<VHALF>
6235                    (match_operand:VQ_HSI 1 "register_operand" "w")
6236                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6237                (vec_duplicate:<VWIDE>
6238                  (sign_extend:<VWIDE_S>
6239                    (vec_select:<VEL>
6240                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6241                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6242                ))
6243              (const_int 1)))]
6244   "TARGET_SIMD"
6245   {
6246     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6247     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6248   }
6249   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, but the lane comes from a 128-bit vector (<VCONQ>).
6252 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6253   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6254         (ss_ashift:<VWIDE>
6255              (mult:<VWIDE>
6256                (sign_extend:<VWIDE>
6257                  (vec_select:<VHALF>
6258                    (match_operand:VQ_HSI 1 "register_operand" "w")
6259                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6260                (vec_duplicate:<VWIDE>
6261                  (sign_extend:<VWIDE_S>
6262                    (vec_select:<VEL>
6263                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6264                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6265                ))
6266              (const_int 1)))]
6267   "TARGET_SIMD"
6268   {
6269     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6270     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6271   }
6272   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders: build the hi-half selector and hand off to the matching
;; _internal pattern.
6275 (define_expand "aarch64_sqdmull2_lane<mode>"
6276   [(match_operand:<VWIDE> 0 "register_operand")
6277    (match_operand:VQ_HSI 1 "register_operand")
6278    (match_operand:<VCOND> 2 "register_operand")
6279    (match_operand:SI 3 "immediate_operand")]
6280   "TARGET_SIMD"
6282   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6283   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6284                                                        operands[2], operands[3],
6285                                                        p));
6286   DONE;
6289 (define_expand "aarch64_sqdmull2_laneq<mode>"
6290   [(match_operand:<VWIDE> 0 "register_operand")
6291    (match_operand:VQ_HSI 1 "register_operand")
6292    (match_operand:<VCONQ> 2 "register_operand")
6293    (match_operand:SI 3 "immediate_operand")]
6294   "TARGET_SIMD"
6296   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6297   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6298                                                        operands[2], operands[3],
6299                                                        p));
6300   DONE;
6303 ;; vqdmull2_n
;; SQDMULL2 by scalar element: high half of operand 1 multiplied by a
;; duplicated scalar (<VEL>), emitted as element [0] of the scalar reg.
6305 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6306   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6307         (ss_ashift:<VWIDE>
6308              (mult:<VWIDE>
6309                (sign_extend:<VWIDE>
6310                  (vec_select:<VHALF>
6311                    (match_operand:VQ_HSI 1 "register_operand" "w")
6312                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6313                (vec_duplicate:<VWIDE>
6314                  (sign_extend:<VWIDE_S>
6315                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6316                )
6317              (const_int 1)))]
6318   "TARGET_SIMD"
6319   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6320   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and forward to the insn above.
6323 (define_expand "aarch64_sqdmull2_n<mode>"
6324   [(match_operand:<VWIDE> 0 "register_operand")
6325    (match_operand:VQ_HSI 1 "register_operand")
6326    (match_operand:<VEL> 2 "register_operand")]
6327   "TARGET_SIMD"
6329   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6330   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6331                                                     operands[2], p));
6332   DONE;
6335 ;; vshl
;; Vector shift by a per-element (register) shift amount; the exact
;; signed/unsigned/rounding variant is selected by the VSHL unspec
;; iterator via <sur>.  The stray ';' that used to follow the output
;; template (inert: ';' opens a comment in md syntax) has been removed.
6337 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6338   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6339         (unspec:VSDQ_I_DI
6340           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6341            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6342          VSHL))]
6343   "TARGET_SIMD"
6344   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6345   [(set_attr "type" "neon_shift_reg<q>")]
6349 ;; vqshl
;; Saturating (and optionally rounding) vector shift by a per-element
;; register amount, variant chosen by the VQSHL unspec iterator.
;; The stray ';' after the output template (harmless md comment
;; starter, but a typo) has been removed.
6351 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6352   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6353         (unspec:VSDQ_I
6354           [(match_operand:VSDQ_I 1 "register_operand" "w")
6355            (match_operand:VSDQ_I 2 "register_operand" "w")]
6356          VQSHL))]
6357   "TARGET_SIMD"
6358   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6359   [(set_attr "type" "neon_sat_shift_reg<q>")]
6362 ;; vshll_n
;; Widening shift left by immediate, expressed as extend-then-ashift.
;; Two alternatives in compact {@ ...} syntax: the D2/DL constraints
;; split the immediate range — presumably SHLL's "shift == element
;; width" form vs. the general <su>shll form; confirm against the
;; constraint definitions in constraints.md.
6364 (define_insn "aarch64_<su>shll<mode>"
6365   [(set (match_operand:<VWIDE> 0 "register_operand")
6366         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6367                             (match_operand:VD_BHSI 1 "register_operand"))
6368                          (match_operand:<VWIDE> 2
6369                            "aarch64_simd_shll_imm_vec")))]
6370   "TARGET_SIMD"
6371   {@ [cons: =0, 1, 2]
6372      [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6373      [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6374   }
6375   [(set_attr "type" "neon_shift_imm_long")]
;; Intrinsic expander: duplicate the scalar shift count into a vector
;; constant and emit the pattern above.
6378 (define_expand "aarch64_<sur>shll_n<mode>"
6379   [(set (match_operand:<VWIDE> 0 "register_operand")
6380         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6381                          (match_operand:SI 2
6382                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6383                          VSHLL))]
6384   "TARGET_SIMD"
6385   {
6386     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6387     emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6388     DONE;
6389   }
6392 ;; vshll_high_n
;; As aarch64_<su>shll, but widening the high half of a full-width
;; vector (vec_select with a hi-half parallel).
6394 (define_insn "aarch64_<su>shll2<mode>"
6395   [(set (match_operand:<VWIDE> 0 "register_operand")
6396         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6397                           (vec_select:<VHALF>
6398                             (match_operand:VQW 1 "register_operand")
6399                             (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6400                          (match_operand:<VWIDE> 3
6401                            "aarch64_simd_shll_imm_vec")))]
6402   "TARGET_SIMD"
6403   {@ [cons: =0, 1, 2, 3]
6404      [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6405      [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6406   }
6407   [(set_attr "type" "neon_shift_imm_long")]
;; Intrinsic expander: build the duplicated shift-count vector and the
;; hi-half selector, then emit the insn above.
6410 (define_expand "aarch64_<sur>shll2_n<mode>"
6411   [(set (match_operand:<VWIDE> 0 "register_operand")
6412         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6413                          (match_operand:SI 2
6414                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6415                          VSHLL))]
6416   "TARGET_SIMD"
6417   {
6418     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6419     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6420     emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6421     DONE;
6422   }
6425 ;; vrshr_n
;; Rounding shift right by immediate: widen to <V2XWIDE>, add the
;; rounding constant 1 << (shift - 1), shift right, truncate back.
;; The condition checks the rounding constant matches the shift amount.
6427 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6428   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6429         (truncate:VSDQ_I_DI
6430           (SHIFTRT:<V2XWIDE>
6431             (plus:<V2XWIDE>
6432               (<SHIFTEXTEND>:<V2XWIDE>
6433                 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6434               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6435             (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6436   "TARGET_SIMD
6437    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6438   "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6439   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Expander: compute the rounding constant with wide_int (needed for
;; the TImode double-width of the DI variants) and duplicate both
;; shift and rounding constants for the vector modes.
6442 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6443   [(match_operand:VSDQ_I_DI 0 "register_operand")
6444    (SHIFTRT:VSDQ_I_DI
6445      (match_operand:VSDQ_I_DI 1 "register_operand")
6446      (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6447   "TARGET_SIMD"
6448   {
6449     /* Use this expander to create the rounding constant vector, which is
6450        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6451        RTL is generated when handling the DImode expanders.  */
6452     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6453     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6454     rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6455     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6456     if (VECTOR_MODE_P (<MODE>mode))
6457       {
6458         shft = gen_const_vec_duplicate (<MODE>mode, shft);
6459         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6460       }
6462     emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6463                                                       shft, rnd));
6464     DONE;
6465   }
6468 ;; v(r)sra_n
;; Scalar DImode shift-right-and-accumulate; operand 1 is the
;; accumulator, tied to the destination via the "0" constraint.
6470 (define_insn "aarch64_<sur>sra_ndi"
6471   [(set (match_operand:DI 0 "register_operand" "=w")
6472        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6473                       (match_operand:DI 2 "register_operand" "w")
6474                        (match_operand:SI 3
6475                         "aarch64_simd_shift_imm_offset_di" "i")]
6476                       VSRA))]
6477   "TARGET_SIMD"
6478   "<sur>sra\\t%d0, %d2, %3"
6479   [(set_attr "type" "neon_shift_acc")]
6482 ;; vs<lr>i_n
;; Shift-and-insert (SLI/SRI): operand 1 supplies the bits preserved in
;; the destination (tied via "0"); operand 2 is shifted and inserted.
6484 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6485   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6486         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6487                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6488                        (match_operand:SI 3
6489                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6490                       VSLRI))]
6491   "TARGET_SIMD"
6492   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6493   [(set_attr "type" "neon_shift_imm<q>")]
6496 ;; vqshl(u)
;; Saturating shift left by immediate; signed/unsigned/unsigned-result
;; variants come from the VQSHL_N unspec iterator.
6498 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6499   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6500         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6501                        (match_operand:SI 2
6502                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
6503                       VQSHL_N))]
6504   "TARGET_SIMD"
6505   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6506   [(set_attr "type" "neon_sat_shift_imm<q>")]
6510 ;; vq(r)shr(u)n_n
;; Scalar saturating shift-right-narrow (SD_HSDI source, <VNARROWQ>
;; result) modelled with SAT_TRUNC of a shift.
6512 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6513   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6514         (SAT_TRUNC:<VNARROWQ>
6515           (<TRUNC_SHIFT>:SD_HSDI
6516             (match_operand:SD_HSDI 1 "register_operand" "w")
6517             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6518   "TARGET_SIMD"
6519   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6520   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Vector form: truncate/saturate after a shift right by a vector
;; immediate; the condition filters valid truncation/shift pairings.
6523 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6524   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6525         (ALL_TRUNC:<VNARROWQ>
6526           (SHIFTRT:VQN
6527             (match_operand:VQN 1 "register_operand" "w")
6528             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6529   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6530   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6531   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: widen the scalar shift count into a constant vector.
6534 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6535   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6536         (ALL_TRUNC:<VNARROWQ>
6537           (<TRUNC_SHIFT>:VQN
6538             (match_operand:VQN 1 "register_operand")
6539             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6540   "TARGET_SIMD"
6541   {
6542     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6543                                                  INTVAL (operands[2]));
6544   }
;; Rounding shift-right-narrow, vector form: widen to <V2XWIDE>, add
;; the rounding constant, shift, then truncate/saturate-narrow.
6547 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6548   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6549         (ALL_TRUNC:<VNARROWQ>
6550           (<TRUNC_SHIFT>:<V2XWIDE>
6551             (plus:<V2XWIDE>
6552               (<TRUNCEXTEND>:<V2XWIDE>
6553                 (match_operand:VQN 1 "register_operand" "w"))
6554               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6555             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6556   "TARGET_SIMD
6557    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6558   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6559   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar form of the above, working in the <DWI> double-width mode.
6562 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6563   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6564         (SAT_TRUNC:<VNARROWQ>
6565           (<TRUNC_SHIFT>:<DWI>
6566             (plus:<DWI>
6567               (<TRUNCEXTEND>:<DWI>
6568                 (match_operand:SD_HSDI 1 "register_operand" "w"))
6569               (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6570             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6571   "TARGET_SIMD
6572    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6573   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6574   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: synthesise the rounding constant as operand 3.
6577 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6578   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6579         (SAT_TRUNC:<VNARROWQ>
6580           (<TRUNC_SHIFT>:<V2XWIDE>
6581             (plus:<V2XWIDE>
6582               (<TRUNCEXTEND>:<V2XWIDE>
6583                 (match_operand:SD_HSDI 1 "register_operand"))
6584               (match_dup 3))
6585             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6586   "TARGET_SIMD"
6587   {
6588     /* Use this expander to create the rounding constant vector, which is
6589        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6590        RTL is generated when handling the DImode expanders.  */
6591     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6592     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6593     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6594   }
;; Vector expander.  A rounding shift by the full element width with a
;; plain TRUNCATE is equivalent to RADDHN with a zero addend, so that
;; special case is routed to gen_aarch64_raddhn.
6597 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6598   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6599         (ALL_TRUNC:<VNARROWQ>
6600           (<TRUNC_SHIFT>:<V2XWIDE>
6601             (plus:<V2XWIDE>
6602               (<TRUNCEXTEND>:<V2XWIDE>
6603                 (match_operand:VQN 1 "register_operand"))
6604               (match_dup 3))
6605             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6606   "TARGET_SIMD"
6607   {
6608     if (<CODE> == TRUNCATE
6609         && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6610       {
6611         rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6612         emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6613         DONE;
6614       }
6615     /* Use this expander to create the rounding constant vector, which is
6616        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6617        RTL is generated when handling the DImode expanders.  */
6618     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6619     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6620     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6621     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6622     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6623   }
;; SQSHRUN (signed-to-unsigned saturating shift-right-narrow), vector
;; form: shift right, clamp to [0, unsigned-max-of-narrow-type] via
;; smax/smin, then truncate.
6626 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6627   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6628         (truncate:<VNARROWQ>
6629           (smin:VQN
6630             (smax:VQN
6631               (ashiftrt:VQN
6632                 (match_operand:VQN 1 "register_operand" "w")
6633                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6634               (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6635             (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6636   "TARGET_SIMD"
6637   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6638   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar form: same clamp expressed with const_int 0 / <half_mask>.
;; Note the result is produced in the wide scalar mode; the expander
;; below extracts the narrow lowpart.
6641 (define_insn "aarch64_sqshrun_n<mode>_insn"
6642   [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6643         (smin:SD_HSDI
6644           (smax:SD_HSDI
6645             (ashiftrt:SD_HSDI
6646               (match_operand:SD_HSDI 1 "register_operand" "w")
6647               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6648             (const_int 0))
6649           (const_int <half_mask>)))]
6650   "TARGET_SIMD"
6651   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6652   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: run the insn into a wide temp, then move its
;; narrow lowpart into the destination.
6655 (define_expand "aarch64_sqshrun_n<mode>"
6656   [(match_operand:<VNARROWQ> 0 "register_operand")
6657    (match_operand:SD_HSDI 1 "register_operand")
6658    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6659   "TARGET_SIMD"
6660   {
6661     rtx dst = gen_reg_rtx (<MODE>mode);
6662     emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6663                                                  operands[2]));
6664     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6665     DONE;
6666   }
;; Vector expander: materialise the shift-count vector, the zero
;; clamp and the unsigned-max clamp as match_dup operands.
6669 (define_expand "aarch64_sqshrun_n<mode>"
6670   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6671         (truncate:<VNARROWQ>
6672           (smin:VQN
6673             (smax:VQN
6674               (ashiftrt:VQN
6675                 (match_operand:VQN 1 "register_operand")
6676                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6677               (match_dup 3))
6678             (match_dup 4))))]
6679   "TARGET_SIMD"
6680   {
6681     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6682                                                  INTVAL (operands[2]));
6683     operands[3] = CONST0_RTX (<MODE>mode);
6684     operands[4]
6685       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6686                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6687   }
;; SQRSHRUN: as SQSHRUN but rounding — the rounding constant is added
;; in the double-width mode before the arithmetic shift and clamp.
6690 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6691   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6692         (truncate:<VNARROWQ>
6693           (smin:<V2XWIDE>
6694             (smax:<V2XWIDE>
6695               (ashiftrt:<V2XWIDE>
6696                 (plus:<V2XWIDE>
6697                   (sign_extend:<V2XWIDE>
6698                     (match_operand:VQN 1 "register_operand" "w"))
6699                   (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6700                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6701               (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6702             (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6703   "TARGET_SIMD
6704    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6705   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6706   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar form in the <DWI> double-width mode; result extracted as the
;; narrow lowpart by the expander below.
6709 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6710   [(set (match_operand:<DWI> 0 "register_operand" "=w")
6711         (smin:<DWI>
6712           (smax:<DWI>
6713             (ashiftrt:<DWI>
6714               (plus:<DWI>
6715                 (sign_extend:<DWI>
6716                   (match_operand:SD_HSDI 1 "register_operand" "w"))
6717                 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6718               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6719             (const_int 0))
6720           (const_int <half_mask>)))]
6721   "TARGET_SIMD
6722    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6723   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6724   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: build 1 << (shift - 1) with wide_int (correct
;; TImode handling for the DI variant) and narrow the wide result.
6727 (define_expand "aarch64_sqrshrun_n<mode>"
6728   [(match_operand:<VNARROWQ> 0 "register_operand")
6729    (match_operand:SD_HSDI 1 "register_operand")
6730    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6731   "TARGET_SIMD"
6732   {
6733     int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6734     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6735     rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6736     rtx dst = gen_reg_rtx (<DWI>mode);
6737     emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6738     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6739     DONE;
6740   }
;; Vector expander: synthesise the rounding vector (op 3), the shift
;; vector (op 2), the zero clamp (op 4) and the narrow-type unsigned
;; max clamp (op 5).
6743 (define_expand "aarch64_sqrshrun_n<mode>"
6744   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6745         (truncate:<VNARROWQ>
6746           (smin:<V2XWIDE>
6747             (smax:<V2XWIDE>
6748               (ashiftrt:<V2XWIDE>
6749                 (plus:<V2XWIDE>
6750                   (sign_extend:<V2XWIDE>
6751                     (match_operand:VQN 1 "register_operand"))
6752                   (match_dup 3))
6753                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6754               (match_dup 4))
6755             (match_dup 5))))]
6756   "TARGET_SIMD"
6757   {
6758     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6759     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6760     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6761     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6762     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6763     operands[4] = CONST0_RTX (<V2XWIDE>mode);
6764     operands[5]
6765       = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6766     operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6767   }
;; SHRN2 family: narrow the shifted high operand into the top half of
;; the destination while keeping operand 1 in the other half.  The
;; vec_concat operand order depends on endianness, hence the _le/_be
;; twins selected by the expander.
6770 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6771   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6772         (vec_concat:<VNARROWQ2>
6773           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6774           (ALL_TRUNC:<VNARROWQ>
6775             (SHIFTRT:VQN
6776               (match_operand:VQN 2 "register_operand" "w")
6777               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6778   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6779    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6780   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6781   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian twin: vec_concat operands swapped.
6784 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6785   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6786         (vec_concat:<VNARROWQ2>
6787           (ALL_TRUNC:<VNARROWQ>
6788             (SHIFTRT:VQN
6789               (match_operand:VQN 2 "register_operand" "w")
6790               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6791           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6792   "TARGET_SIMD && BYTES_BIG_ENDIAN
6793    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6794   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6795   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: widen the shift count to a vector and pick the _le/_be
;; insn for the current endianness.
6798 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6799   [(match_operand:<VNARROWQ2> 0 "register_operand")
6800    (match_operand:<VNARROWQ> 1 "register_operand")
6801    (ALL_TRUNC:<VNARROWQ>
6802      (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6803    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6804   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6805   {
6806     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6807                                                  INTVAL (operands[3]));
6809     if (BYTES_BIG_ENDIAN)
6810       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6811                 operands[0], operands[1], operands[2], operands[3]));
6812     else
6813       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6814                 operands[0], operands[1], operands[2], operands[3]));
6815     DONE;
6816   }
;; Rounding variants of the SHRN2 family: widen to <V2XWIDE>, add the
;; rounding constant (op 4), shift, narrow into the high half.
6819 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6820   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6821         (vec_concat:<VNARROWQ2>
6822           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6823           (ALL_TRUNC:<VNARROWQ>
6824             (<TRUNC_SHIFT>:<V2XWIDE>
6825               (plus:<V2XWIDE>
6826                 (<TRUNCEXTEND>:<V2XWIDE>
6827                   (match_operand:VQN 2 "register_operand" "w"))
6828                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6829               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6830   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6831    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6832   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6833   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian twin: vec_concat operands swapped.
6836 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
6837   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6838         (vec_concat:<VNARROWQ2>
6839           (ALL_TRUNC:<VNARROWQ>
6840             (<TRUNC_SHIFT>:<V2XWIDE>
6841               (plus:<V2XWIDE>
6842                 (<TRUNCEXTEND>:<V2XWIDE>
6843                   (match_operand:VQN 2 "register_operand" "w"))
6844                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6845               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6846           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6847   "TARGET_SIMD && BYTES_BIG_ENDIAN
6848    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6849   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6850   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander.  A rounding shift by the full element width with plain
;; TRUNCATE is equivalent to RADDHN2 with a zero addend; otherwise
;; synthesise the rounding vector and dispatch by endianness.
6853 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
6854   [(match_operand:<VNARROWQ2> 0 "register_operand")
6855    (match_operand:<VNARROWQ> 1 "register_operand")
6856    (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
6857    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6858   "TARGET_SIMD"
6859   {
6860     if (<CODE> == TRUNCATE
6861         && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6862       {
6863         rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
6864         emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
6865                                               operands[2], tmp));
6866         DONE;
6867       }
6868     /* Use this expander to create the rounding constant vector, which is
6869        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6870        RTL is generated when handling the DImode expanders.  */
6871     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6872     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
6873     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6874     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6875     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
6876     if (BYTES_BIG_ENDIAN)
6877       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
6878                                                               operands[1],
6879                                                               operands[2],
6880                                                               operands[3],
6881                                                               rnd));
6882     else
6883       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
6884                                                               operands[1],
6885                                                               operands[2],
6886                                                               operands[3],
6887                                                               rnd));
6888     DONE;
6889   }
6892 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
6893   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6894         (vec_concat:<VNARROWQ2>
6895           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6896           (truncate:<VNARROWQ>
6897             (smin:VQN
6898               (smax:VQN
6899                 (ashiftrt:VQN
6900                   (match_operand:VQN 2 "register_operand" "w")
6901                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6902                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6903               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
6904   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6905   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6906   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian counterpart of the SQSHRUN2 pattern above: identical
;; operation, but the vec_concat operand order is reversed (newly
;; narrowed value first, preserved low half last).
6909 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
6910   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6911         (vec_concat:<VNARROWQ2>
6912           (truncate:<VNARROWQ>
6913             (smin:VQN
6914               (smax:VQN
6915                 (ashiftrt:VQN
6916                   (match_operand:VQN 2 "register_operand" "w")
6917                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6918                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
6919               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
6920           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6921   "TARGET_SIMD && BYTES_BIG_ENDIAN"
6922   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6923   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander for SQSHRUN2: broadcast the scalar shift amount to a vector,
;; build the zero vector and the unsigned-max-of-narrow-mode clamp as
;; constant vectors, then emit the _le/_be insn matching the target
;; endianness.
6926 (define_expand "aarch64_sqshrun2_n<mode>"
6927   [(match_operand:<VNARROWQ2> 0 "register_operand")
6928    (match_operand:<VNARROWQ> 1 "register_operand")
6929    (match_operand:VQN 2 "register_operand")
6930    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6931   "TARGET_SIMD"
6932   {
6933     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6934                                                  INTVAL (operands[3]));
6935     rtx zeros = CONST0_RTX (<MODE>mode);
6936     /* All-ones mask of the narrow element, i.e. its unsigned maximum.  */
6937     rtx half_umax
6938       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6938                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6939     if (BYTES_BIG_ENDIAN)
6940       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
6941                                 operands[1], operands[2], operands[3],
6942                                 zeros, half_umax));
6943     else
6944       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
6945                                 operands[1], operands[2], operands[3],
6946                                 zeros, half_umax));
6947     DONE;
6948   }
;; SQRSHRUN2 (rounding variant), little-endian.  The rounding is modelled
;; by sign-extending to the double-width mode <V2XWIDE> and adding the
;; rounding constant (operand 4) before the shift; the insn condition
;; checks that the rounding constant matches the shift amount.
6951 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
6952   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6953         (vec_concat:<VNARROWQ2>
6954           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6955           (truncate:<VNARROWQ>
6956             (smin:<V2XWIDE>
6957               (smax:<V2XWIDE>
6958                 (ashiftrt:<V2XWIDE>
6959                   (plus:<V2XWIDE>
6960                     (sign_extend:<V2XWIDE>
6961                       (match_operand:VQN 2 "register_operand" "w"))
6962                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6963                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6964                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
6965               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
6966   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6967    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6968   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6969   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian counterpart of the SQRSHRUN2 pattern above; only the
;; vec_concat operand order differs.
6972 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
6973   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6974         (vec_concat:<VNARROWQ2>
6975           (truncate:<VNARROWQ>
6976             (smin:<V2XWIDE>
6977               (smax:<V2XWIDE>
6978                 (ashiftrt:<V2XWIDE>
6979                   (plus:<V2XWIDE>
6980                     (sign_extend:<V2XWIDE>
6981                       (match_operand:VQN 2 "register_operand" "w"))
6982                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6983                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6984                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
6985               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
6986           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6987   "TARGET_SIMD && BYTES_BIG_ENDIAN
6988    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6989   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6990   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander for SQRSHRUN2: build the rounding constant 1 << (shift - 1)
;; in the double-width mode, plus the zero and unsigned-max clamp
;; vectors, then emit the endian-appropriate insn pattern.
6993 (define_expand "aarch64_sqrshrun2_n<mode>"
6994   [(match_operand:<VNARROWQ2> 0 "register_operand")
6995    (match_operand:<VNARROWQ> 1 "register_operand")
6996    (match_operand:VQN 2 "register_operand")
6997    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6998   "TARGET_SIMD"
6999   {
7000     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7001     /* Rounding constant: a single bit at position (shift amount - 1).  */
7001     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7002     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7003     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7004     rtx zero = CONST0_RTX (<V2XWIDE>mode);
7005     /* All-ones mask of the narrow element, i.e. its unsigned maximum.  */
7005     rtx half_umax
7006       = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7007                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7008     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7009     if (BYTES_BIG_ENDIAN)
7010       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7011                                 operands[1], operands[2], operands[3], rnd,
7012                                 zero, half_umax));
7013     else
7014       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7015                                 operands[1], operands[2], operands[3], rnd,
7016                                 zero, half_umax));
7017     DONE;
7018   }
7021 ;; cm(eq|ge|gt|lt|le)
7022 ;; Note, we have constraints for Dz and Z as different expanders
7023 ;; have different ideas of what should be passed to this pattern.
;; Vector signed integer compares: negation of the comparison gives the
;; all-ones/all-zeros mask semantics of CM*.  The second alternative
;; matches a zero operand and emits the #0 form of the instruction.
7025 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7026   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7027         (neg:<V_INT_EQUIV>
7028           (COMPARISONS:<V_INT_EQUIV>
7029             (match_operand:VDQ_I 1 "register_operand")
7030             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")
7031           )))]
7032   "TARGET_SIMD"
7033   {@ [ cons: =0 , 1 , 2   ; attrs: type           ]
7034      [ w        , w , w   ; neon_compare<q>       ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7035      [ w        , w , ZDz ; neon_compare_zero<q>  ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
7036   }
;; DImode signed compare: may end up in either register file, so split
;; after reload.  If the operands landed in general registers, split to
;; a compare + negated cstore sequence (which uses the clobbered CC);
;; otherwise fall through to the clobber-free *aarch64_cm<optab>di
;; SIMD pattern below.
7039 (define_insn_and_split "aarch64_cm<optab>di"
7040   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7041         (neg:DI
7042           (COMPARISONS:DI
7043             (match_operand:DI 1 "register_operand" "w,w,r")
7044             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7045           )))
7046      (clobber (reg:CC CC_REGNUM))]
7047   "TARGET_SIMD"
7048   "#"
7049   "&& reload_completed"
7050   [(set (match_operand:DI 0 "register_operand")
7051         (neg:DI
7052           (COMPARISONS:DI
7053             (match_operand:DI 1 "register_operand")
7054             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7055           )))]
7056   {
7057     /* If we are in the general purpose register file,
7058        we split to a sequence of comparison and store.  */
7059     if (GP_REGNUM_P (REGNO (operands[0]))
7060         && GP_REGNUM_P (REGNO (operands[1])))
7061       {
7062         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7063         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7064         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7065         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7066         DONE;
7067       }
7068     /* Otherwise, we expand to a similar pattern which does not
7069        clobber CC_REGNUM.  */
7070   }
7071   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DImode signed compare, the
;; split target of aarch64_cm<optab>di above (no CC clobber).
7074 (define_insn "*aarch64_cm<optab>di"
7075   [(set (match_operand:DI 0 "register_operand")
7076         (neg:DI
7077           (COMPARISONS:DI
7078             (match_operand:DI 1 "register_operand")
7079             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7080           )))]
7081   "TARGET_SIMD && reload_completed"
7082   {@ [ cons: =0 , 1 , 2   ; attrs: type        ]
7083      [ w        , w , w   ; neon_compare       ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7084      [ w        , w , ZDz ; neon_compare_zero  ] cm<optab>\t%d0, %d1, #0
7085   }
7088 ;; cm(hs|hi)
;; Vector unsigned compares (CMHS/CMHI); unlike the signed variant there
;; is no #0 alternative, both operands must be registers.
7090 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7091   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7092         (neg:<V_INT_EQUIV>
7093           (UCOMPARISONS:<V_INT_EQUIV>
7094             (match_operand:VDQ_I 1 "register_operand" "w")
7095             (match_operand:VDQ_I 2 "register_operand" "w")
7096           )))]
7097   "TARGET_SIMD"
7098   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7099   [(set_attr "type" "neon_compare<q>")]
;; DImode unsigned compare; same split strategy as the signed DImode
;; pattern: GP registers get a compare + negated cstore, SIMD registers
;; fall through to the clobber-free pattern below.
7102 (define_insn_and_split "aarch64_cm<optab>di"
7103   [(set (match_operand:DI 0 "register_operand" "=w,r")
7104         (neg:DI
7105           (UCOMPARISONS:DI
7106             (match_operand:DI 1 "register_operand" "w,r")
7107             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7108           )))
7109     (clobber (reg:CC CC_REGNUM))]
7110   "TARGET_SIMD"
7111   "#"
7112   "&& reload_completed"
7113   [(set (match_operand:DI 0 "register_operand")
7114         (neg:DI
7115           (UCOMPARISONS:DI
7116             (match_operand:DI 1 "register_operand")
7117             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7118           )))]
7119   {
7120     /* If we are in the general purpose register file,
7121        we split to a sequence of comparison and store.  */
7122     if (GP_REGNUM_P (REGNO (operands[0]))
7123         && GP_REGNUM_P (REGNO (operands[1])))
7124       {
7125         machine_mode mode = CCmode;
7126         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7127         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7128         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7129         DONE;
7130       }
7131     /* Otherwise, we expand to a similar pattern which does not
7132        clobber CC_REGNUM.  */
7133   }
7134   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload SIMD-register form of the DImode unsigned compare.
7137 (define_insn "*aarch64_cm<optab>di"
7138   [(set (match_operand:DI 0 "register_operand" "=w")
7139         (neg:DI
7140           (UCOMPARISONS:DI
7141             (match_operand:DI 1 "register_operand" "w")
7142             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7143           )))]
7144   "TARGET_SIMD && reload_completed"
7145   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7146   [(set_attr "type" "neon_compare")]
7149 ;; cmtst
7151 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7152 ;; we don't have any insns using ne, and aarch64_vcond outputs
7153 ;; not (neg (eq (and x y) 0))
7154 ;; which is rewritten by simplify_rtx as
7155 ;; plus (eq (and x y) 0) -1.
;; CMTST in the (plus (eq (and x y) 0) -1) form produced by simplify-rtx
;; (see the comment above); equivalent to neg (ne (and x y) 0).
7157 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7158   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7159         (plus:<V_INT_EQUIV>
7160           (eq:<V_INT_EQUIV>
7161             (and:VDQ_I
7162               (match_operand:VDQ_I 1 "register_operand" "w")
7163               (match_operand:VDQ_I 2 "register_operand" "w"))
7164             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7165           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7166   ]
7167   "TARGET_SIMD"
7168   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7169   [(set_attr "type" "neon_tst<q>")]
7172 ;; One can also get a cmtsts by having to combine a
7173 ;; not (neg (eq x 0)), in which case it is rewritten to
7174 ;; a comparison of the operand against itself.
;; CMTST of an operand against itself: matches (plus (eq x 0) -1)
;; with no AND, so the same register is emitted for both sources.
7176 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7177   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7178         (plus:<V_INT_EQUIV>
7179           (eq:<V_INT_EQUIV>
7180             (match_operand:VDQ_I 1 "register_operand" "w")
7181             (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7182           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7183   ]
7184   "TARGET_SIMD"
7185   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7186   [(set_attr "type" "neon_tst<q>")]
;; DImode CMTST; like the DImode compares above, split after reload into
;; either a TST-style compare + negated cstore (GP registers, clobbers
;; CC) or the clobber-free SIMD pattern below.
7189 (define_insn_and_split "aarch64_cmtstdi"
7190   [(set (match_operand:DI 0 "register_operand" "=w,r")
7191         (neg:DI
7192           (ne:DI
7193             (and:DI
7194               (match_operand:DI 1 "register_operand" "w,r")
7195               (match_operand:DI 2 "register_operand" "w,r"))
7196             (const_int 0))))
7197     (clobber (reg:CC CC_REGNUM))]
7198   "TARGET_SIMD"
7199   "#"
7200   "&& reload_completed"
7201   [(set (match_operand:DI 0 "register_operand")
7202         (neg:DI
7203           (ne:DI
7204             (and:DI
7205               (match_operand:DI 1 "register_operand")
7206               (match_operand:DI 2 "register_operand"))
7207             (const_int 0))))]
7208   {
7209     /* If we are in the general purpose register file,
7210        we split to a sequence of comparison and store.  */
7211     if (GP_REGNUM_P (REGNO (operands[0]))
7212         && GP_REGNUM_P (REGNO (operands[1])))
7213       {
7214         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7215         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7216         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7217         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7218         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7219         DONE;
7220       }
7221     /* Otherwise, we expand to a similar pattern which does not
7222        clobber CC_REGNUM.  */
7223   }
7224   [(set_attr "type" "neon_tst,multiple")]
;; SIMD-register DImode CMTST, the clobber-free split target above.
7227 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7228   [(set (match_operand:DI 0 "register_operand" "=w")
7229         (neg:DI
7230           (ne:DI
7231             (and:DI
7232               (match_operand:DI 1 "register_operand" "w")
7233               (match_operand:DI 2 "register_operand" "w"))
7234             (const_int 0))))]
7235   "TARGET_SIMD"
7236   "cmtst\t%d0, %d1, %d2"
7237   [(set_attr "type" "neon_tst")]
7240 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares (FCM*); the YDz alternative matches a
;; floating-point zero and emits the compare-against-0 form.
7242 (define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
7243   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7244         (neg:<V_INT_EQUIV>
7245           (COMPARISONS:<V_INT_EQUIV>
7246             (match_operand:VHSDF_HSDF 1 "register_operand")
7247             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero")
7248           )))]
7249   "TARGET_SIMD"
7250   {@ [ cons: =0 , 1 , 2    ]
7251      [ w        , w , w    ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7252      [ w        , w , YDz  ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
7253   }
7254   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7257 ;; fac(ge|gt)
7258 ;; Note we can also handle what would be fac(le|lt) by
7259 ;; generating fac(ge|gt).
;; Absolute compares (FACGE/FACGT): the comparison is applied to the
;; abs of both operands.
7261 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7262   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7263         (neg:<V_INT_EQUIV>
7264           (FAC_COMPARISONS:<V_INT_EQUIV>
7265             (abs:VHSDF_HSDF
7266               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7267             (abs:VHSDF_HSDF
7268               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7269   )))]
7270   "TARGET_SIMD"
7271   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7272   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7275 ;; addp
7277 ;; ADDP with two registers semantically concatenates them and performs
7278 ;; a pairwise addition on the result.  For 128-bit input modes represent this
7279 ;; as a concatenation of the pairwise addition results of the two input
7280 ;; registers.  This allows us to avoid using intermediate 256-bit modes.
;; 128-bit ADDP: each result half is the sum of the even-indexed and
;; odd-indexed selections of one input register; operands 3 and 4 must
;; be the complementary even/odd parallels (enforced by the condition).
7281 (define_insn "aarch64_addp<mode>_insn"
7282   [(set (match_operand:VQ_I 0 "register_operand" "=w")
7283         (vec_concat:VQ_I
7284           (plus:<VHALF>
7285             (vec_select:<VHALF>
7286               (match_operand:VQ_I 1 "register_operand" "w")
7287               (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7288             (vec_select:<VHALF>
7289               (match_dup 1)
7290               (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7291           (plus:<VHALF>
7292             (vec_select:<VHALF>
7293               (match_operand:VQ_I 2 "register_operand" "w")
7294               (match_dup 3))
7295             (vec_select:<VHALF>
7296               (match_dup 2)
7297               (match_dup 4)))))]
7298   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7299   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7300   [(set_attr "type" "neon_reduc_add<q>")]
7303 ;; For 64-bit input modes an ADDP is represented as a concatenation
7304 ;; of the input registers into a 128-bit register which is then fed
7305 ;; into a pairwise add.  That way we avoid having to create intermediate
7306 ;; 32-bit vector modes.
;; 64-bit ADDP: the two inputs are modelled as concatenated into a
;; double-width vector, from which the even and odd elements are
;; selected and added pairwise.
7307 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7308   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7309         (plus:VD_BHSI
7310           (vec_select:VD_BHSI
7311             (vec_concat:<VDBL>
7312               (match_operand:VD_BHSI 1 "register_operand" "w")
7313               (match_operand:VD_BHSI 2 "register_operand" "w"))
7314             (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7315           (vec_select:VD_BHSI
7316             (vec_concat:<VDBL>
7317               (match_dup 1)
7318               (match_dup 2))
7319             (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7320   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7321   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7322   [(set_attr "type" "neon_reduc_add<q>")]
7325 ;; A common usecase of 64-bit ADDP is to have both operands come from the same
7326 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7327 ;; Split into the 128-bit ADDP form and extract the low half.
;; Pairwise add of both halves of a single 128-bit register: split to
;; the 128-bit ADDP with the register repeated, then take the low half.
;; After reload we reuse the low part of operand 0 as the scratch.
7328 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7329   [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7330         (plus:<VHALF>
7331           (vec_select:<VHALF>
7332             (match_operand:VQ_I 1 "register_operand" "w")
7333             (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7334           (vec_select:<VHALF>
7335             (match_dup 1)
7336             (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7337   "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7338   "#"
7339   "&& 1"
7340   [(const_int 0)]
7341   {
7342     rtx scratch;
7343     if (can_create_pseudo_p ())
7344       scratch = gen_reg_rtx (<MODE>mode);
7345     else
7346       scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7348     emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7349                                             operands[2], operands[3]));
7350     emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7351     DONE;
7352   }
;; ADDP expander: build the even/odd selection parallels (halved element
;; count for 128-bit modes, which concatenate two half-width results)
;; and swap the inputs for big-endian so lane numbering works out.
7355 (define_expand "aarch64_addp<mode>"
7356   [(match_operand:VDQ_I 0 "register_operand")
7357    (match_operand:VDQ_I 1 "register_operand")
7358    (match_operand:VDQ_I 2 "register_operand")]
7359   "TARGET_SIMD"
7360   {
7361     int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7362     if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7363       nunits /= 2;
7364     rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7365     rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7366     if (BYTES_BIG_ENDIAN)
7367       std::swap (operands[1], operands[2]);
7368     emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7369                                             operands[2], par_even, par_odd));
7370     DONE;
7371   }
7374 ;; sqrt
;; Vector sqrt expander: try the approximate-sqrt (estimate + Newton
;; iterations) expansion first; otherwise fall through to FSQRT below.
7376 (define_expand "sqrt<mode>2"
7377   [(set (match_operand:VHSDF 0 "register_operand")
7378         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7379   "TARGET_SIMD"
7381   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7382     DONE;
;; Plain vector FSQRT instruction.
7385 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7386   [(set (match_operand:VHSDF 0 "register_operand" "=w")
7387         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7388   "TARGET_SIMD"
7389   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7390   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7393 ;; Patterns for vector struct loads and stores.
;; LD2: de-interleaving load of a 2-register structure.
7395 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7396   [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7397         (unspec:VSTRUCT_2Q [
7398           (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7399           UNSPEC_LD2))]
7400   "TARGET_SIMD"
7401   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7402   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element structure and replicate it to all lanes.
7405 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
7406   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7407         (unspec:VSTRUCT_2QD [
7408           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7409           UNSPEC_LD2_DUP))]
7410   "TARGET_SIMD"
7411   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7412   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 to a single lane; operand 2 supplies the untouched lanes and the
;; lane index is flipped for endianness at output time.
7415 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7416   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7417         (unspec:VSTRUCT_2QD [
7418                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7419                 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7420                 (match_operand:SI 3 "immediate_operand" "i")]
7421                 UNSPEC_LD2_LANE))]
7422   "TARGET_SIMD"
7423   {
7424     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7425                                            INTVAL (operands[3]));
7426     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7427   }
7428   [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name load-lanes expander (2 registers): on big-endian, load
;; into a temporary and reverse each vector's lanes so the GCC vector
;; extension lane numbering is honoured.
7431 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7432   [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7433         (unspec:VSTRUCT_2Q [
7434                 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7435                 UNSPEC_LD2))]
7436   "TARGET_SIMD"
7438   if (BYTES_BIG_ENDIAN)
7439     {
7440       rtx tmp = gen_reg_rtx (<MODE>mode);
7441       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7442                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7443       emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7444       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7445     }
7446   else
7447     emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7448   DONE;
;; ST2: interleaving store of a 2-register structure.
7451 (define_insn "aarch64_simd_st2<vstruct_elt>"
7452   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7453         (unspec:VSTRUCT_2Q [
7454                 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7455                 UNSPEC_ST2))]
7456   "TARGET_SIMD"
7457   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7458   [(set_attr "type" "neon_store2_2reg<q>")]
7461 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 from a single lane; lane index flipped for endianness at output.
7462 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7463   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7464         (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7465                      (match_operand:SI 2 "immediate_operand" "i")]
7466                      UNSPEC_ST2_LANE))]
7467   "TARGET_SIMD"
7468   {
7469     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7470                                            INTVAL (operands[2]));
7471     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7472   }
7473   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-name store-lanes expander (2 registers): reverse lanes into
;; a temporary first on big-endian, mirroring vec_load_lanes above.
7476 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7477   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7478         (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7479                    UNSPEC_ST2))]
7480   "TARGET_SIMD"
7482   if (BYTES_BIG_ENDIAN)
7483     {
7484       rtx tmp = gen_reg_rtx (<MODE>mode);
7485       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7486                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7487       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7488       emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7489     }
7490   else
7491     emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7492   DONE;
;; LD3: de-interleaving load of a 3-register structure.
7495 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7496   [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7497         (unspec:VSTRUCT_3Q [
7498           (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7499           UNSPEC_LD3))]
7500   "TARGET_SIMD"
7501   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7502   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure and replicate it to all lanes.
7505 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7506   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7507         (unspec:VSTRUCT_3QD [
7508           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7509           UNSPEC_LD3_DUP))]
7510   "TARGET_SIMD"
7511   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7512   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 to a single lane; lane index flipped for endianness at output.
7515 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7516   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7517         (unspec:VSTRUCT_3QD [
7518                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7519                 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7520                 (match_operand:SI 3 "immediate_operand" "i")]
7521                 UNSPEC_LD3_LANE))]
7522   "TARGET_SIMD"
7524     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7525                                            INTVAL (operands[3]));
7526     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7528   [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name load-lanes expander (3 registers); big-endian handling
;; as in the 2-register version.
7531 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7532   [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7533         (unspec:VSTRUCT_3Q [
7534                 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7535                 UNSPEC_LD3))]
7536   "TARGET_SIMD"
7538   if (BYTES_BIG_ENDIAN)
7539     {
7540       rtx tmp = gen_reg_rtx (<MODE>mode);
7541       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7542                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7543       emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7544       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7545     }
7546   else
7547     emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7548   DONE;
;; ST3: interleaving store of a 3-register structure.
7551 (define_insn "aarch64_simd_st3<vstruct_elt>"
7552   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7553         (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7554                    UNSPEC_ST3))]
7555   "TARGET_SIMD"
7556   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7557   [(set_attr "type" "neon_store3_3reg<q>")]
7560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 from a single lane; lane index flipped for endianness at output.
7561 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7562   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7563         (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7564                      (match_operand:SI 2 "immediate_operand" "i")]
7565                      UNSPEC_ST3_LANE))]
7566   "TARGET_SIMD"
7567   {
7568     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7569                                            INTVAL (operands[2]));
7570     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7571   }
7572   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name store-lanes expander (3 registers); big-endian handling
;; as in the 2-register version.
7575 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7576   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7577         (unspec:VSTRUCT_3Q [
7578                 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7579                 UNSPEC_ST3))]
7580   "TARGET_SIMD"
7582   if (BYTES_BIG_ENDIAN)
7583     {
7584       rtx tmp = gen_reg_rtx (<MODE>mode);
7585       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7586                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7587       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7588       emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7589     }
7590   else
7591     emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7592   DONE;
;; LD4: de-interleaving load of a 4-register structure.
7595 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7596   [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7597         (unspec:VSTRUCT_4Q [
7598           (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7599           UNSPEC_LD4))]
7600   "TARGET_SIMD"
7601   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7602   [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate it to all lanes.
7605 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7606   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7607         (unspec:VSTRUCT_4QD [
7608           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7609           UNSPEC_LD4_DUP))]
7610   "TARGET_SIMD"
7611   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7612   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 to a single lane; lane index flipped for endianness at output.
7615 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7616   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7617         (unspec:VSTRUCT_4QD [
7618                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7619                 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7620                 (match_operand:SI 3 "immediate_operand" "i")]
7621                 UNSPEC_LD4_LANE))]
7622   "TARGET_SIMD"
7624     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7625                                            INTVAL (operands[3]));
7626     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7628   [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name load-lanes expander (4 registers); big-endian handling
;; as in the 2-register version.
7631 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7632   [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7633         (unspec:VSTRUCT_4Q [
7634                 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7635                 UNSPEC_LD4))]
7636   "TARGET_SIMD"
7638   if (BYTES_BIG_ENDIAN)
7639     {
7640       rtx tmp = gen_reg_rtx (<MODE>mode);
7641       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7642                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7643       emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7644       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7645     }
7646   else
7647     emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7648   DONE;
;; ST4: interleaving store of a 4-register structure.
7651 (define_insn "aarch64_simd_st4<vstruct_elt>"
7652   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7653         (unspec:VSTRUCT_4Q [
7654                 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7655                 UNSPEC_ST4))]
7656   "TARGET_SIMD"
7657   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7658   [(set_attr "type" "neon_store4_4reg<q>")]
7661 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 from a single lane; lane index flipped for endianness at output.
7662 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7663   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7664         (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7665                      (match_operand:SI 2 "immediate_operand" "i")]
7666                      UNSPEC_ST4_LANE))]
7667   "TARGET_SIMD"
7668   {
7669     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7670                                            INTVAL (operands[2]));
7671     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7672   }
7673   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name store-lanes expander (4 registers); big-endian handling
;; as in the 2-register version.
7676 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7677   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7678         (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7679                    UNSPEC_ST4))]
7680   "TARGET_SIMD"
7682   if (BYTES_BIG_ENDIAN)
7683     {
7684       rtx tmp = gen_reg_rtx (<MODE>mode);
7685       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7686                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7687       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7688       emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7689     }
7690   else
7691     emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7692   DONE;
7695 ;; Patterns for rcpc3 vector lane loads and stores.
;; STL1: single-lane store with release semantics (FEAT_LRCPC3).  The lane
;; index is converted from GCC numbering at assembly-emission time.
7697 (define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
7698   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
7699         (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
7700                      (match_operand:SI 2 "immediate_operand" "i")]
7701                      UNSPEC_STL1_LANE))]
7702   "TARGET_RCPC3"
7703   {
7704     operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
7705                                            INTVAL (operands[2]));
7706     return "stl1\\t{%S1.<Vetype>}[%2], %0";
7707   }
7708   [(set_attr "type" "neon_store2_one_lane")]
;; Builder for the STL1 insn above: wraps the address (operand 0) in a
;; BLKmode MEM sized to one element and range-checks the lane index.
7711 (define_expand "aarch64_vec_stl1_lane<mode>"
7712  [(match_operand:DI 0 "register_operand")
7713   (match_operand:V12DIF 1 "register_operand")
7714   (match_operand:SI 2 "immediate_operand")]
7715   "TARGET_RCPC3"
7717   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7718   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7720   aarch64_simd_lane_bounds (operands[2], 0,
7721                             GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7722   emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
7723                                         operands[1], operands[2]));
7724   DONE;
;; LDAP1: single-lane load with acquire semantics (FEAT_LRCPC3).  Operand 2
;; is tied to the output ("0") so the other lanes of the destination are
;; preserved.
7727 (define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
7728   [(set (match_operand:V12DIF 0 "register_operand" "=w")
7729         (unspec:V12DIF [
7730                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
7731                 (match_operand:V12DIF 2 "register_operand" "0")
7732                 (match_operand:SI 3 "immediate_operand" "i")]
7733                 UNSPEC_LDAP1_LANE))]
7734   "TARGET_RCPC3"
7735   {
7736     operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
7737                                            INTVAL (operands[3]));
7738     return "ldap1\\t{%S0.<Vetype>}[%3], %1";
7739   }
7740   [(set_attr "type" "neon_load2_one_lane")]
;; Builder for the LDAP1 insn above, analogous to aarch64_vec_stl1_lane.
7743 (define_expand "aarch64_vec_ldap1_lane<mode>"
7744   [(match_operand:V12DIF 0 "register_operand")
7745         (match_operand:DI 1 "register_operand")
7746         (match_operand:V12DIF 2 "register_operand")
7747         (match_operand:SI 3 "immediate_operand")]
7748   "TARGET_RCPC3"
7750   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7751   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7753   aarch64_simd_lane_bounds (operands[3], 0,
7754                             GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7755   emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
7756                                 mem, operands[2], operands[3]));
7757   DONE;
;; Permute each register of a register list with TBL1 using the byte mask
;; in operand 2; used to convert between hardware and GCC lane order on
;; big-endian.  Kept as a single insn until after reload, then split into
;; one qtbl1 per 128-bit register.  The earlyclobber ("=&w") stops the
;; destination list from overlapping the not-yet-consumed inputs.
7760 (define_insn_and_split "aarch64_rev_reglist<mode>"
7761 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7762         (unspec:VSTRUCT_QD
7763                    [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7764                     (match_operand:V16QI 2 "register_operand" "w")]
7765                    UNSPEC_REV_REGLIST))]
7766   "TARGET_SIMD"
7767   "#"
7768   "&& reload_completed"
7769   [(const_int 0)]
7771   int i;
7772   int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7773   for (i = 0; i < nregs; i++)
7774     {
7775       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7776       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7777       emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7778     }
7779   DONE;
7781   [(set_attr "type" "neon_tbl1_q")
7782    (set_attr "length" "<insn_count>")]
7785 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for Q/D register-list modes.  When pseudos are
;; still available, force the source into a register for any non-REG
;; destination so the matching insns only ever see one memory operand.
7787 (define_expand "mov<mode>"
7788   [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7789         (match_operand:VSTRUCT_QD 1 "general_operand"))]
7790   "TARGET_FLOAT"
7792   if (can_create_pseudo_p ())
7793     {
7794       if (GET_CODE (operands[0]) != REG)
7795         operands[1] = force_reg (<MODE>mode, operands[1]);
7796     }
;; Same legitimization for the opaque VSTRUCT modes (OI/CI/XI).
7799 (define_expand "mov<mode>"
7800   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7801         (match_operand:VSTRUCT 1 "general_operand"))]
7802   "TARGET_FLOAT"
7804   if (can_create_pseudo_p ())
7805     {
7806       if (GET_CODE (operands[0]) != REG)
7807         operands[1] = force_reg (<MODE>mode, operands[1]);
7808     }
;; V8DI moves live in general registers (see *aarch64_movv8di), so only a
;; memory destination needs the source forced into a register.
7811 (define_expand "movv8di"
7812   [(set (match_operand:V8DI 0 "nonimmediate_operand")
7813         (match_operand:V8DI 1 "general_operand"))]
7814   ""
7816   if (can_create_pseudo_p () && MEM_P (operands[0]))
7817     operands[1] = force_reg (V8DImode, operands[1]);
;; LD1/ST1 multi-register intrinsics expanders: each wraps the address in
;; operand DI in a MEM of the full struct mode and emits the matching
;; single-instruction pattern below it.
7820 (define_expand "aarch64_ld1x3<vstruct_elt>"
7821   [(match_operand:VSTRUCT_3QD 0 "register_operand")
7822    (match_operand:DI 1 "register_operand")]
7823   "TARGET_SIMD"
7825   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7826   emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7827   DONE;
;; LD1 of three consecutive registers (%S0 - %U0).
7830 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7831   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7832         (unspec:VSTRUCT_3QD
7833           [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7834           UNSPEC_LD1))]
7835   "TARGET_SIMD"
7836   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7837   [(set_attr "type" "neon_load1_3reg<q>")]
7840 (define_expand "aarch64_ld1x4<vstruct_elt>"
7841   [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7842    (match_operand:DI 1 "register_operand" "r")]
7843   "TARGET_SIMD"
7845   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7846   emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7847   DONE;
;; LD1 of four consecutive registers (%S0 - %V0).
7850 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7851   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7852         (unspec:VSTRUCT_4QD
7853           [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7854         UNSPEC_LD1))]
7855   "TARGET_SIMD"
7856   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7857   [(set_attr "type" "neon_load1_4reg<q>")]
7860 (define_expand "aarch64_st1x2<vstruct_elt>"
7861   [(match_operand:DI 0 "register_operand")
7862    (match_operand:VSTRUCT_2QD 1 "register_operand")]
7863   "TARGET_SIMD"
7865   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7866   emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7867   DONE;
;; ST1 of two consecutive registers (%S1 - %T1).
7870 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7871   [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7872         (unspec:VSTRUCT_2QD
7873                 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7874                 UNSPEC_ST1))]
7875   "TARGET_SIMD"
7876   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7877   [(set_attr "type" "neon_store1_2reg<q>")]
7880 (define_expand "aarch64_st1x3<vstruct_elt>"
7881   [(match_operand:DI 0 "register_operand")
7882    (match_operand:VSTRUCT_3QD 1 "register_operand")]
7883   "TARGET_SIMD"
7885   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7886   emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7887   DONE;
;; ST1 of three consecutive registers (%S1 - %U1).
7890 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7891   [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7892         (unspec:VSTRUCT_3QD
7893                 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7894                 UNSPEC_ST1))]
7895   "TARGET_SIMD"
7896   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7897   [(set_attr "type" "neon_store1_3reg<q>")]
7900 (define_expand "aarch64_st1x4<vstruct_elt>"
7901   [(match_operand:DI 0 "register_operand" "")
7902    (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
7903   "TARGET_SIMD"
7905   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7906   emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7907   DONE;
;; ST1 of four consecutive registers (%S1 - %V1).
7910 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7911   [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7912         (unspec:VSTRUCT_4QD
7913                 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7914                 UNSPEC_ST1))]
7915   "TARGET_SIMD"
7916   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7917   [(set_attr "type" "neon_store1_4reg<q>")]
;; Little-endian moves of register-list modes: reg-reg is emitted as "#"
;; and split later into per-register moves; memory forms use a single
;; ld1/st1 over the whole register list.
7920 (define_insn "*aarch64_mov<mode>"
7921   [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
7922         (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
7923   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7924    && (register_operand (operands[0], <MODE>mode)
7925        || register_operand (operands[1], <MODE>mode))"
7926   {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
7927      [ w        , w   ; multiple                       , <insn_count>  ] #
7928      [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7929      [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
7930   }
;; Same for the opaque VSTRUCT modes; the .16b arrangement is used since
;; these modes carry no element-type information.
7933 (define_insn "*aarch64_mov<mode>"
7934   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
7935         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
7936   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7937    && (register_operand (operands[0], <MODE>mode)
7938        || register_operand (operands[1], <MODE>mode))"
7939   {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
7940      [ w        , w   ; multiple                       , <insn_count>  ] #
7941      [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
7942      [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
7943   }
;; V8DI is held in general registers ("r"); always split after reload.
7946 (define_insn "*aarch64_movv8di"
7947   [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7948         (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7949   "(register_operand (operands[0], V8DImode)
7950     || register_operand (operands[1], V8DImode))"
7951   "#"
7952   [(set_attr "type" "multiple,multiple,multiple")
7953    (set_attr "length" "32,16,16")]
;; Element-ordered single-register LD1/ST1, used on big-endian where a
;; plain ldr/str would give the wrong lane layout.
7956 (define_insn "aarch64_be_ld1<mode>"
7957   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
7958         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7959                              "aarch64_simd_struct_operand" "Utv")]
7960         UNSPEC_LD1))]
7961   "TARGET_SIMD"
7962   "ld1\\t{%0<Vmtype>}, %1"
7963   [(set_attr "type" "neon_load1_1reg<q>")]
7966 (define_insn "aarch64_be_st1<mode>"
7967   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7968         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7969         UNSPEC_ST1))]
7970   "TARGET_SIMD"
7971   "st1\\t{%1<Vmtype>}, %0"
7972   [(set_attr "type" "neon_store1_1reg<q>")]
;; Register-list moves for big-endian (or SIMD-less FP) targets.  Two-
;; register lists fit a single stp/ldp; three- and four-register lists and
;; the CI/XI opaque modes are always split into smaller moves ("#").
7975 (define_insn "*aarch64_be_mov<mode>"
7976   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
7977         (match_operand:VSTRUCT_2D 1 "general_operand"))]
7978   "TARGET_FLOAT
7979    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7980    && (register_operand (operands[0], <MODE>mode)
7981        || register_operand (operands[1], <MODE>mode))"
7982   {@ [ cons: =0 , 1 ; attrs: type , length ]
7983      [ w        , w ; multiple    , 8      ] #
7984      [ m        , w ; neon_stp    , 4      ] stp\t%d1, %R1, %0
7985      [ w        , m ; neon_ldp    , 4      ] ldp\t%d0, %R0, %1
7986   }
7989 (define_insn "*aarch64_be_mov<mode>"
7990   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
7991         (match_operand:VSTRUCT_2Q 1 "general_operand"))]
7992   "TARGET_FLOAT
7993    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7994    && (register_operand (operands[0], <MODE>mode)
7995        || register_operand (operands[1], <MODE>mode))"
7996   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
7997      [ w        , w ; multiple    , simd , 8      ] #
7998      [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
7999      [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
8000   }
;; OImode (two opaque Q registers): same shape as the 2Q pattern above.
8003 (define_insn "*aarch64_be_movoi"
8004   [(set (match_operand:OI 0 "nonimmediate_operand")
8005         (match_operand:OI 1 "general_operand"))]
8006   "TARGET_FLOAT
8007    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8008    && (register_operand (operands[0], OImode)
8009        || register_operand (operands[1], OImode))"
8010   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8011      [ w        , w ; multiple    , simd , 8      ] #
8012      [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
8013      [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
8014   }
;; Three-register lists: always split (see the matching define_split).
8017 (define_insn "*aarch64_be_mov<mode>"
8018   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
8019         (match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
8020   "TARGET_FLOAT
8021    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8022    && (register_operand (operands[0], <MODE>mode)
8023        || register_operand (operands[1], <MODE>mode))"
8024   "#"
8025   [(set_attr "type" "multiple")
8026    (set_attr "arch" "fp<q>,*,*")
8027    (set_attr "length" "12,8,8")]
8030 (define_insn "*aarch64_be_movci"
8031   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8032         (match_operand:CI 1 "general_operand"      " w,w,o"))]
8033   "TARGET_FLOAT
8034    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8035    && (register_operand (operands[0], CImode)
8036        || register_operand (operands[1], CImode))"
8037   "#"
8038   [(set_attr "type" "multiple")
8039    (set_attr "arch" "simd,*,*")
8040    (set_attr "length" "12,8,8")]
;; Four-register lists: always split.
8043 (define_insn "*aarch64_be_mov<mode>"
8044   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8045         (match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
8046   "TARGET_FLOAT
8047    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8048    && (register_operand (operands[0], <MODE>mode)
8049        || register_operand (operands[1], <MODE>mode))"
8050   "#"
8051   [(set_attr "type" "multiple")
8052    (set_attr "arch" "fp<q>,*,*")
8053    (set_attr "length" "16,8,8")]
8056 (define_insn "*aarch64_be_movxi"
8057   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8058         (match_operand:XI 1 "general_operand"      " w,w,o"))]
8059   "TARGET_FLOAT
8060    && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8061    && (register_operand (operands[0], XImode)
8062        || register_operand (operands[1], XImode))"
8063   "#"
8064   [(set_attr "type" "multiple")
8065    (set_attr "arch" "simd,*,*")
8066    (set_attr "length" "16,8,8")]
;; Post-reload splits for the "#" alternatives above.
;; Two-register reg-reg copy: split into per-element-register moves.
8069 (define_split
8070   [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8071         (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8072   "TARGET_FLOAT && reload_completed"
8073   [(const_int 0)]
8075   aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8076   DONE;
8079 (define_split
8080   [(set (match_operand:OI 0 "register_operand")
8081         (match_operand:OI 1 "register_operand"))]
8082   "TARGET_FLOAT && reload_completed"
8083   [(const_int 0)]
8085   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8086   DONE;
;; Three-register move: reg-reg splits into three register moves; the
;; big-endian/no-SIMD memory case becomes one pair move plus one single
;; move for the trailing register.
8089 (define_split
8090   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8091         (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8092   "TARGET_FLOAT && reload_completed"
8093   [(const_int 0)]
8095   if (register_operand (operands[0], <MODE>mode)
8096       && register_operand (operands[1], <MODE>mode))
8097     {
8098       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8099       DONE;
8100     }
8101   else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8102     {
8103       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8104       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8105       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8106                                            <MODE>mode, 0),
8107                       simplify_gen_subreg (pair_mode, operands[1],
8108                                            <MODE>mode, 0));
8109       emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8110                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
8111                                                         operands[0],
8112                                                         <MODE>mode,
8113                                                         2 * elt_size)),
8114                       gen_lowpart (<VSTRUCT_ELT>mode,
8115                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
8116                                                         operands[1],
8117                                                         <MODE>mode,
8118                                                         2 * elt_size)));
8119       DONE;
8120     }
8121   else
8122     FAIL;
;; CImode (three opaque Q registers): reg-reg splits into three TImode
;; moves; the memory case becomes an OImode pair move plus one TImode move.
8125 (define_split
8126   [(set (match_operand:CI 0 "nonimmediate_operand")
8127         (match_operand:CI 1 "general_operand"))]
8128   "TARGET_FLOAT && reload_completed"
8129   [(const_int 0)]
8131   if (register_operand (operands[0], CImode)
8132       && register_operand (operands[1], CImode))
8133     {
8134       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8135       DONE;
8136     }
8137   else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8138     {
8139       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8140                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
8141       emit_move_insn (gen_lowpart (V16QImode,
8142                                    simplify_gen_subreg (TImode, operands[0],
8143                                                         CImode, 32)),
8144                       gen_lowpart (V16QImode,
8145                                    simplify_gen_subreg (TImode, operands[1],
8146                                                         CImode, 32)));
8147       DONE;
8148     }
8149   else
8150     FAIL;
;; Four-register move: reg-reg splits into four register moves; the
;; memory case becomes two pair moves.
8153 (define_split
8154   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8155         (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8156   "TARGET_FLOAT && reload_completed"
8157   [(const_int 0)]
8159   if (register_operand (operands[0], <MODE>mode)
8160       && register_operand (operands[1], <MODE>mode))
8161     {
8162       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8163       DONE;
8164     }
8165   else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8166     {
8167       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8168       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8169       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8170                                            <MODE>mode, 0),
8171                       simplify_gen_subreg (pair_mode, operands[1],
8172                                            <MODE>mode, 0));
8173       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8174                                            <MODE>mode, 2 * elt_size),
8175                       simplify_gen_subreg (pair_mode, operands[1],
8176                                            <MODE>mode, 2 * elt_size));
8177       DONE;
8178     }
8179   else
8180     FAIL;
;; XImode (four opaque Q registers): analogous, using two OImode halves.
8183 (define_split
8184   [(set (match_operand:XI 0 "nonimmediate_operand")
8185         (match_operand:XI 1 "general_operand"))]
8186   "TARGET_FLOAT && reload_completed"
8187   [(const_int 0)]
8189   if (register_operand (operands[0], XImode)
8190       && register_operand (operands[1], XImode))
8191     {
8192       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8193       DONE;
8194     }
8195   else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8196     {
8197       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8198                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
8199       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8200                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
8201       DONE;
8202     }
8203   else
8204     FAIL;
;; Split V8DI moves (held in general registers) into DImode or TImode
;; pieces.  A destination piece that overlaps a register used by the
;; source (e.g. the base address register of a load) is deferred and
;; emitted last so the remaining pieces still see the original value.
8207 (define_split
8208   [(set (match_operand:V8DI 0 "nonimmediate_operand")
8209         (match_operand:V8DI 1 "general_operand"))]
8210   "reload_completed"
8211   [(const_int 0)]
8213   if (register_operand (operands[0], V8DImode)
8214       && register_operand (operands[1], V8DImode))
8215     {
8216       aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8217       DONE;
8218     }
8219   else if ((register_operand (operands[0], V8DImode)
8220             && memory_operand (operands[1], V8DImode))
8221            || (memory_operand (operands[0], V8DImode)
8222                && register_operand (operands[1], V8DImode)))
8223     {
8224       /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16.  */
8225       auto mode = STRICT_ALIGNMENT ? DImode : TImode;
8226       int increment = GET_MODE_SIZE (mode);
8227       std::pair<rtx, rtx> last_pair = {};
8228       for (int offset = 0; offset < 64; offset += increment)
8229         {
8230           std::pair<rtx, rtx> pair = {
8231             simplify_gen_subreg (mode, operands[0], V8DImode, offset),
8232             simplify_gen_subreg (mode, operands[1], V8DImode, offset)
8233           };
8234           if (register_operand (pair.first, mode)
8235               && reg_overlap_mentioned_p (pair.first, pair.second))
8236             last_pair = pair;
8237           else
8238             emit_move_insn (pair.first, pair.second);
8239         }
8240       if (last_pair.first)
8241         emit_move_insn (last_pair.first, last_pair.second);
8242       DONE;
8243     }
8244   else
8245     FAIL;
;; LD<n>R (load-and-replicate) expander: the memory reference is a BLKmode
;; MEM sized to <nregs> elements, since only one element per register is
;; actually read.
8248 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8249   [(match_operand:VSTRUCT_QD 0 "register_operand")
8250    (match_operand:DI 1 "register_operand")]
8251   "TARGET_SIMD"
8253   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8254   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8256   emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8257   DONE;
;; D-register structure loads.  The *DNX modes use the interleaving
;; ld2/ld3/ld4; the *DX (single-element .1d) modes degenerate to ld1 on a
;; register list, since no interleave is possible with one element.
8260 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8261   [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8262         (unspec:VSTRUCT_2DNX [
8263           (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8264           UNSPEC_LD2_DREG))]
8265   "TARGET_SIMD"
8266   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8267   [(set_attr "type" "neon_load2_2reg<q>")]
8270 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8271   [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8272         (unspec:VSTRUCT_2DX [
8273           (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8274           UNSPEC_LD2_DREG))]
8275   "TARGET_SIMD"
8276   "ld1\\t{%S0.1d - %T0.1d}, %1"
8277   [(set_attr "type" "neon_load1_2reg<q>")]
8280 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8281   [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8282         (unspec:VSTRUCT_3DNX [
8283           (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8284           UNSPEC_LD3_DREG))]
8285   "TARGET_SIMD"
8286   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8287   [(set_attr "type" "neon_load3_3reg<q>")]
8290 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8291   [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8292         (unspec:VSTRUCT_3DX [
8293           (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8294           UNSPEC_LD3_DREG))]
8295   "TARGET_SIMD"
8296   "ld1\\t{%S0.1d - %U0.1d}, %1"
8297   [(set_attr "type" "neon_load1_3reg<q>")]
8300 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8301   [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8302         (unspec:VSTRUCT_4DNX [
8303           (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8304           UNSPEC_LD4_DREG))]
8305   "TARGET_SIMD"
8306   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8307   [(set_attr "type" "neon_load4_4reg<q>")]
8310 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8311   [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8312         (unspec:VSTRUCT_4DX [
8313           (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8314           UNSPEC_LD4_DREG))]
8315   "TARGET_SIMD"
8316   "ld1\\t{%S0.1d - %V0.1d}, %1"
8317   [(set_attr "type" "neon_load1_4reg<q>")]
;; Intrinsics expanders for structure loads: wrap the DI address in a MEM
;; of the appropriate mode and dispatch to the matching insn.
8320 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8321  [(match_operand:VSTRUCT_D 0 "register_operand")
8322   (match_operand:DI 1 "register_operand")]
8323   "TARGET_SIMD"
8325   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8326   emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8327   DONE;
;; vld1: an ordinary move on little-endian; the element-ordered be_ld1 on
;; big-endian.
8330 (define_expand "aarch64_ld1<VALL_F16:mode>"
8331  [(match_operand:VALL_F16 0 "register_operand")
8332   (match_operand:DI 1 "register_operand")]
8333   "TARGET_SIMD"
8335   machine_mode mode = <VALL_F16:MODE>mode;
8336   rtx mem = gen_rtx_MEM (mode, operands[1]);
8338   if (BYTES_BIG_ENDIAN)
8339     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8340   else
8341     emit_move_insn (operands[0], mem);
8342   DONE;
8345 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8346  [(match_operand:VSTRUCT_Q 0 "register_operand")
8347   (match_operand:DI 1 "register_operand")]
8348   "TARGET_SIMD"
8350   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8351   emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8352   DONE;
8355 (define_expand "aarch64_ld1x2<vstruct_elt>"
8356  [(match_operand:VSTRUCT_2QD 0 "register_operand")
8357   (match_operand:DI 1 "register_operand")]
8358   "TARGET_SIMD"
8360   machine_mode mode = <MODE>mode;
8361   rtx mem = gen_rtx_MEM (mode, operands[1]);
8363   emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8364   DONE;
;; Single-lane structure load (ld<n> lane): memory is a BLKmode MEM sized
;; to <nregs> elements; the lane index is bounds-checked against the
;; per-vector element count before emitting the insn.
8367 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8368   [(match_operand:VSTRUCT_QD 0 "register_operand")
8369         (match_operand:DI 1 "register_operand")
8370         (match_operand:VSTRUCT_QD 2 "register_operand")
8371         (match_operand:SI 3 "immediate_operand")]
8372   "TARGET_SIMD"
8374   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8375   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8377   aarch64_simd_lane_bounds (operands[3], 0,
8378                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8379   emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8380                                 mem, operands[2], operands[3]));
8381   DONE;
8384 ;; Permuted-store expanders for neon intrinsics.
8386 ;; Permute instructions
8388 ;; vec_perm support
;; Standard-name variable permute: lowered by aarch64_expand_vec_perm,
;; which selects between tbl-based and specialised permute sequences.
8390 (define_expand "vec_perm<mode>"
8391   [(match_operand:VB 0 "register_operand")
8392    (match_operand:VB 1 "register_operand")
8393    (match_operand:VB 2 "register_operand")
8394    (match_operand:VB 3 "register_operand")]
8395   "TARGET_SIMD"
8397   aarch64_expand_vec_perm (operands[0], operands[1],
8398                            operands[2], operands[3], <nunits>);
8399   DONE;
;; TBL with a one-register table.
8402 (define_insn "aarch64_qtbl1<mode>"
8403   [(set (match_operand:VB 0 "register_operand" "=w")
8404         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8405                     (match_operand:VB 2 "register_operand" "w")]
8406                    UNSPEC_TBL))]
8407   "TARGET_SIMD"
8408   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8409   [(set_attr "type" "neon_tbl1<q>")]
;; TBX with a one-register table; operand 1 is tied to the output so
;; out-of-range indices leave the destination bytes unchanged.
8412 (define_insn "aarch64_qtbx1<mode>"
8413   [(set (match_operand:VB 0 "register_operand" "=w")
8414         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8415                     (match_operand:V16QI 2 "register_operand" "w")
8416                     (match_operand:VB 3 "register_operand" "w")]
8417                    UNSPEC_TBX))]
8418   "TARGET_SIMD"
8419   "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8420   [(set_attr "type" "neon_tbl1<q>")]
8423 ;; Two source registers.
8425 (define_insn "aarch64_qtbl2<mode>"
8426   [(set (match_operand:VB 0 "register_operand" "=w")
8427         (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8428                       (match_operand:VB 2 "register_operand" "w")]
8429                       UNSPEC_TBL))]
8430   "TARGET_SIMD"
8431   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8432   [(set_attr "type" "neon_tbl2")]
;; TBX with a two-register table; operand 1 tied to the output as in qtbx1.
8435 (define_insn "aarch64_qtbx2<mode>"
8436   [(set (match_operand:VB 0 "register_operand" "=w")
8437         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8438                       (match_operand:V2x16QI 2 "register_operand" "w")
8439                       (match_operand:VB 3 "register_operand" "w")]
8440                       UNSPEC_TBX))]
8441   "TARGET_SIMD"
8442   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8443   [(set_attr "type" "neon_tbl2")]
8446 ;; Three source registers.
8448 (define_insn "aarch64_qtbl3<mode>"
8449   [(set (match_operand:VB 0 "register_operand" "=w")
8450         (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8451                       (match_operand:VB 2 "register_operand" "w")]
8452                       UNSPEC_TBL))]
8453   "TARGET_SIMD"
8454   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8455   [(set_attr "type" "neon_tbl3")]
8458 (define_insn "aarch64_qtbx3<mode>"
8459   [(set (match_operand:VB 0 "register_operand" "=w")
8460         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8461                       (match_operand:V3x16QI 2 "register_operand" "w")
8462                       (match_operand:VB 3 "register_operand" "w")]
8463                       UNSPEC_TBX))]
8464   "TARGET_SIMD"
8465   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8466   [(set_attr "type" "neon_tbl3")]
8469 ;; Four source registers.
8471 (define_insn "aarch64_qtbl4<mode>"
8472   [(set (match_operand:VB 0 "register_operand" "=w")
8473         (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8474                       (match_operand:VB 2 "register_operand" "w")]
8475                       UNSPEC_TBL))]
8476   "TARGET_SIMD"
8477   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8478   [(set_attr "type" "neon_tbl4")]
;; TBX with a four-register table; operand 1 tied to the output as in qtbx1.
8481 (define_insn "aarch64_qtbx4<mode>"
8482   [(set (match_operand:VB 0 "register_operand" "=w")
8483         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8484                       (match_operand:V4x16QI 2 "register_operand" "w")
8485                       (match_operand:VB 3 "register_operand" "w")]
8486                       UNSPEC_TBX))]
8487   "TARGET_SIMD"
8488   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8489   [(set_attr "type" "neon_tbl4")]
;; Build a V2x16QI register pair from two V16QI inputs (used to form tbl
;; tables).  Emitted as "#" and split after reload by
;; aarch64_split_combinev16qi into the required register moves.
8492 (define_insn_and_split "aarch64_combinev16qi"
8493   [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8494         (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8495                          (match_operand:V16QI 2 "register_operand" "w")]
8496                         UNSPEC_CONCAT))]
8497   "TARGET_SIMD"
8498   "#"
8499   "&& reload_completed"
8500   [(const_int 0)]
8502   aarch64_split_combinev16qi (operands);
8503   DONE;
8505 [(set_attr "type" "multiple")]
8508 ;; This instruction's pattern is generated directly by
8509 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8510 ;; need corresponding changes there.
8511 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8512   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8513         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8514                           (match_operand:VALL_F16 2 "register_operand" "w")]
8515          PERMUTE))]
8516   "TARGET_SIMD"
8517   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8518   [(set_attr "type" "neon_permute<q>")]
8521 ;; ZIP1 ignores the contents of the upper halves of the registers,
8522 ;; so we can describe 128-bit operations in terms of 64-bit inputs.
8523 (define_insn "aarch64_zip1<mode>_low"
8524   [(set (match_operand:VQ 0 "register_operand" "=w")
8525         (unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
8526                     (match_operand:<VHALF> 2 "register_operand" "w")]
8527                    UNSPEC_ZIP1))]
8528   "TARGET_SIMD"
8529   "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8530   [(set_attr "type" "neon_permute_q")]
8533 ;; This instruction's pattern is generated directly by
8534 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8535 ;; need corresponding changes there.  Note that the immediate (third)
8536 ;; operand is a lane index not a byte index.
8537 (define_insn "aarch64_ext<mode>"
8538   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8539         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8540                           (match_operand:VALL_F16 2 "register_operand" "w")
8541                           (match_operand:SI 3 "immediate_operand" "i")]
8542          UNSPEC_EXT))]
8543   "TARGET_SIMD"
8545   operands[3] = GEN_INT (INTVAL (operands[3])
8546       * GET_MODE_UNIT_SIZE (<MODE>mode));
8547   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8549   [(set_attr "type" "neon_ext<q>")]
8552 ;; This instruction's pattern is generated directly by
8553 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8554 ;; need corresponding changes there.
8555 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8556   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8557         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8558                     REVERSE))]
8559   "TARGET_SIMD"
8560   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8561   [(set_attr "type" "neon_rev<q>")]
;; Structure stores from 64-bit (D-register) tuples.  The *DNX variants use
;; the real STn interleaving store; the *DX variants (64-bit integer element
;; modes) use ST1 on .1d operands instead, which has the same effect since
;; there is only one element per vector and so nothing to interleave.
8564 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8565   [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8566         (unspec:VSTRUCT_2DNX [
8567                 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8568                 UNSPEC_ST2))]
8569   "TARGET_SIMD"
8570   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8571   [(set_attr "type" "neon_store2_2reg")]
8574 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8575   [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8576         (unspec:VSTRUCT_2DX [
8577                 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8578                 UNSPEC_ST2))]
8579   "TARGET_SIMD"
8580   "st1\\t{%S1.1d - %T1.1d}, %0"
8581   [(set_attr "type" "neon_store1_2reg")]
8584 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8585   [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8586         (unspec:VSTRUCT_3DNX [
8587                 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8588                 UNSPEC_ST3))]
8589   "TARGET_SIMD"
8590   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8591   [(set_attr "type" "neon_store3_3reg")]
8594 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8595   [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8596         (unspec:VSTRUCT_3DX [
8597                 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8598                 UNSPEC_ST3))]
8599   "TARGET_SIMD"
8600   "st1\\t{%S1.1d - %U1.1d}, %0"
8601   [(set_attr "type" "neon_store1_3reg")]
8604 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8605   [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8606         (unspec:VSTRUCT_4DNX [
8607                 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8608                 UNSPEC_ST4))]
8609   "TARGET_SIMD"
8610   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8611   [(set_attr "type" "neon_store4_4reg")]
8614 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8615   [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8616         (unspec:VSTRUCT_4DX [
8617                 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8618                 UNSPEC_ST4))]
8619   "TARGET_SIMD"
8620   "st1\\t{%S1.1d - %V1.1d}, %0"
8621   [(set_attr "type" "neon_store1_4reg")]
;; Expanders for the aarch64_st<nregs> builtins: operand 0 is the base
;; address (DI), operand 1 the register tuple to store.  The D-register
;; form forwards to the _dreg patterns above, the Q-register form to the
;; generic aarch64_simd_st patterns.
8624 (define_expand "aarch64_st<nregs><vstruct_elt>"
8625  [(match_operand:DI 0 "register_operand")
8626   (match_operand:VSTRUCT_D 1 "register_operand")]
8627   "TARGET_SIMD"
8629   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8630   emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8631   DONE;
8634 (define_expand "aarch64_st<nregs><vstruct_elt>"
8635  [(match_operand:DI 0 "register_operand")
8636   (match_operand:VSTRUCT_Q 1 "register_operand")]
8637   "TARGET_SIMD"
8639   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8640   emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8641   DONE;
;; Single-lane structure store: operand 2 is the lane number, bounds-checked
;; against the per-structure lane count.  The MEM is BLKmode with an explicit
;; size of <nregs> elements.
8644 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8645  [(match_operand:DI 0 "register_operand")
8646   (match_operand:VSTRUCT_QD 1 "register_operand")
8647   (match_operand:SI 2 "immediate_operand")]
8648   "TARGET_SIMD"
8650   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8651   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8653   aarch64_simd_lane_bounds (operands[2], 0,
8654                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8655   emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8656                                         operands[1], operands[2]));
8657   DONE;
;; ST1 of a single vector: a plain store on little-endian, the dedicated
;; big-endian pattern otherwise (element order differs from a bare move).
8660 (define_expand "aarch64_st1<VALL_F16:mode>"
8661  [(match_operand:DI 0 "register_operand")
8662   (match_operand:VALL_F16 1 "register_operand")]
8663   "TARGET_SIMD"
8665   machine_mode mode = <VALL_F16:MODE>mode;
8666   rtx mem = gen_rtx_MEM (mode, operands[0]);
8668   if (BYTES_BIG_ENDIAN)
8669     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8670   else
8671     emit_move_insn (mem, operands[1]);
8672   DONE;
8675 ;; Standard pattern name vec_init<mode><Vel>.
;; Both forms delegate entirely to aarch64_expand_vector_init; operand 1 is
;; the PARALLEL of initializer elements (hence the unconstrained predicate).
8677 (define_expand "vec_init<mode><Vel>"
8678   [(match_operand:VALL_F16 0 "register_operand")
8679    (match_operand 1 "" "")]
8680   "TARGET_SIMD"
8682   aarch64_expand_vector_init (operands[0], operands[1]);
8683   DONE;
;; Initialize a 128-bit vector from two 64-bit halves.
8686 (define_expand "vec_init<mode><Vhalf>"
8687   [(match_operand:VQ_NO2E 0 "register_operand")
8688    (match_operand 1 "" "")]
8689   "TARGET_SIMD"
8691   aarch64_expand_vector_init (operands[0], operands[1]);
8692   DONE;
;; Load one scalar element from memory and replicate it to all lanes (LD1R).
8695 (define_insn "*aarch64_simd_ld1r<mode>"
8696   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8697         (vec_duplicate:VALL_F16
8698           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8699   "TARGET_SIMD"
8700   "ld1r\\t{%0.<Vtype>}, %1"
8701   [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 into a pair of consecutive registers (the x2 multi-register form).
8704 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8705   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8706         (unspec:VSTRUCT_2QD [
8707             (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8708             UNSPEC_LD1))]
8709   "TARGET_SIMD"
8710   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8711   [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal estimate (vector and scalar forms share the
;; VHSDF_HSDF iterator).
8715 (define_insn "@aarch64_frecpe<mode>"
8716   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8717         (unspec:VHSDF_HSDF
8718          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8719          UNSPEC_FRECPE))]
8720   "TARGET_SIMD"
8721   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8722   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Floating-point reciprocal exponent (scalar only).
8725 (define_insn "aarch64_frecpx<mode>"
8726   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8727         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8728          UNSPEC_FRECPX))]
8729   "TARGET_SIMD"
8730   "frecpx\t%<s>0, %<s>1"
8731   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
;; Floating-point reciprocal step (Newton-Raphson refinement companion to
;; FRECPE).
8734 (define_insn "@aarch64_frecps<mode>"
8735   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8736         (unspec:VHSDF_HSDF
8737           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8738           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8739           UNSPEC_FRECPS))]
8740   "TARGET_SIMD"
8741   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8742   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate.
8745 (define_insn "aarch64_urecpe<mode>"
8746   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8747         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8748                 UNSPEC_URECPE))]
8749  "TARGET_SIMD"
8750  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8751   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8753 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract a single element: forwards to the get_lane pattern.
8755 (define_expand "vec_extract<mode><Vel>"
8756   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8757    (match_operand:VALL_F16 1 "register_operand")
8758    (match_operand:SI 2 "immediate_operand")]
8759   "TARGET_SIMD"
8761     emit_insn
8762       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8763     DONE;
8766 ;; Extract a 64-bit vector from one half of a 128-bit vector.
;; Operand 2 selects the half (0 = low, 1 = high); it is scaled to a lane
;; offset and turned into a stepped PARALLEL for the get_half pattern.
8767 (define_expand "vec_extract<mode><Vhalf>"
8768   [(match_operand:<VHALF> 0 "register_operand")
8769    (match_operand:VQMOV_NO2E 1 "register_operand")
8770    (match_operand 2 "immediate_operand")]
8771   "TARGET_SIMD"
8773   int start = INTVAL (operands[2]);
8774   gcc_assert (start == 0 || start == 1);
8775   start *= <nunits> / 2;
8776   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8777   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8778   DONE;
8781 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8782 (define_expand "vec_extract<mode><V1half>"
8783   [(match_operand:<V1HALF> 0 "register_operand")
8784    (match_operand:VQ_2E 1 "register_operand")
8785    (match_operand 2 "immediate_operand")]
8786   "TARGET_SIMD"
8788   /* V1DI and V1DF are rarely used by other patterns, so it should be better
8789      to hide it in a subreg destination of a normal DI or DF op.  */
8790   rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8791   emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8792   DONE;
8795 ;; aes
;; AESE/AESD: the architecture XORs the state with the round key first, so
;; the pattern models that XOR explicitly; "%0" makes the XOR commutative
;; with the tied destination.
8797 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8798   [(set (match_operand:V16QI 0 "register_operand" "=w")
8799         (unspec:V16QI
8800                 [(xor:V16QI
8801                  (match_operand:V16QI 1 "register_operand" "%0")
8802                  (match_operand:V16QI 2 "register_operand" "w"))]
8803          CRYPTO_AES))]
8804   "TARGET_AES"
8805   "aes<aes_op>\\t%0.16b, %2.16b"
8806   [(set_attr "type" "crypto_aese")]
;; AESMC/AESIMC (mix-columns and its inverse).
8809 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8810   [(set (match_operand:V16QI 0 "register_operand" "=w")
8811         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8812          CRYPTO_AESMC))]
8813   "TARGET_AES"
8814   "aes<aesmc_op>\\t%0.16b, %1.16b"
8815   [(set_attr "type" "crypto_aesmc")]
8818 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8819 ;; and enforce the register dependency without scheduling or register
8820 ;; allocation messing up the order or introducing moves inbetween.
8821 ;;  Mash the two together during combine.
8823 (define_insn "*aarch64_crypto_aese_fused"
8824   [(set (match_operand:V16QI 0 "register_operand" "=w")
8825         (unspec:V16QI
8826           [(unspec:V16QI
8827            [(xor:V16QI
8828                 (match_operand:V16QI 1 "register_operand" "%0")
8829                 (match_operand:V16QI 2 "register_operand" "w"))]
8830              UNSPEC_AESE)]
8831         UNSPEC_AESMC))]
8832   "TARGET_AES
8833    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8834   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8835   [(set_attr "type" "crypto_aese")
8836    (set_attr "length" "8")]
8839 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8840 ;; and enforce the register dependency without scheduling or register
8841 ;; allocation messing up the order or introducing moves inbetween.
8842 ;;  Mash the two together during combine.
8844 (define_insn "*aarch64_crypto_aesd_fused"
8845   [(set (match_operand:V16QI 0 "register_operand" "=w")
8846         (unspec:V16QI
8847           [(unspec:V16QI
8848                     [(xor:V16QI
8849                         (match_operand:V16QI 1 "register_operand" "%0")
8850                         (match_operand:V16QI 2 "register_operand" "w"))]
8851                 UNSPEC_AESD)]
8852           UNSPEC_AESIMC))]
8853   "TARGET_AES
8854    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8855   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8856   [(set_attr "type" "crypto_aese")
8857    (set_attr "length" "8")]
8860 ;; sha1
;; SHA1H on a scalar SI held in a vector register.
8862 (define_insn "aarch64_crypto_sha1hsi"
8863   [(set (match_operand:SI 0 "register_operand" "=w")
8864         (unspec:SI [(match_operand:SI 1
8865                        "register_operand" "w")]
8866          UNSPEC_SHA1H))]
8867   "TARGET_SHA2"
8868   "sha1h\\t%s0, %s1"
8869   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H taking lane 0 of a V4SI; the lane number differs by endianness, so
;; there is one pattern per endianness (lane 0 LE, lane 3 BE).
8872 (define_insn "aarch64_crypto_sha1hv4si"
8873   [(set (match_operand:SI 0 "register_operand" "=w")
8874         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8875                      (parallel [(const_int 0)]))]
8876          UNSPEC_SHA1H))]
8877   "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8878   "sha1h\\t%s0, %s1"
8879   [(set_attr "type" "crypto_sha1_fast")]
8882 (define_insn "aarch64_be_crypto_sha1hv4si"
8883   [(set (match_operand:SI 0 "register_operand" "=w")
8884         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8885                      (parallel [(const_int 3)]))]
8886          UNSPEC_SHA1H))]
8887   "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8888   "sha1h\\t%s0, %s1"
8889   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update 1: destination is tied to operand 1 ("0").
8892 (define_insn "aarch64_crypto_sha1su1v4si"
8893   [(set (match_operand:V4SI 0 "register_operand" "=w")
8894         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8895                       (match_operand:V4SI 2 "register_operand" "w")]
8896          UNSPEC_SHA1SU1))]
8897   "TARGET_SHA2"
8898   "sha1su1\\t%0.4s, %2.4s"
8899   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update (selected by the CRYPTO_SHA1 iterator).
8902 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8903   [(set (match_operand:V4SI 0 "register_operand" "=w")
8904         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8905                       (match_operand:SI 2 "register_operand" "w")
8906                       (match_operand:V4SI 3 "register_operand" "w")]
8907          CRYPTO_SHA1))]
8908   "TARGET_SHA2"
8909   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8910   [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update 0.
8913 (define_insn "aarch64_crypto_sha1su0v4si"
8914   [(set (match_operand:V4SI 0 "register_operand" "=w")
8915         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8916                       (match_operand:V4SI 2 "register_operand" "w")
8917                       (match_operand:V4SI 3 "register_operand" "w")]
8918          UNSPEC_SHA1SU0))]
8919   "TARGET_SHA2"
8920   "sha1su0\\t%0.4s, %2.4s, %3.4s"
8921   [(set_attr "type" "crypto_sha1_xor")]
8924 ;; sha256
;; SHA256H/SHA256H2 hash update (selected by CRYPTO_SHA256); destination is
;; tied to operand 1.
8926 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8927   [(set (match_operand:V4SI 0 "register_operand" "=w")
8928         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8929                       (match_operand:V4SI 2 "register_operand" "w")
8930                       (match_operand:V4SI 3 "register_operand" "w")]
8931          CRYPTO_SHA256))]
8932   "TARGET_SHA2"
8933   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8934   [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update 0.
8937 (define_insn "aarch64_crypto_sha256su0v4si"
8938   [(set (match_operand:V4SI 0 "register_operand" "=w")
8939         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8940                       (match_operand:V4SI 2 "register_operand" "w")]
8941          UNSPEC_SHA256SU0))]
8942   "TARGET_SHA2"
8943   "sha256su0\\t%0.4s, %2.4s"
8944   [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update 1.
8947 (define_insn "aarch64_crypto_sha256su1v4si"
8948   [(set (match_operand:V4SI 0 "register_operand" "=w")
8949         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8950                       (match_operand:V4SI 2 "register_operand" "w")
8951                       (match_operand:V4SI 3 "register_operand" "w")]
8952          UNSPEC_SHA256SU1))]
8953   "TARGET_SHA2"
8954   "sha256su1\\t%0.4s, %2.4s, %3.4s"
8955   [(set_attr "type" "crypto_sha256_slow")]
8958 ;; sha512
;; SHA512H/SHA512H2 hash update (selected by CRYPTO_SHA512); note these
;; require TARGET_SHA3, not TARGET_SHA2.
8960 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8961   [(set (match_operand:V2DI 0 "register_operand" "=w")
8962         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8963                       (match_operand:V2DI 2 "register_operand" "w")
8964                       (match_operand:V2DI 3 "register_operand" "w")]
8965          CRYPTO_SHA512))]
8966   "TARGET_SHA3"
8967   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8968   [(set_attr "type" "crypto_sha512")]
;; SHA512 schedule update 0.
8971 (define_insn "aarch64_crypto_sha512su0qv2di"
8972   [(set (match_operand:V2DI 0 "register_operand" "=w")
8973         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8974                       (match_operand:V2DI 2 "register_operand" "w")]
8975          UNSPEC_SHA512SU0))]
8976   "TARGET_SHA3"
8977   "sha512su0\\t%0.2d, %2.2d"
8978   [(set_attr "type" "crypto_sha512")]
;; SHA512 schedule update 1.
8981 (define_insn "aarch64_crypto_sha512su1qv2di"
8982   [(set (match_operand:V2DI 0 "register_operand" "=w")
8983         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8984                       (match_operand:V2DI 2 "register_operand" "w")
8985                       (match_operand:V2DI 3 "register_operand" "w")]
8986          UNSPEC_SHA512SU1))]
8987   "TARGET_SHA3"
8988   "sha512su1\\t%0.2d, %2.2d, %3.2d"
8989   [(set_attr "type" "crypto_sha512")]
8992 ;; sha3
;; EOR3: three-way XOR, modelled with genuine XOR rtl (no unspec) so the
;; combiner can form it from ordinary XOR pairs.
8994 (define_insn "eor3q<mode>4"
8995   [(set (match_operand:VQ_I 0 "register_operand" "=w")
8996         (xor:VQ_I
8997          (xor:VQ_I
8998           (match_operand:VQ_I 2 "register_operand" "w")
8999           (match_operand:VQ_I 3 "register_operand" "w"))
9000          (match_operand:VQ_I 1 "register_operand" "w")))]
9001   "TARGET_SHA3"
9002   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
9003   [(set_attr "type" "crypto_sha3")]
;; RAX1: XOR with a rotate-left-by-one of the second source.
9006 (define_insn "aarch64_rax1qv2di"
9007   [(set (match_operand:V2DI 0 "register_operand" "=w")
9008         (xor:V2DI
9009          (rotate:V2DI
9010           (match_operand:V2DI 2 "register_operand" "w")
9011           (const_int 1))
9012          (match_operand:V2DI 1 "register_operand" "w")))]
9013   "TARGET_SHA3"
9014   "rax1\\t%0.2d, %1.2d, %2.2d"
9015   [(set_attr "type" "crypto_sha3")]
;; XAR: XOR then rotate right by an immediate ("%w" marks the XOR operands
;; as commutative).
9018 (define_insn "aarch64_xarqv2di"
9019   [(set (match_operand:V2DI 0 "register_operand" "=w")
9020         (rotatert:V2DI
9021          (xor:V2DI
9022           (match_operand:V2DI 1 "register_operand" "%w")
9023           (match_operand:V2DI 2 "register_operand" "w"))
9024          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
9025   "TARGET_SHA3"
9026   "xar\\t%0.2d, %1.2d, %2.2d, %3"
9027   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit clear and XOR — op1 ^ (op2 & ~op3).
9030 (define_insn "bcaxq<mode>4"
9031   [(set (match_operand:VQ_I 0 "register_operand" "=w")
9032         (xor:VQ_I
9033          (and:VQ_I
9034           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
9035           (match_operand:VQ_I 2 "register_operand" "w"))
9036          (match_operand:VQ_I 1 "register_operand" "w")))]
9037   "TARGET_SHA3"
9038   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
9039   [(set_attr "type" "crypto_sha3")]
9042 ;; SM3
;; SM3SS1: all three sources are plain inputs (no tied destination).
9044 (define_insn "aarch64_sm3ss1qv4si"
9045   [(set (match_operand:V4SI 0 "register_operand" "=w")
9046         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9047                       (match_operand:V4SI 2 "register_operand" "w")
9048                       (match_operand:V4SI 3 "register_operand" "w")]
9049          UNSPEC_SM3SS1))]
9050   "TARGET_SM4"
9051   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
9052   [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/1B/2A/2B (CRYPTO_SM3TT); operand 4 is the 2-bit lane immediate.
9056 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9057   [(set (match_operand:V4SI 0 "register_operand" "=w")
9058         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9059                       (match_operand:V4SI 2 "register_operand" "w")
9060                       (match_operand:V4SI 3 "register_operand" "w")
9061                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9062          CRYPTO_SM3TT))]
9063   "TARGET_SM4"
9064   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9065   [(set_attr "type" "crypto_sm3")]
;; SM3PARTW1/SM3PARTW2 schedule updates.
9068 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9069   [(set (match_operand:V4SI 0 "register_operand" "=w")
9070         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9071                       (match_operand:V4SI 2 "register_operand" "w")
9072                       (match_operand:V4SI 3 "register_operand" "w")]
9073          CRYPTO_SM3PART))]
9074   "TARGET_SM4"
9075   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9076   [(set_attr "type" "crypto_sm3")]
9079 ;; SM4
;; SM4E encryption round; destination tied to operand 1.
9081 (define_insn "aarch64_sm4eqv4si"
9082   [(set (match_operand:V4SI 0 "register_operand" "=w")
9083         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9084                       (match_operand:V4SI 2 "register_operand" "w")]
9085          UNSPEC_SM4E))]
9086   "TARGET_SM4"
9087   "sm4e\\t%0.4s, %2.4s"
9088   [(set_attr "type" "crypto_sm4")]
;; SM4EKEY key-schedule step; unlike SM4E no operand is tied.
9091 (define_insn "aarch64_sm4ekeyqv4si"
9092   [(set (match_operand:V4SI 0 "register_operand" "=w")
9093         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9094                       (match_operand:V4SI 2 "register_operand" "w")]
9095          UNSPEC_SM4EKEY))]
9096   "TARGET_SM4"
9097   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9098   [(set_attr "type" "crypto_sm4")]
9101 ;; fp16fml
;; FMLAL/FMLSL widening half-precision multiply-accumulate, low halves.
;; Builds two lo-half selection PARALLELs (two separate rtxes so no RTL is
;; shared) and forwards to the aarch64_simd_fml* insn patterns below.
9103 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9104   [(set (match_operand:VDQSF 0 "register_operand")
9105         (unspec:VDQSF
9106          [(match_operand:VDQSF 1 "register_operand")
9107           (match_operand:<VFMLA_W> 2 "register_operand")
9108           (match_operand:<VFMLA_W> 3 "register_operand")]
9109          VFMLA16_LOW))]
9110   "TARGET_F16FML"
9112   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9113                                             <nunits> * 2, false);
9114   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9115                                             <nunits> * 2, false);
9117   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9118                                                                 operands[1],
9119                                                                 operands[2],
9120                                                                 operands[3],
9121                                                                 p1, p2));
9122   DONE;
;; Same as above but selecting the high halves of the wide inputs.
9126 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9127   [(set (match_operand:VDQSF 0 "register_operand")
9128         (unspec:VDQSF
9129          [(match_operand:VDQSF 1 "register_operand")
9130           (match_operand:<VFMLA_W> 2 "register_operand")
9131           (match_operand:<VFMLA_W> 3 "register_operand")]
9132          VFMLA16_HIGH))]
9133   "TARGET_F16FML"
9135   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9136   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9138   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9139                                                                  operands[1],
9140                                                                  operands[2],
9141                                                                  operands[3],
9142                                                                  p1, p2));
9143   DONE;
;; FMLAL/FMLSL insn patterns.  Each multiplies one half (lo or hi, fixed by
;; the vect_par_cnst_* predicate on operands 4/5) of two half-precision
;; vectors, widens to single precision, and accumulates into operand 1
;; (tied to the destination).  The *fmlsl* forms negate the first factor.
9146 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9147   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9148         (fma:VDQSF
9149          (float_extend:VDQSF
9150           (vec_select:<VFMLA_SEL_W>
9151            (match_operand:<VFMLA_W> 2 "register_operand" "w")
9152            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9153          (float_extend:VDQSF
9154           (vec_select:<VFMLA_SEL_W>
9155            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9156            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9157          (match_operand:VDQSF 1 "register_operand" "0")))]
9158   "TARGET_F16FML"
9159   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9160   [(set_attr "type" "neon_fp_mul_s")]
9163 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9164   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9165         (fma:VDQSF
9166          (float_extend:VDQSF
9167           (neg:<VFMLA_SEL_W>
9168            (vec_select:<VFMLA_SEL_W>
9169             (match_operand:<VFMLA_W> 2 "register_operand" "w")
9170             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9171          (float_extend:VDQSF
9172           (vec_select:<VFMLA_SEL_W>
9173            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9174            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9175          (match_operand:VDQSF 1 "register_operand" "0")))]
9176   "TARGET_F16FML"
9177   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9178   [(set_attr "type" "neon_fp_mul_s")]
;; High-half variants map to the FMLAL2/FMLSL2 forms of the instructions.
9181 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9182   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9183         (fma:VDQSF
9184          (float_extend:VDQSF
9185           (vec_select:<VFMLA_SEL_W>
9186            (match_operand:<VFMLA_W> 2 "register_operand" "w")
9187            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9188          (float_extend:VDQSF
9189           (vec_select:<VFMLA_SEL_W>
9190            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9191            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9192          (match_operand:VDQSF 1 "register_operand" "0")))]
9193   "TARGET_F16FML"
9194   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9195   [(set_attr "type" "neon_fp_mul_s")]
9198 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9199   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9200         (fma:VDQSF
9201          (float_extend:VDQSF
9202           (neg:<VFMLA_SEL_W>
9203            (vec_select:<VFMLA_SEL_W>
9204             (match_operand:<VFMLA_W> 2 "register_operand" "w")
9205             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9206          (float_extend:VDQSF
9207           (vec_select:<VFMLA_SEL_W>
9208            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9209            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9210          (match_operand:VDQSF 1 "register_operand" "0")))]
9211   "TARGET_F16FML"
9212   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9213   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane variants for the 64-bit (V2SF) result: operand 4 is the lane
;; immediate, adjusted for endianness with aarch64_endian_lane_rtx before
;; forwarding to the lane insn patterns below.
9216 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9217   [(set (match_operand:V2SF 0 "register_operand")
9218         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9219                            (match_operand:V4HF 2 "register_operand")
9220                            (match_operand:V4HF 3 "register_operand")
9221                            (match_operand:SI 4 "aarch64_imm2")]
9222          VFMLA16_LOW))]
9223   "TARGET_F16FML"
9225     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9226     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9228     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9229                                                             operands[1],
9230                                                             operands[2],
9231                                                             operands[3],
9232                                                             p1, lane));
9233     DONE;
;; High-half by-lane variant.
9237 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9238   [(set (match_operand:V2SF 0 "register_operand")
9239         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9240                            (match_operand:V4HF 2 "register_operand")
9241                            (match_operand:V4HF 3 "register_operand")
9242                            (match_operand:SI 4 "aarch64_imm2")]
9243          VFMLA16_HIGH))]
9244   "TARGET_F16FML"
9246     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9247     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9249     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9250                                                              operands[1],
9251                                                              operands[2],
9252                                                              operands[3],
9253                                                              p1, lane));
9254     DONE;
;; By-lane FMLAL/FMLSL insns (V2SF result).  One half of operand 2 is
;; multiplied by a single lane of operand 3 broadcast via vec_duplicate;
;; the "x" constraint restricts operand 3 to the lower register file the
;; by-element form can address.  The *fmlsl* forms negate the first factor,
;; and the high-half forms emit FMLAL2/FMLSL2.
9257 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9258   [(set (match_operand:V2SF 0 "register_operand" "=w")
9259         (fma:V2SF
9260          (float_extend:V2SF
9261            (vec_select:V2HF
9262             (match_operand:V4HF 2 "register_operand" "w")
9263             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9264          (float_extend:V2SF
9265            (vec_duplicate:V2HF
9266             (vec_select:HF
9267              (match_operand:V4HF 3 "register_operand" "x")
9268              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9269          (match_operand:V2SF 1 "register_operand" "0")))]
9270   "TARGET_F16FML"
9271   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9272   [(set_attr "type" "neon_fp_mul_s")]
9275 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9276   [(set (match_operand:V2SF 0 "register_operand" "=w")
9277         (fma:V2SF
9278          (float_extend:V2SF
9279           (neg:V2HF
9280            (vec_select:V2HF
9281             (match_operand:V4HF 2 "register_operand" "w")
9282             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9283          (float_extend:V2SF
9284           (vec_duplicate:V2HF
9285            (vec_select:HF
9286             (match_operand:V4HF 3 "register_operand" "x")
9287             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9288          (match_operand:V2SF 1 "register_operand" "0")))]
9289   "TARGET_F16FML"
9290   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9291   [(set_attr "type" "neon_fp_mul_s")]
9294 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9295   [(set (match_operand:V2SF 0 "register_operand" "=w")
9296         (fma:V2SF
9297          (float_extend:V2SF
9298            (vec_select:V2HF
9299             (match_operand:V4HF 2 "register_operand" "w")
9300             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9301          (float_extend:V2SF
9302            (vec_duplicate:V2HF
9303             (vec_select:HF
9304              (match_operand:V4HF 3 "register_operand" "x")
9305              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9306          (match_operand:V2SF 1 "register_operand" "0")))]
9307   "TARGET_F16FML"
9308   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9309   [(set_attr "type" "neon_fp_mul_s")]
9312 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9313   [(set (match_operand:V2SF 0 "register_operand" "=w")
9314         (fma:V2SF
9315          (float_extend:V2SF
9316            (neg:V2HF
9317             (vec_select:V2HF
9318              (match_operand:V4HF 2 "register_operand" "w")
9319              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9320          (float_extend:V2SF
9321            (vec_duplicate:V2HF
9322             (vec_select:HF
9323              (match_operand:V4HF 3 "register_operand" "x")
9324              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9325          (match_operand:V2SF 1 "register_operand" "0")))]
9326   "TARGET_F16FML"
9327   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9328   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the V4SF "laneq" FP16FML intrinsics (V8HF multiplicand,
;; V8HF index vector, lane index 0-7).  Each one builds the lo/hi half
;; selector (p1) and an endianness-corrected lane rtx, then emits the
;; matching aarch64_simd_fml*q_laneq_{low,high}v4sf insn.  <f16mac1>
;; iterates over the add (fmlal) and subtract (fmlsl) variants via the
;; VFMLA16_LOW/VFMLA16_HIGH unspec iterators.
9331 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9332   [(set (match_operand:V4SF 0 "register_operand")
9333         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9334                            (match_operand:V8HF 2 "register_operand")
9335                            (match_operand:V8HF 3 "register_operand")
9336                            (match_operand:SI 4 "aarch64_lane_imm3")]
9337          VFMLA16_LOW))]
9338   "TARGET_F16FML"
9340     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9341     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9343     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
9344                                                               operands[1],
9345                                                               operands[2],
9346                                                               operands[3],
9347                                                               p1, lane));
9348     DONE;

;; Same as above but selecting the HIGH half of operand 2.
9351 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9352   [(set (match_operand:V4SF 0 "register_operand")
9353         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9354                            (match_operand:V8HF 2 "register_operand")
9355                            (match_operand:V8HF 3 "register_operand")
9356                            (match_operand:SI 4 "aarch64_lane_imm3")]
9357          VFMLA16_HIGH))]
9358   "TARGET_F16FML"
9360     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9361     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9363     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
9364                                                                operands[1],
9365                                                                operands[2],
9366                                                                operands[3],
9367                                                                p1, lane));
9368     DONE;
;; The V4SF "laneq" insns emitted by the expanders above: widen one V4HF
;; half of the V8HF operand 2, multiply by the broadcast FP16 lane %5
;; (0-7, Ui7) of the V8HF operand 3, and accumulate into operand 1 via
;; fma.  neg marks the fmlsl (subtract) forms.

;; fmlal: low half, multiply-add-long.
9371 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9372   [(set (match_operand:V4SF 0 "register_operand" "=w")
9373         (fma:V4SF
9374          (float_extend:V4SF
9375           (vec_select:V4HF
9376             (match_operand:V8HF 2 "register_operand" "w")
9377             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9378          (float_extend:V4SF
9379           (vec_duplicate:V4HF
9380            (vec_select:HF
9381             (match_operand:V8HF 3 "register_operand" "x")
9382             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9383          (match_operand:V4SF 1 "register_operand" "0")))]
9384   "TARGET_F16FML"
9385   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9386   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl: low half, negated (multiply-subtract-long).
9389 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9390   [(set (match_operand:V4SF 0 "register_operand" "=w")
9391         (fma:V4SF
9392           (float_extend:V4SF
9393            (neg:V4HF
9394             (vec_select:V4HF
9395              (match_operand:V8HF 2 "register_operand" "w")
9396              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9397          (float_extend:V4SF
9398           (vec_duplicate:V4HF
9399            (vec_select:HF
9400             (match_operand:V8HF 3 "register_operand" "x")
9401             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9402          (match_operand:V4SF 1 "register_operand" "0")))]
9403   "TARGET_F16FML"
9404   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9405   [(set_attr "type" "neon_fp_mul_s")]

;; fmlal2: high half.
9408 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9409   [(set (match_operand:V4SF 0 "register_operand" "=w")
9410         (fma:V4SF
9411          (float_extend:V4SF
9412           (vec_select:V4HF
9413             (match_operand:V8HF 2 "register_operand" "w")
9414             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9415          (float_extend:V4SF
9416           (vec_duplicate:V4HF
9417            (vec_select:HF
9418             (match_operand:V8HF 3 "register_operand" "x")
9419             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9420          (match_operand:V4SF 1 "register_operand" "0")))]
9421   "TARGET_F16FML"
9422   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9423   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl2: high half, negated.
9426 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9427   [(set (match_operand:V4SF 0 "register_operand" "=w")
9428         (fma:V4SF
9429          (float_extend:V4SF
9430           (neg:V4HF
9431            (vec_select:V4HF
9432             (match_operand:V8HF 2 "register_operand" "w")
9433             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9434          (float_extend:V4SF
9435           (vec_duplicate:V4HF
9436            (vec_select:HF
9437             (match_operand:V8HF 3 "register_operand" "x")
9438             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9439          (match_operand:V4SF 1 "register_operand" "0")))]
9440   "TARGET_F16FML"
9441   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9442   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the V2SF "laneq" FP16FML intrinsics: V4HF multiplicand
;; but a FULL V8HF index vector (lane 0-7).  Note the half selector is
;; built in V4HFmode (the multiplicand's mode) while the lane index is
;; endian-corrected in V8HFmode (the index vector's mode).
9445 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9446   [(set (match_operand:V2SF 0 "register_operand")
9447         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9448                       (match_operand:V4HF 2 "register_operand")
9449                       (match_operand:V8HF 3 "register_operand")
9450                       (match_operand:SI 4 "aarch64_lane_imm3")]
9451          VFMLA16_LOW))]
9452   "TARGET_F16FML"
9454     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9455     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9457     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
9458                                                              operands[1],
9459                                                              operands[2],
9460                                                              operands[3],
9461                                                              p1, lane));
9462     DONE;

;; Same as above but selecting the HIGH half of operand 2.
9466 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9467   [(set (match_operand:V2SF 0 "register_operand")
9468         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9469                       (match_operand:V4HF 2 "register_operand")
9470                       (match_operand:V8HF 3 "register_operand")
9471                       (match_operand:SI 4 "aarch64_lane_imm3")]
9472          VFMLA16_HIGH))]
9473   "TARGET_F16FML"
9475     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9476     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9478     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
9479                                                               operands[1],
9480                                                               operands[2],
9481                                                               operands[3],
9482                                                               p1, lane));
9483     DONE;
;; The V2SF "laneq" insns: widen one V2HF half of the V4HF operand 2,
;; multiply by the broadcast FP16 lane %5 (0-7) of the V8HF operand 3,
;; and accumulate into operand 1 via fma; neg marks the fmlsl forms.

;; fmlal: low half, multiply-add-long.
9487 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9488   [(set (match_operand:V2SF 0 "register_operand" "=w")
9489         (fma:V2SF
9490          (float_extend:V2SF
9491            (vec_select:V2HF
9492             (match_operand:V4HF 2 "register_operand" "w")
9493             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9494          (float_extend:V2SF
9495           (vec_duplicate:V2HF
9496            (vec_select:HF
9497             (match_operand:V8HF 3 "register_operand" "x")
9498             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9499          (match_operand:V2SF 1 "register_operand" "0")))]
9500   "TARGET_F16FML"
9501   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9502   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl: low half, negated (multiply-subtract-long).
9505 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9506   [(set (match_operand:V2SF 0 "register_operand" "=w")
9507         (fma:V2SF
9508          (float_extend:V2SF
9509           (neg:V2HF
9510            (vec_select:V2HF
9511             (match_operand:V4HF 2 "register_operand" "w")
9512             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9513          (float_extend:V2SF
9514           (vec_duplicate:V2HF
9515            (vec_select:HF
9516             (match_operand:V8HF 3 "register_operand" "x")
9517             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9518          (match_operand:V2SF 1 "register_operand" "0")))]
9519   "TARGET_F16FML"
9520   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9521   [(set_attr "type" "neon_fp_mul_s")]

;; fmlal2: high half.
9524 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9525   [(set (match_operand:V2SF 0 "register_operand" "=w")
9526         (fma:V2SF
9527          (float_extend:V2SF
9528            (vec_select:V2HF
9529             (match_operand:V4HF 2 "register_operand" "w")
9530             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9531          (float_extend:V2SF
9532           (vec_duplicate:V2HF
9533            (vec_select:HF
9534             (match_operand:V8HF 3 "register_operand" "x")
9535             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9536          (match_operand:V2SF 1 "register_operand" "0")))]
9537   "TARGET_F16FML"
9538   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9539   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl2: high half, negated.
9542 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9543   [(set (match_operand:V2SF 0 "register_operand" "=w")
9544         (fma:V2SF
9545          (float_extend:V2SF
9546           (neg:V2HF
9547            (vec_select:V2HF
9548             (match_operand:V4HF 2 "register_operand" "w")
9549             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9550          (float_extend:V2SF
9551           (vec_duplicate:V2HF
9552            (vec_select:HF
9553             (match_operand:V8HF 3 "register_operand" "x")
9554             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9555          (match_operand:V2SF 1 "register_operand" "0")))]
9556   "TARGET_F16FML"
9557   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9558   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the V4SF "lane" FP16FML intrinsics: V8HF multiplicand
;; but a 64-bit V4HF index vector, so the lane index is only 0-3
;; (aarch64_imm2).  Half selector is built in V8HFmode; the lane is
;; endian-corrected in V4HFmode.
9561 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9562   [(set (match_operand:V4SF 0 "register_operand")
9563         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9564                       (match_operand:V8HF 2 "register_operand")
9565                       (match_operand:V4HF 3 "register_operand")
9566                       (match_operand:SI 4 "aarch64_imm2")]
9567          VFMLA16_LOW))]
9568   "TARGET_F16FML"
9570     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9571     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9573     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
9574                                                              operands[1],
9575                                                              operands[2],
9576                                                              operands[3],
9577                                                              p1, lane));
9578     DONE;

;; Same as above but selecting the HIGH half of operand 2.
9581 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9582   [(set (match_operand:V4SF 0 "register_operand")
9583         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9584                       (match_operand:V8HF 2 "register_operand")
9585                       (match_operand:V4HF 3 "register_operand")
9586                       (match_operand:SI 4 "aarch64_imm2")]
9587          VFMLA16_HIGH))]
9588   "TARGET_F16FML"
9590     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9591     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9593     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
9594                                                               operands[1],
9595                                                               operands[2],
9596                                                               operands[3],
9597                                                               p1, lane));
9598     DONE;
;; The V4SF "lane" insns: widen one V4HF half of the V8HF operand 2,
;; multiply by the broadcast FP16 lane %5 (0-3, Ui2) of the V4HF
;; operand 3, accumulate into operand 1 via fma; neg marks fmlsl.

;; fmlal: low half, multiply-add-long.
9601 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9602   [(set (match_operand:V4SF 0 "register_operand" "=w")
9603         (fma:V4SF
9604          (float_extend:V4SF
9605           (vec_select:V4HF
9606            (match_operand:V8HF 2 "register_operand" "w")
9607            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9608          (float_extend:V4SF
9609           (vec_duplicate:V4HF
9610            (vec_select:HF
9611             (match_operand:V4HF 3 "register_operand" "x")
9612             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9613          (match_operand:V4SF 1 "register_operand" "0")))]
9614   "TARGET_F16FML"
9615   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9616   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl: low half, negated (multiply-subtract-long).
9619 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9620   [(set (match_operand:V4SF 0 "register_operand" "=w")
9621         (fma:V4SF
9622          (float_extend:V4SF
9623           (neg:V4HF
9624            (vec_select:V4HF
9625             (match_operand:V8HF 2 "register_operand" "w")
9626             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9627          (float_extend:V4SF
9628           (vec_duplicate:V4HF
9629            (vec_select:HF
9630             (match_operand:V4HF 3 "register_operand" "x")
9631             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9632          (match_operand:V4SF 1 "register_operand" "0")))]
9633   "TARGET_F16FML"
9634   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9635   [(set_attr "type" "neon_fp_mul_s")]

;; fmlal2: high half.
9638 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9639   [(set (match_operand:V4SF 0 "register_operand" "=w")
9640         (fma:V4SF
9641          (float_extend:V4SF
9642           (vec_select:V4HF
9643            (match_operand:V8HF 2 "register_operand" "w")
9644            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9645          (float_extend:V4SF
9646           (vec_duplicate:V4HF
9647            (vec_select:HF
9648             (match_operand:V4HF 3 "register_operand" "x")
9649             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9650          (match_operand:V4SF 1 "register_operand" "0")))]
9651   "TARGET_F16FML"
9652   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9653   [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl2: high half, negated.
9656 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9657   [(set (match_operand:V4SF 0 "register_operand" "=w")
9658         (fma:V4SF
9659          (float_extend:V4SF
9660           (neg:V4HF
9661            (vec_select:V4HF
9662             (match_operand:V8HF 2 "register_operand" "w")
9663             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9664          (float_extend:V4SF
9665           (vec_duplicate:V4HF
9666            (vec_select:HF
9667             (match_operand:V4HF 3 "register_operand" "x")
9668             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9669          (match_operand:V4SF 1 "register_operand" "0")))]
9670   "TARGET_F16FML"
9671   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9672   [(set_attr "type" "neon_fp_mul_s")]
9675 ;; pmull

;; Polynomial multiply long: 64x64 -> 128-bit (TI) carry-less product of
;; the low DI lanes; opaque unspec since RTL has no polynomial multiply.
;; Gated on TARGET_AES (the PMULL 1Q form belongs to the AES extension).
9677 (define_insn "aarch64_crypto_pmulldi"
9678   [(set (match_operand:TI 0 "register_operand" "=w")
9679         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
9680                      (match_operand:DI 2 "register_operand" "w")]
9681                     UNSPEC_PMULL))]
9682  "TARGET_AES"
9683  "pmull\\t%0.1q, %1.1d, %2.1d"
9684   [(set_attr "type" "crypto_pmull")]

;; pmull2: same operation on the HIGH 64-bit lanes of the V2DI inputs.
9687 (define_insn "aarch64_crypto_pmullv2di"
9688  [(set (match_operand:TI 0 "register_operand" "=w")
9689        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
9690                    (match_operand:V2DI 2 "register_operand" "w")]
9691                   UNSPEC_PMULL2))]
9692   "TARGET_AES"
9693   "pmull2\\t%0.1q, %1.2d, %2.2d"
9694   [(set_attr "type" "crypto_pmull")]
9697 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.

;; Emits [su]xtl; the zero-extend case may later be split into a ZIP1
;; with a shareable zero register when aarch64_split_simd_shift_p says
;; the split is profitable (see the comment in the split body).
9698 (define_insn_and_split "<optab><Vnarrowq><mode>2"
9699   [(set (match_operand:VQN 0 "register_operand" "=w")
9700         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9701   "TARGET_SIMD"
9702   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
9703   "&& <CODE> == ZERO_EXTEND
9704    && aarch64_split_simd_shift_p (insn)"
9705   [(const_int 0)]
9706   {
9707     /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
9708        provided that the cost of the zero can be amortized over several
9709        operations.  We'll later recombine the zero and zip if there are
9710        not sufficient uses of the zero to make the split worthwhile.  */
9711     rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
9712                                    <MODE>mode, 0);
9713     rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
9714     emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
9715     DONE;
9716   }
9717   [(set_attr "type" "neon_shift_imm_long")]

;; Intrinsic-facing expander; the pattern above does all the work.
9720 (define_expand "aarch64_<su>xtl<mode>"
9721   [(set (match_operand:VQN 0 "register_operand" "=w")
9722         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9723   "TARGET_SIMD"
9724   ""
9727 ;; Truncate a 128-bit integer vector to a 64-bit vector.

;; The <vczle><vczbe> suffixes come from the add_vec_concat_subst_le/be
;; define_substs at the top of the file, generating variants whose
;; result is a full 128-bit vec_concat with an implicit zeroed half.
9728 (define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
9729   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9730         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9731   "TARGET_SIMD"
9732   "xtn\t%0.<Vntype>, %1.<Vtype>"
9733   [(set_attr "type" "neon_move_narrow_q")]

9736 ;; Expander for the intrinsics that only takes one mode unlike the two-mode
9737 ;; trunc optab.
9738 (define_expand "aarch64_xtn<mode>"
9739   [(set (match_operand:<VNARROWQ> 0 "register_operand")
9740        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
9741   "TARGET_SIMD"
9742   {}
;; BFloat16 dot product: the BFDOT unspec over the two BF16 source
;; vectors is added to the SF accumulator in operand 1.
9745 (define_insn "aarch64_bfdot<mode>"
9746   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9747         (plus:VDQSF
9748           (unspec:VDQSF
9749            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9750             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9751             UNSPEC_BFDOT)
9752           (match_operand:VDQSF 1 "register_operand" "0")))]
9753   "TARGET_BF16_SIMD"
9754   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9755   [(set_attr "type" "neon_dot<q>")]

;; Indexed BFDOT: the instruction indexes 2h PAIRS of BF16 elements, so
;; the element lane in operand 4 is rescaled with nunits / 2 (and
;; endian-corrected) before printing %3.2h[%4].
9758 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9759   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9760         (plus:VDQSF
9761           (unspec:VDQSF
9762            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9763             (match_operand:VBF 3 "register_operand" "w")
9764             (match_operand:SI 4 "const_int_operand" "n")]
9765             UNSPEC_BFDOT)
9766           (match_operand:VDQSF 1 "register_operand" "0")))]
9767   "TARGET_BF16_SIMD"
9769   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9770   int lane = INTVAL (operands[4]);
9771   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9772   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9774   [(set_attr "type" "neon_dot<VDQSF:q>")]
9777 ;; vget_low/high_bf16

;; Extract the low 64-bit (V4BF) half of a V8BF vector by building the
;; appropriate parallel selector and reusing the generic get_half insn.
9778 (define_expand "aarch64_vget_lo_halfv8bf"
9779   [(match_operand:V4BF 0 "register_operand")
9780    (match_operand:V8BF 1 "register_operand")]
9781   "TARGET_BF16_SIMD"
9783   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9784   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9785   DONE;

;; As above but for the HIGH half (third argument true).
9788 (define_expand "aarch64_vget_hi_halfv8bf"
9789   [(match_operand:V4BF 0 "register_operand")
9790    (match_operand:V8BF 1 "register_operand")]
9791   "TARGET_BF16_SIMD"
9793   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9794   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9795   DONE;
9798 ;; bfmmla

;; BF16 matrix multiply-accumulate into a V4SF accumulator (opaque
;; unspec plus an add of operand 1, which is tied to the output).
9799 (define_insn "aarch64_bfmmlaqv4sf"
9800   [(set (match_operand:V4SF 0 "register_operand" "=w")
9801         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9802                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9803                                  (match_operand:V8BF 3 "register_operand" "w")]
9804                     UNSPEC_BFMMLA)))]
9805   "TARGET_BF16_SIMD"
9806   "bfmmla\\t%0.4s, %2.8h, %3.8h"
9807   [(set_attr "type" "neon_fp_mla_s_q")]

9810 ;; bfmlal<bt>

;; BF16 widening multiply-add-long; <bt> selects the bottom/top variant
;; via the BF_MLA unspec iterator.
9811 (define_insn "aarch64_bfmlal<bt>v4sf"
9812   [(set (match_operand:V4SF 0 "register_operand" "=w")
9813         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9814                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9815                                   (match_operand:V8BF 3 "register_operand" "w")]
9816                      BF_MLA)))]
9817   "TARGET_BF16_SIMD"
9818   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9819   [(set_attr "type" "neon_fp_mla_s_q")]

;; Indexed bfmlal<bt>: lane in operand 4 is endian-corrected at output
;; time.  NOTE(review): operand 3's "x" constraint presumably matches
;; the reduced register range of the BF16 by-element encoding -- confirm.
9822 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9823   [(set (match_operand:V4SF 0 "register_operand" "=w")
9824         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9825                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9826                                   (match_operand:VBF 3 "register_operand" "x")
9827                                   (match_operand:SI 4 "const_int_operand" "n")]
9828                      BF_MLA)))]
9829   "TARGET_BF16_SIMD"
9831   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9832   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9834   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9837 ;; 8-bit integer matrix multiply-accumulate

;; [su]mmla / usmmla: the MATMUL unspec iterator supplies the <sur>
;; signedness variants; result accumulates into the V4SI operand 1.
9838 (define_insn "aarch64_simd_<sur>mmlav16qi"
9839   [(set (match_operand:V4SI 0 "register_operand" "=w")
9840         (plus:V4SI
9841          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9842                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9843          (match_operand:V4SI 1 "register_operand" "0")))]
9844   "TARGET_I8MM"
9845   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9846   [(set_attr "type" "neon_mla_s_q")]
9849 ;; bfcvtn

;; Narrowing convert V4SF -> BF16; V4SF_TO_BF covers the 64- and
;; 128-bit destination forms selected by <q>.
9850 (define_insn "aarch64_bfcvtn<q><mode>"
9851   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9852         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9853                             UNSPEC_BFCVTN))]
9854   "TARGET_BF16_SIMD"
9855   "bfcvtn\\t%0.4h, %1.4s"
9856   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]

;; bfcvtn2: convert into the HIGH half, keeping the low half of the
;; tied input (operand 1, constraint "0") intact.
9859 (define_insn "aarch64_bfcvtn2v8bf"
9860   [(set (match_operand:V8BF 0 "register_operand" "=w")
9861         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9862                       (match_operand:V4SF 2 "register_operand" "w")]
9863                       UNSPEC_BFCVTN2))]
9864   "TARGET_BF16_SIMD"
9865   "bfcvtn2\\t%0.8h, %2.4s"
9866   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]

;; Scalar SF -> BF conversion; gated on the scalar BF16 FP extension.
9869 (define_insn "aarch64_bfcvtbf"
9870   [(set (match_operand:BF 0 "register_operand" "=w")
9871         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9872                     UNSPEC_BFCVT))]
9873   "TARGET_BF16_FP"
9874   "bfcvt\\t%h0, %s1"
9875   [(set_attr "type" "f_cvt")]
9878 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.

;; Widening BF16 -> SF is exact: a BF value is the top 16 bits of an SF
;; value, so a left shift by 16 into the wider lanes performs the
;; conversion.  Low-half vector form:
9879 (define_insn "aarch64_vbfcvt<mode>"
9880   [(set (match_operand:V4SF 0 "register_operand" "=w")
9881         (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9882                       UNSPEC_BFCVTN))]
9883   "TARGET_BF16_SIMD"
9884   "shll\\t%0.4s, %1.4h, #16"
9885   [(set_attr "type" "neon_shift_imm_long")]

;; High-half vector form (shll2 widens lanes 4-7 of the V8BF input).
9888 (define_insn "aarch64_vbfcvt_highv8bf"
9889   [(set (match_operand:V4SF 0 "register_operand" "=w")
9890         (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9891                       UNSPEC_BFCVTN2))]
9892   "TARGET_BF16_SIMD"
9893   "shll2\\t%0.4s, %1.8h, #16"
9894   [(set_attr "type" "neon_shift_imm_long")]

;; Scalar BF -> SF, done as a 64-bit register shift left by 16.
9897 (define_insn "aarch64_bfcvtsf"
9898   [(set (match_operand:SF 0 "register_operand" "=w")
9899         (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9900                     UNSPEC_BFCVT))]
9901   "TARGET_BF16_FP"
9902   "shl\\t%d0, %d1, #16"
9903   [(set_attr "type" "neon_shift_imm")]