[AArch64] Use intrinsics for widening multiplies (PR91598)
[official-gcc.git] / gcc / config / aarch64 / aarch64-simd.md
blob 24a11fb50403423c63a36797d796fdc78326acda
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for all AdvSIMD vector modes
;; (VALL_F16MOV).  Legitimizes stores: a memory destination may keep a
;; non-register source only when that source is an immediate zero that
;; can be stored via xzr (see the embedded comment for the size cases).
;; NOTE(review): the closing delimiter lines of the patterns in this
;; extract appear to have been dropped; confirm bracketing upstream.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23         (match_operand:VALL_F16MOV 1 "general_operand"))]
24   "TARGET_SIMD"
25   "
26   /* Force the operand into a register if it is not an
27      immediate whose use can be replaced with xzr.
28      If the mode is 16 bytes wide, then we will be doing
29      a stp in DI mode, so we check the validity of that.
30      If the mode is 8 bytes wide, then we will be doing a
31      normal str, so the check need not apply.  */
32   if (GET_CODE (operands[0]) == MEM
33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35                 && aarch64_mem_pair_operand (operands[0], DImode))
36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37       operands[1] = force_reg (<MODE>mode, operands[1]);
38   "
;; Misaligned vector moves are only expanded when the target permits
;; unaligned access (!STRICT_ALIGNMENT); at most one operand may be
;; a non-register, so force operand 1 into a register if both are.
41 (define_expand "movmisalign<mode>"
42   [(set (match_operand:VALL 0 "nonimmediate_operand")
43         (match_operand:VALL 1 "general_operand"))]
44   "TARGET_SIMD && !STRICT_ALIGNMENT"
46   /* This pattern is not permitted to fail during expansion: if both arguments
47      are non-registers (e.g. memory := constant, which can be created by the
48      auto-vectorizer), force operand 1 into a register.  */
49   if (!register_operand (operands[0], <MODE>mode)
50       && !register_operand (operands[1], <MODE>mode))
51     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast a scalar into every lane of an integer vector.  The two
;; alternatives take the scalar from a SIMD register element or, less
;; preferably (?r), from a general-purpose register.
54 (define_insn "aarch64_simd_dup<mode>"
55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56         (vec_duplicate:VDQ_I
57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58   "TARGET_SIMD"
59   "@
60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
61    dup\\t%0.<Vtype>, %<vw>1"
62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point broadcast: the scalar always lives in a SIMD register.
65 (define_insn "aarch64_simd_dup<mode>"
66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67         (vec_duplicate:VDQF_F16
68           (match_operand:<VEL> 1 "register_operand" "w")))]
69   "TARGET_SIMD"
70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71   [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of a vector across a vector of the same
;; mode.  The lane index is remapped for big-endian at output time.
74 (define_insn "aarch64_dup_lane<mode>"
75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76         (vec_duplicate:VALL_F16
77           (vec_select:<VEL>
78             (match_operand:VALL_F16 1 "register_operand" "w")
79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80           )))]
81   "TARGET_SIMD"
82   {
83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85   }
86   [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the swapped width (<VSWAP_WIDTH>),
;; e.g. duplicating a lane of a 64-bit vector into a 128-bit result.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91         (vec_duplicate:VALL_F16_NO_V2Q
92           (vec_select:<VEL>
93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95           )))]
96   "TARGET_SIMD"
97   {
98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100   }
101   [(set_attr "type" "neon_dup<q>")]
;; General move for 64-bit (D-register) vector modes.  Alternatives
;; cover load, store of zero via xzr, store, register copy, moves
;; between SIMD and general registers, and materializing an immediate.
104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
105   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
107         (match_operand:VDMOV 1 "general_operand"
108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
109   "TARGET_SIMD
110    && (register_operand (operands[0], <MODE>mode)
111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113    switch (which_alternative)
114      {
115      case 0: return "ldr\t%d0, %1";
116      case 1: return "str\txzr, %0";
117      case 2: return "str\t%d1, %0";
118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119      case 4: return "umov\t%0, %1.d[0]";
120      case 5: return "fmov\t%d0, %1";
121      case 6: return "mov\t%0, %1";
122      case 7:
123         return aarch64_output_simd_mov_immediate (operands[1], 64);
124      default: gcc_unreachable ();
125      }
127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
129                      mov_reg, neon_move<q>")]
;; General move for 128-bit (Q-register) vector modes.  Alternatives
;; 4-6 (moves involving general registers) emit "#" (length 8) and are
;; split after reload by the define_splits further down.
132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
133   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
135         (match_operand:VQMOV 1 "general_operand"
136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
137   "TARGET_SIMD
138    && (register_operand (operands[0], <MODE>mode)
139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141   switch (which_alternative)
142     {
143     case 0:
144         return "ldr\t%q0, %1";
145     case 1:
146         return "stp\txzr, xzr, %0";
147     case 2:
148         return "str\t%q1, %0";
149     case 3:
150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151     case 4:
152     case 5:
153     case 6:
154         return "#";
155     case 7:
156         return aarch64_output_simd_mov_immediate (operands[1], 128);
157     default:
158         gcc_unreachable ();
159     }
161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162                      neon_logic<q>, multiple, multiple,\
163                      multiple, neon_move<q>")
164    (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
170 (define_insn "aarch64_store_lane0<mode>"
171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
174   "TARGET_SIMD
175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176   "str\\t%<Vetype>1, %0"
177   [(set_attr "type" "neon_store1_1reg<q>")]
;; Paired load of two 64-bit vectors with LDP.  The condition requires
;; the second address to be exactly the first plus the mode size, i.e.
;; the two loads are adjacent in memory.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181   [(set (match_operand:DREG 0 "register_operand" "=w")
182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183    (set (match_operand:DREG2 2 "register_operand" "=w")
184         (match_operand:DREG2 3 "memory_operand" "m"))]
185   "TARGET_SIMD
186    && rtx_equal_p (XEXP (operands[3], 0),
187                    plus_constant (Pmode,
188                                   XEXP (operands[1], 0),
189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
190   "ldp\\t%d0, %d2, %1"
191   [(set_attr "type" "neon_ldp")]
;; Paired store of two adjacent 64-bit vectors with STP.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196         (match_operand:DREG 1 "register_operand" "w"))
197    (set (match_operand:DREG2 2 "memory_operand" "=m")
198         (match_operand:DREG2 3 "register_operand" "w"))]
199   "TARGET_SIMD
200    && rtx_equal_p (XEXP (operands[2], 0),
201                    plus_constant (Pmode,
202                                   XEXP (operands[0], 0),
203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
204   "stp\\t%d1, %d3, %0"
205   [(set_attr "type" "neon_stp")]
;; Paired load of two adjacent 128-bit (Q-register) vectors.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209   [(set (match_operand:VQ 0 "register_operand" "=w")
210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211    (set (match_operand:VQ2 2 "register_operand" "=w")
212         (match_operand:VQ2 3 "memory_operand" "m"))]
213   "TARGET_SIMD
214     && rtx_equal_p (XEXP (operands[3], 0),
215                     plus_constant (Pmode,
216                                XEXP (operands[1], 0),
217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
218   "ldp\\t%q0, %q2, %1"
219   [(set_attr "type" "neon_ldp_q")]
;; Paired store of two adjacent 128-bit (Q-register) vectors.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224         (match_operand:VQ 1 "register_operand" "w"))
225    (set (match_operand:VQ2 2 "memory_operand" "=m")
226         (match_operand:VQ2 3 "register_operand" "w"))]
227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228                 plus_constant (Pmode,
229                                XEXP (operands[0], 0),
230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
231   "stp\\t%q1, %q3, %0"
232   [(set_attr "type" "neon_stp_q")]
;; Post-reload split: a Q-mode copy between two general-purpose
;; registers becomes two DImode register moves.
236 (define_split
237   [(set (match_operand:VQMOV 0 "register_operand" "")
238       (match_operand:VQMOV 1 "register_operand" ""))]
239   "TARGET_SIMD && reload_completed
240    && GP_REGNUM_P (REGNO (operands[0]))
241    && GP_REGNUM_P (REGNO (operands[1]))"
242   [(const_int 0)]
244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245   DONE;
;; Post-reload split: a Q-mode copy between a SIMD register and a
;; general-purpose register (either direction) is delegated to
;; aarch64_split_simd_move.
248 (define_split
249   [(set (match_operand:VQMOV 0 "register_operand" "")
250         (match_operand:VQMOV 1 "register_operand" ""))]
251   "TARGET_SIMD && reload_completed
252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254   [(const_int 0)]
256   aarch64_split_simd_move (operands[0], operands[1]);
257   DONE;
;; Expand a 128-bit move as two half-width operations.  When the source
;; is in general registers, move each half into the low/high quad of
;; the destination; otherwise extract the low and high halves of the
;; SIMD source into the two halves of the destination.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261   [(set (match_operand:VQMOV 0)
262         (match_operand:VQMOV 1))]
263   "TARGET_SIMD"
264   {
265     rtx dst = operands[0];
266     rtx src = operands[1];
268     if (GP_REGNUM_P (REGNO (src)))
269       {
270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
273         emit_insn
274           (gen_move_lo_quad_<mode> (dst, src_low_part));
275         emit_insn
276           (gen_move_hi_quad_<mode> (dst, src_high_part));
277       }
279     else
280       {
281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
286         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
287       }
288     DONE;
289   }
;; Select a contiguous run of lanes (operand 2, an ascending parallel)
;; from a 128-bit vector, giving the half-width result.
292 (define_expand "aarch64_get_half<mode>"
293   [(set (match_operand:<VHALF> 0 "register_operand")
294         (vec_select:<VHALF>
295           (match_operand:VQMOV 1 "register_operand")
296           (match_operand 2 "ascending_int_parallel")))]
297   "TARGET_SIMD"
;; Extract the low half of a Q-register vector.  The SIMD-destination
;; alternative emits "#" and is split after reload into a plain
;; half-mode register move; the GP destination uses UMOV.
300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
301   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
302         (vec_select:<VHALF>
303           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
304           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
305   "TARGET_SIMD"
306   "@
307    #
308    umov\t%0, %1.d[0]"
309   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
310   [(set (match_dup 0) (match_dup 1))]
311   {
312     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
313   }
314   [(set_attr "type" "mov_reg,neon_to_gp<q>")
315    (set_attr "length" "4")]
;; Extract the high half of a Q-register vector: DUP for a SIMD
;; destination, UMOV for a general-purpose destination.
318 (define_insn "aarch64_simd_mov_from_<mode>high"
319   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
320         (vec_select:<VHALF>
321           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
322           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
323   "TARGET_SIMD"
324   "@
325    dup\\t%d0, %1.d[1]
326    umov\t%0, %1.d[1]"
327   [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
328    (set_attr "length" "4")]
;; Bitwise OR-NOT.  Note the operand order in the template: ORN
;; inverts its second source, so operand 1 (the inverted one in the
;; RTL) is printed last.
331 (define_insn "orn<mode>3"
332  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
333        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
334                 (match_operand:VDQ_I 2 "register_operand" "w")))]
335  "TARGET_SIMD"
336  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
337   [(set_attr "type" "neon_logic<q>")]
;; Bit clear (AND-NOT); same operand swap as ORN above.
340 (define_insn "bic<mode>3"
341  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
342        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
343                 (match_operand:VDQ_I 2 "register_operand" "w")))]
344  "TARGET_SIMD"
345  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
346   [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
349 (define_insn "add<mode>3"
350   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
351         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
352                   (match_operand:VDQ_I 2 "register_operand" "w")))]
353   "TARGET_SIMD"
354   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
355   [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
358 (define_insn "sub<mode>3"
359   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
360         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
361                    (match_operand:VDQ_I 2 "register_operand" "w")))]
362   "TARGET_SIMD"
363   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
364   [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (byte/half/word element sizes only —
;; VDQ_BHSI excludes 64-bit elements).
367 (define_insn "mul<mode>3"
368   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
369         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
370                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
371   "TARGET_SIMD"
372   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
373   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element via the REV family of instructions.
376 (define_insn "bswap<mode>2"
377   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
378         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
379   "TARGET_SIMD"
380   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
381   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (UNSPEC: no generic RTL form).
384 (define_insn "aarch64_rbit<mode>"
385   [(set (match_operand:VB 0 "register_operand" "=w")
386         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
387                    UNSPEC_RBIT))]
388   "TARGET_SIMD"
389   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
390   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as CLZ of the fully bit-reversed
;; value: bswap reverses bytes within each element, rbit (applied on
;; the byte-vector view of the same register) reverses bits within
;; each byte, and CLZ of the combined reversal yields CTZ.
393 (define_expand "ctz<mode>2"
394   [(set (match_operand:VS 0 "register_operand")
395         (ctz:VS (match_operand:VS 1 "register_operand")))]
396   "TARGET_SIMD"
397   {
398      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
399      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
400                                              <MODE>mode, 0);
401      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
402      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
403      DONE;
404   }
;; xorsign(a, b) = a with its sign flipped by the sign of b.  Computed
;; in the integer-equivalent mode as a ^ (b & sign-bit-mask), where the
;; mask duplicates ~0 << (element bits - 1) (just the sign bit) into
;; every lane.
407 (define_expand "xorsign<mode>3"
408   [(match_operand:VHSDF 0 "register_operand")
409    (match_operand:VHSDF 1 "register_operand")
410    (match_operand:VHSDF 2 "register_operand")]
411   "TARGET_SIMD"
414   machine_mode imode = <V_INT_EQUIV>mode;
415   rtx v_bitmask = gen_reg_rtx (imode);
416   rtx op1x = gen_reg_rtx (imode);
417   rtx op2x = gen_reg_rtx (imode);
419   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
420   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
422   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
424   emit_move_insn (v_bitmask,
425                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
426                                                      HOST_WIDE_INT_M1U << bits));
428   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
429   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
430   emit_move_insn (operands[0],
431                   lowpart_subreg (<MODE>mode, op1x, imode));
432   DONE;
436 ;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
437 ;; fact that their usage need to guarantee that the source vectors are
438 ;; contiguous.  It would be wrong to describe the operation without being able
439 ;; to describe the permute that is also required, but even if that is done
440 ;; the permute would have been created as a LOAD_LANES which means the values
441 ;; in the registers are in the wrong order.
;; Complex add with rotation <rot> (Armv8.3-A FCADD).
442 (define_insn "aarch64_fcadd<rot><mode>"
443   [(set (match_operand:VHSDF 0 "register_operand" "=w")
444         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
445                        (match_operand:VHSDF 2 "register_operand" "w")]
446                        FCADD))]
447   "TARGET_COMPLEX"
448   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
449   [(set_attr "type" "neon_fcadd")]
;; Complex multiply-accumulate: operand 1 is the accumulator (tied to
;; the destination), operands 2 and 3 are the multiplicands.
452 (define_insn "aarch64_fcmla<rot><mode>"
453   [(set (match_operand:VHSDF 0 "register_operand" "=w")
454         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
455                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
456                                    (match_operand:VHSDF 3 "register_operand" "w")]
457                                    FCMLA)))]
458   "TARGET_COMPLEX"
459   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
460   [(set_attr "type" "neon_fcmla")]
;; FCMLA with the second multiplicand taken from a lane (operand 4);
;; the lane index is endian-adjusted over the half-width mode.
464 (define_insn "aarch64_fcmla_lane<rot><mode>"
465   [(set (match_operand:VHSDF 0 "register_operand" "=w")
466         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
467                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
468                                    (match_operand:VHSDF 3 "register_operand" "w")
469                                    (match_operand:SI 4 "const_int_operand" "n")]
470                                    FCMLA)))]
471   "TARGET_COMPLEX"
473   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
474   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
476   [(set_attr "type" "neon_fcmla")]
;; V4HF FCMLA indexing a lane of a full 128-bit (V8HF) vector.
479 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
480   [(set (match_operand:V4HF 0 "register_operand" "=w")
481         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
482                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
483                                  (match_operand:V8HF 3 "register_operand" "w")
484                                  (match_operand:SI 4 "const_int_operand" "n")]
485                                  FCMLA)))]
486   "TARGET_COMPLEX"
488   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
489   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
491   [(set_attr "type" "neon_fcmla")]
;; Q-register FCMLA indexing a lane of a half-width vector; the lane
;; count is halved because each complex element spans two lanes.
494 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
495   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
496         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
497                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
498                                      (match_operand:<VHALF> 3 "register_operand" "w")
499                                      (match_operand:SI 4 "const_int_operand" "n")]
500                                      FCMLA)))]
501   "TARGET_COMPLEX"
503   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
504   operands[4]
505     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
506   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
508   [(set_attr "type" "neon_fcmla")]
511 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator, tied to the destination register.
512 (define_insn "aarch64_<sur>dot<vsi2qi>"
513   [(set (match_operand:VS 0 "register_operand" "=w")
514         (plus:VS (match_operand:VS 1 "register_operand" "0")
515                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
516                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
517                 DOTPROD)))]
518   "TARGET_DOTPROD"
519   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
520   [(set_attr "type" "neon_dot<q>")]
523 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
524 ;; (vector) Dot Product operation.
525 (define_insn "aarch64_usdot<vsi2qi>"
526   [(set (match_operand:VS 0 "register_operand" "=w")
527         (plus:VS
528           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
529                       (match_operand:<VSI2QI> 3 "register_operand" "w")]
530           UNSPEC_USDOT)
531           (match_operand:VS 1 "register_operand" "0")))]
532   "TARGET_I8MM"
533   "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
534   [(set_attr "type" "neon_dot<q>")]
537 ;; These expands map to the Dot Product optab the vectorizer checks for.
538 ;; The auto-vectorizer expects a dot product builtin that also does an
539 ;; accumulation into the provided register.
540 ;; Given the following pattern
542 ;; for (i=0; i<len; i++) {
543 ;;     c = a[i] * b[i];
544 ;;     r += c;
545 ;; }
546 ;; return result;
548 ;; This can be auto-vectorized to
549 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
551 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
552 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
553 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
554 ;; ...
556 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; The expander accumulates into operand 3 and then copies it into
;; operand 0, relying on the register allocator to tie them.
557 (define_expand "<sur>dot_prod<vsi2qi>"
558   [(set (match_operand:VS 0 "register_operand")
559         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
560                             (match_operand:<VSI2QI> 2 "register_operand")]
561                  DOTPROD)
562                 (match_operand:VS 3 "register_operand")))]
563   "TARGET_DOTPROD"
565   emit_insn (
566     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
567                                     operands[2]));
568   emit_insn (gen_rtx_SET (operands[0], operands[3]));
569   DONE;
572 ;; These instructions map to the __builtins for the Dot Product
573 ;; indexed operations.
;; Dot product against a single 32-bit (4 x byte) lane of a 64-bit
;; vector; the lane index is endian-adjusted over V8QImode.
574 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
575   [(set (match_operand:VS 0 "register_operand" "=w")
576         (plus:VS (match_operand:VS 1 "register_operand" "0")
577                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
578                             (match_operand:V8QI 3 "register_operand" "<h_con>")
579                             (match_operand:SI 4 "immediate_operand" "i")]
580                 DOTPROD)))]
581   "TARGET_DOTPROD"
582   {
583     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
584     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
585   }
586   [(set_attr "type" "neon_dot<q>")]
;; Same as above but indexing a lane of a 128-bit (V16QI) vector.
589 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
590   [(set (match_operand:VS 0 "register_operand" "=w")
591         (plus:VS (match_operand:VS 1 "register_operand" "0")
592                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
593                             (match_operand:V16QI 3 "register_operand" "<h_con>")
594                             (match_operand:SI 4 "immediate_operand" "i")]
595                 DOTPROD)))]
596   "TARGET_DOTPROD"
597   {
598     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
599     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
600   }
601   [(set_attr "type" "neon_dot<q>")]
604 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
605 ;; (by element) Dot Product operations.
;; nunits / 4 is the number of 4-byte groups, which is the lane count
;; for the by-element form.
606 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
607   [(set (match_operand:VS 0 "register_operand" "=w")
608         (plus:VS
609           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
610                       (match_operand:VB 3 "register_operand" "w")
611                       (match_operand:SI 4 "immediate_operand" "i")]
612           DOTPROD_I8MM)
613           (match_operand:VS 1 "register_operand" "0")))]
614   "TARGET_I8MM"
615   {
616     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
617     int lane = INTVAL (operands[4]);
618     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
619     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
620   }
621   [(set_attr "type" "neon_dot<VS:q>")]
;; copysign(x, y): select the sign bit from operand 2 and everything
;; else from operand 1, via a bit-select (BSL) under a mask that has
;; only the sign bit set in each lane.
624 (define_expand "copysign<mode>3"
625   [(match_operand:VHSDF 0 "register_operand")
626    (match_operand:VHSDF 1 "register_operand")
627    (match_operand:VHSDF 2 "register_operand")]
628   "TARGET_FLOAT && TARGET_SIMD"
630   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
631   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
633   emit_move_insn (v_bitmask,
634                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
635                                                      HOST_WIDE_INT_M1U << bits));
636   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
637                                          operands[2], operands[1]));
638   DONE;
;; Multiply a vector by one broadcast lane of another vector of the
;; same mode ((F)MUL by element); combiner pattern, hence the "*".
642 (define_insn "*aarch64_mul3_elt<mode>"
643  [(set (match_operand:VMUL 0 "register_operand" "=w")
644     (mult:VMUL
645       (vec_duplicate:VMUL
646           (vec_select:<VEL>
647             (match_operand:VMUL 1 "register_operand" "<h_con>")
648             (parallel [(match_operand:SI 2 "immediate_operand")])))
649       (match_operand:VMUL 3 "register_operand" "w")))]
650   "TARGET_SIMD"
651   {
652     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
653     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
654   }
655   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane comes from a vector of the swapped width.
658 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
659   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
660      (mult:VMUL_CHANGE_NLANES
661        (vec_duplicate:VMUL_CHANGE_NLANES
662           (vec_select:<VEL>
663             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
664             (parallel [(match_operand:SI 2 "immediate_operand")])))
665       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
666   "TARGET_SIMD"
667   {
668     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
669     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
670   }
671   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar broadcast from a register; printed as a
;; by-element multiply using lane 0.
674 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
675  [(set (match_operand:VMUL 0 "register_operand" "=w")
676     (mult:VMUL
677       (vec_duplicate:VMUL
678             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
679       (match_operand:VMUL 2 "register_operand" "w")))]
680   "TARGET_SIMD"
681   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
682   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar modes.
685 (define_insn "@aarch64_rsqrte<mode>"
686   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
687         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
688                      UNSPEC_RSQRTE))]
689   "TARGET_SIMD"
690   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
691   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS), the Newton-Raphson refinement
;; companion to FRSQRTE.
693 (define_insn "@aarch64_rsqrts<mode>"
694   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
695         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
696                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
697          UNSPEC_RSQRTS))]
698   "TARGET_SIMD"
699   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
700   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Approximate 1/sqrt expansion; delegated to aarch64_emit_approx_sqrt
;; with recp=true.
702 (define_expand "rsqrt<mode>2"
703   [(set (match_operand:VALLF 0 "register_operand")
704         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
705                      UNSPEC_RSQRT))]
706   "TARGET_SIMD"
708   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
709   DONE;
;; Scalar DF multiply where one operand is a selected lane of a V2DF
;; vector; emitted as a by-element FMUL.  Combiner pattern.
712 (define_insn "*aarch64_mul3_elt_to_64v2df"
713   [(set (match_operand:DF 0 "register_operand" "=w")
714      (mult:DF
715        (vec_select:DF
716          (match_operand:V2DF 1 "register_operand" "w")
717          (parallel [(match_operand:SI 2 "immediate_operand")]))
718        (match_operand:DF 3 "register_operand" "w")))]
719   "TARGET_SIMD"
720   {
721     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
722     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
723   }
724   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation.
727 (define_insn "neg<mode>2"
728   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
729         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
730   "TARGET_SIMD"
731   "neg\t%0.<Vtype>, %1.<Vtype>"
732   [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (generic RTL abs form).
735 (define_insn "abs<mode>2"
736   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
737         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
738   "TARGET_SIMD"
739   "abs\t%0.<Vtype>, %1.<Vtype>"
740   [(set_attr "type" "neon_abs<q>")]
743 ;; The intrinsic version of integer ABS must not be allowed to
744 ;; combine with any operation with an integrated ABS step, such
745 ;; as SABD.
;; Hence this variant is kept as an opaque UNSPEC rather than abs:.
746 (define_insn "aarch64_abs<mode>"
747   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
748           (unspec:VSDQ_I_DI
749             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
750            UNSPEC_ABS))]
751   "TARGET_SIMD"
752   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
753   [(set_attr "type" "neon_abs<q>")]
756 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
757 ;; This isn't accurate as ABS always treats its input as a signed value.
758 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
759 ;; Whereas SABD would return 192 (-64 signed) on the above example.
760 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
761 (define_insn "aarch64_<su>abd<mode>_3"
762   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
763         (minus:VDQ_BHSI
764           (USMAX:VDQ_BHSI
765             (match_operand:VDQ_BHSI 1 "register_operand" "w")
766             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
767           (<max_opp>:VDQ_BHSI
768             (match_dup 1)
769             (match_dup 2))))]
770   "TARGET_SIMD"
771   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
772   [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves ([SU]ABDL2); the
;; result has double-width elements (<VDBLW>).
775 (define_insn "aarch64_<sur>abdl2<mode>_3"
776   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
777         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
778                           (match_operand:VDQV_S 2 "register_operand" "w")]
779         ABDL2))]
780   "TARGET_SIMD"
781   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
782   [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference and accumulate ([SU]ABAL); operand 3 is
;; the accumulator, tied to the destination.
785 (define_insn "aarch64_<sur>abal<mode>_4"
786   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
787         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
788                           (match_operand:VDQV_S 2 "register_operand" "w")
789                          (match_operand:<VDBLW> 3 "register_operand" "0")]
790         ABAL))]
791   "TARGET_SIMD"
792   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
793   [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise widening add and accumulate ([SU]ADALP); operand 2 is the
;; accumulator, tied to the destination.
796 (define_insn "aarch64_<sur>adalp<mode>_3"
797   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
798         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
799                           (match_operand:<VDBLW> 2 "register_operand" "0")]
800         ADALP))]
801   "TARGET_SIMD"
802   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
803   [(set_attr "type" "neon_reduc_add<q>")]
806 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
807 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
808 ;; reduction of the difference into a V4SI vector and accumulate that into
809 ;; operand 3 before copying that into the result operand 0.
810 ;; Perform that with a sequence of:
811 ;; UABDL2       tmp.8h, op1.16b, op2.16b
812 ;; UABAL        tmp.8h, op1.16b, op2.16b
813 ;; UADALP       op3.4s, tmp.8h
814 ;; MOV          op0, op3 // should be eliminated in later passes.
816 ;; For TARGET_DOTPROD we do:
817 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
818 ;; UABD tmp2.16b, op1.16b, op2.16b
819 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
820 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
822 ;; The signed version just uses the signed variants of the above instructions
823 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
824 ;; unsigned.
826 (define_expand "<sur>sadv16qi"
827   [(use (match_operand:V4SI 0 "register_operand"))
828    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
829                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
830    (use (match_operand:V4SI 3 "register_operand"))]
831   "TARGET_SIMD"
832   {
833     if (TARGET_DOTPROD)
834       {
;; Dot-product path: ABD then a UDOT against a vector of all-ones, which
;; performs the widening reduction and accumulation in one instruction.
835         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
836         rtx abd = gen_reg_rtx (V16QImode);
837         emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
838         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
839                                           abd, ones));
840         DONE;
841       }
;; Generic path: ABDL2 (high halves) into reduc, ABAL accumulates the low
;; halves on top, then ADALP widens/accumulates into the V4SI operand 3.
842     rtx reduc = gen_reg_rtx (V8HImode);
843     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
844                                                operands[2]));
845     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
846                                               operands[2], reduc));
847     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
848                                               operands[3]));
849     emit_move_insn (operands[0], operands[3]);
850     DONE;
851   }
;; SABA: signed absolute difference of operands 1 and 2, accumulated into
;; operand 3 (tied to the output).  abs (minus ...) is inherently the
;; signed form, hence the unconditional "saba" template.
854 (define_insn "aba<mode>_3"
855   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
856         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
857                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
858                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
859                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
860   "TARGET_SIMD"
861   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
862   [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference, vector and scalar forms
;; (VHSDF_HSDF covers both; <v>/<Vmtype> select the right operand syntax).
865 (define_insn "fabd<mode>3"
866   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
867         (abs:VHSDF_HSDF
868           (minus:VHSDF_HSDF
869             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
870             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
871   "TARGET_SIMD"
872   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
873   [(set_attr "type" "neon_fp_abd_<stype><q>")]
876 ;; For AND (vector, register) and BIC (vector, immediate)
877 (define_insn "and<mode>3"
878   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
879         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
880                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
881   "TARGET_SIMD"
882   {
883     switch (which_alternative)
884       {
885       case 0:
886         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
;; Alternative 1: operand 2 is an immediate whose inverse is encodable,
;; so emit a BIC-immediate via the shared mov-immediate printer.
887       case 1:
888         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
889                                                   AARCH64_CHECK_BIC);
890       default:
891         gcc_unreachable ();
892       }
893   }
894   [(set_attr "type" "neon_logic<q>")]
897 ;; For ORR (vector, register) and ORR (vector, immediate)
898 (define_insn "ior<mode>3"
899   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
900         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
901                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
902   "TARGET_SIMD"
903   {
904     switch (which_alternative)
905       {
906       case 0:
907         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
;; Alternative 1: ORR with an encodable immediate.
908       case 1:
909         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
910                                                   AARCH64_CHECK_ORR);
911       default:
912         gcc_unreachable ();
913       }
914   }
915   [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-or (EOR).
918 (define_insn "xor<mode>3"
919   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
920         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
921                  (match_operand:VDQ_I 2 "register_operand" "w")))]
922   "TARGET_SIMD"
923   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
924   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
927 (define_insn "one_cmpl<mode>2"
928   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
929         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
930   "TARGET_SIMD"
931   "not\t%0.<Vbtype>, %1.<Vbtype>"
932   [(set_attr "type" "neon_logic<q>")]
;; Insert a single element into a vector lane, keeping the other lanes of
;; operand 3 (tied to the output).  Operand 2 is the vec_merge mask: a
;; single set bit selecting the lane.  Three alternatives: INS from a SIMD
;; register lane, INS from a general register, or LD1 straight from memory.
935 (define_insn "aarch64_simd_vec_set<mode>"
936   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
937         (vec_merge:VALL_F16
938             (vec_duplicate:VALL_F16
939                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
940             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
941             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
942   "TARGET_SIMD"
943   {
;; Convert the one-hot mask into a lane number, correcting for endianness,
;; then rebuild the mask so %p2 prints the architectural lane index.
944    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
945    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
946    switch (which_alternative)
947      {
948      case 0:
949         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
950      case 1:
951         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
952      case 2:
953         return "ld1\\t{%0.<Vetype>}[%p2], %1";
954      default:
955         gcc_unreachable ();
956      }
957   }
958   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy lane %4 of operand 3 into lane %p2 of the result (other lanes come
;; from operand 1, tied to the output).  Source and destination vectors
;; have the same mode.
961 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
962   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
963         (vec_merge:VALL_F16
964             (vec_duplicate:VALL_F16
965               (vec_select:<VEL>
966                 (match_operand:VALL_F16 3 "register_operand" "w")
967                 (parallel
968                   [(match_operand:SI 4 "immediate_operand" "i")])))
969             (match_operand:VALL_F16 1 "register_operand" "0")
970             (match_operand:SI 2 "immediate_operand" "i")))]
971   "TARGET_SIMD"
972   {
973     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
974     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
975     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
977     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
978   }
979   [(set_attr "type" "neon_ins<q>")]
;; As above, but the source vector has the opposite width (<VSWAP_WIDTH>),
;; e.g. copying a lane from a 64-bit vector into a 128-bit one.
982 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
983   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
984         (vec_merge:VALL_F16_NO_V2Q
985             (vec_duplicate:VALL_F16_NO_V2Q
986               (vec_select:<VEL>
987                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
988                 (parallel
989                   [(match_operand:SI 4 "immediate_operand" "i")])))
990             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
991             (match_operand:SI 2 "immediate_operand" "i")))]
992   "TARGET_SIMD"
993   {
994     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
995     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
996     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
997                                            INTVAL (operands[4]));
999     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1000   }
1001   [(set_attr "type" "neon_ins<q>")]
;; signbit: extract the sign bit of each FP lane by reinterpreting the
;; vector as integers and doing an unsigned shift right by (lane bits - 1),
;; leaving 0 or 1 per lane.
1004 (define_expand "signbit<mode>2"
1005   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1006    (use (match_operand:VDQSF 1 "register_operand"))]
1007   "TARGET_SIMD"
1009   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1010   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1011                                                         shift_amount);
1012   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1014   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1015                                                  shift_vector));
1016   DONE;
;; USHR: vector logical shift right by an immediate (operand 2 is a
;; const vector with all lanes equal, printed via %2).
1019 (define_insn "aarch64_simd_lshr<mode>"
1020  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1021        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1022                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1023  "TARGET_SIMD"
1024  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1025   [(set_attr "type" "neon_shift_imm<q>")]
;; SSHR: vector arithmetic shift right by an immediate.
1028 (define_insn "aarch64_simd_ashr<mode>"
1029  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1030        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1031                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1032  "TARGET_SIMD"
1033  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1034   [(set_attr "type" "neon_shift_imm<q>")]
;; [US]SRA: shift right (logical or arithmetic per SHIFTRT) and accumulate
;; into operand 3 (tied to the output).
1037 (define_insn "*aarch64_simd_sra<mode>"
1038  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1039         (plus:VDQ_I
1040            (SHIFTRT:VDQ_I
1041                 (match_operand:VDQ_I 1 "register_operand" "w")
1042                 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1043            (match_operand:VDQ_I 3 "register_operand" "0")))]
1044   "TARGET_SIMD"
1045   "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1046   [(set_attr "type" "neon_shift_acc<q>")]
;; SHL: vector shift left by an immediate.
1049 (define_insn "aarch64_simd_imm_shl<mode>"
1050  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1051        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1052                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1053  "TARGET_SIMD"
1054   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1055   [(set_attr "type" "neon_shift_imm<q>")]
;; SSHL: shift left by a per-lane register amount (ashift RTL form).
1058 (define_insn "aarch64_simd_reg_sshl<mode>"
1059  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1060        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1061                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1062  "TARGET_SIMD"
1063  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1064   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL with a register amount; unspec because a negative amount shifts
;; right, which plain RTL shift codes cannot express.
1067 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1068  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1069        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1070                     (match_operand:VDQ_I 2 "register_operand" "w")]
1071                    UNSPEC_ASHIFT_UNSIGNED))]
1072  "TARGET_SIMD"
1073  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1074   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL signed variant of the above.
1077 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1078  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1080                     (match_operand:VDQ_I 2 "register_operand" "w")]
1081                    UNSPEC_ASHIFT_SIGNED))]
1082  "TARGET_SIMD"
1083  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1084   [(set_attr "type" "neon_shift_reg<q>")]
;; ashl: vector shift left by a scalar amount.  In-range constants use the
;; SHL-immediate pattern (valid range 0..bits-1); anything else is forced
;; to a register, duplicated across lanes and emitted as SSHL.
1087 (define_expand "ashl<mode>3"
1088   [(match_operand:VDQ_I 0 "register_operand")
1089    (match_operand:VDQ_I 1 "register_operand")
1090    (match_operand:SI  2 "general_operand")]
1091  "TARGET_SIMD"
1093   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1094   int shift_amount;
1096   if (CONST_INT_P (operands[2]))
1097     {
1098       shift_amount = INTVAL (operands[2]);
1099       if (shift_amount >= 0 && shift_amount < bit_width)
1100         {
1101           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1102                                                        shift_amount);
1103           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1104                                                      operands[1],
1105                                                      tmp));
1106           DONE;
1107         }
1108       else
1109         {
1110           operands[2] = force_reg (SImode, operands[2]);
1111         }
1112     }
1113   else if (MEM_P (operands[2]))
1114     {
1115       operands[2] = force_reg (SImode, operands[2]);
1116     }
1118   if (REG_P (operands[2]))
1119     {
;; Broadcast the scalar amount across all lanes and use SSHL.
1120       rtx tmp = gen_reg_rtx (<MODE>mode);
1121       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1122                                              convert_to_mode (<VEL>mode,
1123                                                               operands[2],
1124                                                               0)));
1125       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1126                                                   tmp));
1127       DONE;
1128     }
1129   else
1130     FAIL;
;; lshr: vector logical shift right by a scalar amount.  Note the constant
;; range is 1..bit_width here (right-shift immediates encode up to the
;; lane width), unlike ashl's 0..bit_width-1.  The register path negates
;; the amount and uses USHL, which shifts right for negative amounts.
1134 (define_expand "lshr<mode>3"
1135   [(match_operand:VDQ_I 0 "register_operand")
1136    (match_operand:VDQ_I 1 "register_operand")
1137    (match_operand:SI  2 "general_operand")]
1138  "TARGET_SIMD"
1140   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1141   int shift_amount;
1143   if (CONST_INT_P (operands[2]))
1144     {
1145       shift_amount = INTVAL (operands[2]);
1146       if (shift_amount > 0 && shift_amount <= bit_width)
1147         {
1148           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1149                                                        shift_amount);
1150           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1151                                                   operands[1],
1152                                                   tmp));
1153           DONE;
1154         }
1155       else
1156         operands[2] = force_reg (SImode, operands[2]);
1157     }
1158   else if (MEM_P (operands[2]))
1159     {
1160       operands[2] = force_reg (SImode, operands[2]);
1161     }
1163   if (REG_P (operands[2]))
1164     {
;; Negate, broadcast, then USHL (negative amount = shift right).
1165       rtx tmp = gen_reg_rtx (SImode);
1166       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1167       emit_insn (gen_negsi2 (tmp, operands[2]));
1168       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1169                                              convert_to_mode (<VEL>mode,
1170                                                               tmp, 0)));
1171       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1172                                                           operands[1],
1173                                                           tmp1));
1174       DONE;
1175     }
1176   else
1177     FAIL;
;; ashr: vector arithmetic shift right by a scalar amount.  Mirrors lshr
;; above but emits SSHR for constants and the signed SSHL for registers.
1181 (define_expand "ashr<mode>3"
1182   [(match_operand:VDQ_I 0 "register_operand")
1183    (match_operand:VDQ_I 1 "register_operand")
1184    (match_operand:SI  2 "general_operand")]
1185  "TARGET_SIMD"
1187   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1188   int shift_amount;
1190   if (CONST_INT_P (operands[2]))
1191     {
1192       shift_amount = INTVAL (operands[2]);
1193       if (shift_amount > 0 && shift_amount <= bit_width)
1194         {
1195           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1196                                                        shift_amount);
1197           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1198                                                   operands[1],
1199                                                   tmp));
1200           DONE;
1201         }
1202       else
1203         operands[2] = force_reg (SImode, operands[2]);
1204     }
1205   else if (MEM_P (operands[2]))
1206     {
1207       operands[2] = force_reg (SImode, operands[2]);
1208     }
1210   if (REG_P (operands[2]))
1211     {
;; Negate, broadcast, then signed SSHL (negative amount = shift right).
1212       rtx tmp = gen_reg_rtx (SImode);
1213       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1214       emit_insn (gen_negsi2 (tmp, operands[2]));
1215       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1216                                              convert_to_mode (<VEL>mode,
1217                                                               tmp, 0)));
1218       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1219                                                         operands[1],
1220                                                         tmp1));
1221       DONE;
1222     }
1223   else
1224     FAIL;
;; vashl: shift left by a per-lane vector of amounts; maps directly to SSHL.
1228 (define_expand "vashl<mode>3"
1229  [(match_operand:VDQ_I 0 "register_operand")
1230   (match_operand:VDQ_I 1 "register_operand")
1231   (match_operand:VDQ_I 2 "register_operand")]
1232  "TARGET_SIMD"
1234   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1235                                               operands[2]));
1236   DONE;
1239 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1240 ;; Negating individual lanes most certainly offsets the
1241 ;; gain from vectorization.
;; vashr: per-lane arithmetic shift right by a vector of amounts, done by
;; negating the amounts and emitting signed SSHL.
1242 (define_expand "vashr<mode>3"
1243  [(match_operand:VDQ_BHSI 0 "register_operand")
1244   (match_operand:VDQ_BHSI 1 "register_operand")
1245   (match_operand:VDQ_BHSI 2 "register_operand")]
1246  "TARGET_SIMD"
1248   rtx neg = gen_reg_rtx (<MODE>mode);
1249   emit (gen_neg<mode>2 (neg, operands[2]))
1250   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1251                                                     neg));
1252   DONE;
1255 ;; DI vector shift
;; Handles the intrinsics' shift-by-64 case: ASR by 64 fills with sign
;; copies, identical to ASR by 63, so clamp and defer to ashrdi3.
1256 (define_expand "aarch64_ashr_simddi"
1257   [(match_operand:DI 0 "register_operand")
1258    (match_operand:DI 1 "register_operand")
1259    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1260   "TARGET_SIMD"
1261   {
1262     /* An arithmetic shift right by 64 fills the result with copies of the sign
1263        bit, just like asr by 63 - however the standard pattern does not handle
1264        a shift by 64.  */
1265     if (INTVAL (operands[2]) == 64)
1266       operands[2] = GEN_INT (63);
1267     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1268     DONE;
1269   }
;; vlshr: per-lane logical shift right, via negated amounts and USHL.
1272 (define_expand "vlshr<mode>3"
1273  [(match_operand:VDQ_BHSI 0 "register_operand")
1274   (match_operand:VDQ_BHSI 1 "register_operand")
1275   (match_operand:VDQ_BHSI 2 "register_operand")]
1276  "TARGET_SIMD"
1278   rtx neg = gen_reg_rtx (<MODE>mode);
1279   emit (gen_neg<mode>2 (neg, operands[2]));
1280   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1281                                                       neg));
1282   DONE;
;; Logical right shift by 64 yields zero, which lshrdi3 cannot express.
1285 (define_expand "aarch64_lshr_simddi"
1286   [(match_operand:DI 0 "register_operand")
1287    (match_operand:DI 1 "register_operand")
1288    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1289   "TARGET_SIMD"
1290   {
1291     if (INTVAL (operands[2]) == 64)
1292       emit_move_insn (operands[0], const0_rtx);
1293     else
1294       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1295     DONE;
1296   }
1299 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right by operand 2 bits.  On big-endian the element
;; order in the register is reversed, so a SHL produces the same
;; whole-vector effect that USHR does on little-endian.
1300 (define_insn "vec_shr_<mode>"
1301   [(set (match_operand:VD 0 "register_operand" "=w")
1302         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1303                     (match_operand:SI 2 "immediate_operand" "i")]
1304                    UNSPEC_VEC_SHR))]
1305   "TARGET_SIMD"
1306   {
1307     if (BYTES_BIG_ENDIAN)
1308       return "shl %d0, %d1, %2";
1309     else
1310       return "ushr %d0, %d1, %2";
1311   }
1312   [(set_attr "type" "neon_shift_imm")]
;; vec_set: standard-name expander; builds the one-hot vec_merge mask from
;; the lane index and defers to aarch64_simd_vec_set<mode> above.
1315 (define_expand "vec_set<mode>"
1316   [(match_operand:VALL_F16 0 "register_operand")
1317    (match_operand:<VEL> 1 "register_operand")
1318    (match_operand:SI 2 "immediate_operand")]
1319   "TARGET_SIMD"
1320   {
1321     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1322     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1323                                           GEN_INT (elem), operands[0]));
1324     DONE;
1325   }
;; MLA: multiply-accumulate, result = op1 + op2 * op3 with op1 tied to the
;; output register.
1329 (define_insn "aarch64_mla<mode>"
1330  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1331        (plus:VDQ_BHSI (mult:VDQ_BHSI
1332                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1333                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1334                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1335  "TARGET_SIMD"
1336  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1337   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by-element form: the multiplier is a single lane of operand 1,
;; broadcast across the vector.
1340 (define_insn "*aarch64_mla_elt<mode>"
1341  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1342        (plus:VDQHS
1343          (mult:VDQHS
1344            (vec_duplicate:VDQHS
1345               (vec_select:<VEL>
1346                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1347                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1348            (match_operand:VDQHS 3 "register_operand" "w"))
1349          (match_operand:VDQHS 4 "register_operand" "0")))]
1350  "TARGET_SIMD"
1351   {
1352     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1353     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1354   }
1355   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from a vector of the opposite width.
1358 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1359  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1360        (plus:VDQHS
1361          (mult:VDQHS
1362            (vec_duplicate:VDQHS
1363               (vec_select:<VEL>
1364                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1365                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1366            (match_operand:VDQHS 3 "register_operand" "w"))
1367          (match_operand:VDQHS 4 "register_operand" "0")))]
1368  "TARGET_SIMD"
1369   {
1370     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1371     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1372   }
1373   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA with a scalar multiplier already in a SIMD register: use lane 0.
1376 (define_insn "*aarch64_mla_elt_merge<mode>"
1377   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1378         (plus:VDQHS
1379           (mult:VDQHS (vec_duplicate:VDQHS
1380                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1381                 (match_operand:VDQHS 2 "register_operand" "w"))
1382           (match_operand:VDQHS 3 "register_operand" "0")))]
1383  "TARGET_SIMD"
1384  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1385   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS: multiply-subtract, result = op1 - op2 * op3 with op1 tied to the
;; output register.
1388 (define_insn "aarch64_mls<mode>"
1389  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1390        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1391                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1392                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1393  "TARGET_SIMD"
1394  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1395   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by-element form, mirroring *aarch64_mla_elt<mode>.
1398 (define_insn "*aarch64_mls_elt<mode>"
1399  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1400        (minus:VDQHS
1401          (match_operand:VDQHS 4 "register_operand" "0")
1402          (mult:VDQHS
1403            (vec_duplicate:VDQHS
1404               (vec_select:<VEL>
1405                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1406                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1407            (match_operand:VDQHS 3 "register_operand" "w"))))]
1408  "TARGET_SIMD"
1409   {
1410     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1411     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1412   }
1413   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by-element with the lane taken from the opposite-width vector.
1416 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1417  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1418        (minus:VDQHS
1419          (match_operand:VDQHS 4 "register_operand" "0")
1420          (mult:VDQHS
1421            (vec_duplicate:VDQHS
1422               (vec_select:<VEL>
1423                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1424                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1425            (match_operand:VDQHS 3 "register_operand" "w"))))]
1426  "TARGET_SIMD"
1427   {
1428     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1429     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1430   }
1431   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a scalar multiplier already in a SIMD register: use lane 0.
1434 (define_insn "*aarch64_mls_elt_merge<mode>"
1435   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1436         (minus:VDQHS
1437           (match_operand:VDQHS 1 "register_operand" "0")
1438           (mult:VDQHS (vec_duplicate:VDQHS
1439                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1440                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1441   "TARGET_SIMD"
1442   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1443   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1446 ;; Max/Min operations.
1447 (define_insn "<su><maxmin><mode>3"
1448  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1449        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1450                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1451  "TARGET_SIMD"
1452  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1453   [(set_attr "type" "neon_minmax<q>")]
;; There is no V2DI SMAX/SMIN/UMAX/UMIN instruction, so synthesise the
;; operation with a compare and a vcond select.
1456 (define_expand "<su><maxmin>v2di3"
1457  [(set (match_operand:V2DI 0 "register_operand")
1458        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1459                     (match_operand:V2DI 2 "register_operand")))]
1460  "TARGET_SIMD"
1462   enum rtx_code cmp_operator;
1463   rtx cmp_fmt;
;; Map the max/min code to the comparison that selects operand 1.
1465   switch (<CODE>)
1466     {
1467     case UMIN:
1468       cmp_operator = LTU;
1469       break;
1470     case SMIN:
1471       cmp_operator = LT;
1472       break;
1473     case UMAX:
1474       cmp_operator = GTU;
1475       break;
1476     case SMAX:
1477       cmp_operator = GT;
1478       break;
1479     default:
1480       gcc_unreachable ();
1481     }
1483   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1484   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1485               operands[2], cmp_fmt, operands[1], operands[2]));
1486   DONE;
1489 ;; Pairwise Integer Max/Min operations.
1490 (define_insn "aarch64_<maxmin_uns>p<mode>"
1491  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1492        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1493                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1494                         MAXMINV))]
1495  "TARGET_SIMD"
1496  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1497   [(set_attr "type" "neon_minmax<q>")]
1500 ;; Pairwise FP Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1503        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1504                       (match_operand:VHSDF 2 "register_operand" "w")]
1505                       FMAXMINV))]
1506  "TARGET_SIMD"
1507  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508   [(set_attr "type" "neon_minmax<q>")]
1511 ;; vec_concat gives a new vector with the low elements from operand 1, and
1512 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1513 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1514 ;; What that means, is that the RTL descriptions of the below patterns
1515 ;; need to change depending on endianness.
1517 ;; Move to the low architectural bits of the register.
1518 ;; On little-endian this is { operand, zeroes }
1519 ;; On big-endian this is { zeroes, operand }
;; VQMOV_NO2E variant: the zero half is a vec_duplicate of zero (more than
;; two elements per half).
1521 (define_insn "move_lo_quad_internal_<mode>"
1522   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1523         (vec_concat:VQMOV_NO2E
1524           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1525           (vec_duplicate:<VHALF> (const_int 0))))]
1526   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1527   "@
1528    dup\\t%d0, %1.d[0]
1529    fmov\\t%d0, %1
1530    dup\\t%d0, %1"
1531   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1532    (set_attr "length" "4")
1533    (set_attr "arch" "simd,fp,simd")]
;; Two-element variant: the zero half is a bare const_int 0 (single
;; DI-sized element per half), hence a separate pattern.
1536 (define_insn "move_lo_quad_internal_<mode>"
1537   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1538         (vec_concat:VQ_2E
1539           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1540           (const_int 0)))]
1541   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1542   "@
1543    dup\\t%d0, %1.d[0]
1544    fmov\\t%d0, %1
1545    dup\\t%d0, %1"
1546   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1547    (set_attr "length" "4")
1548    (set_attr "arch" "simd,fp,simd")]
;; Big-endian versions of the two patterns above: the vec_concat operand
;; order is swapped, but the emitted instructions are identical.
1551 (define_insn "move_lo_quad_internal_be_<mode>"
1552   [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1553         (vec_concat:VQMOV_NO2E
1554           (vec_duplicate:<VHALF> (const_int 0))
1555           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1556   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1557   "@
1558    dup\\t%d0, %1.d[0]
1559    fmov\\t%d0, %1
1560    dup\\t%d0, %1"
1561   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1562    (set_attr "length" "4")
1563    (set_attr "arch" "simd,fp,simd")]
1566 (define_insn "move_lo_quad_internal_be_<mode>"
1567   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1568         (vec_concat:VQ_2E
1569           (const_int 0)
1570           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1571   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1572   "@
1573    dup\\t%d0, %1.d[0]
1574    fmov\\t%d0, %1
1575    dup\\t%d0, %1"
1576   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1577    (set_attr "length" "4")
1578    (set_attr "arch" "simd,fp,simd")]
;; Dispatch to the endian-correct internal pattern.
1581 (define_expand "move_lo_quad_<mode>"
1582   [(match_operand:VQMOV 0 "register_operand")
1583    (match_operand:VQMOV 1 "register_operand")]
1584   "TARGET_SIMD"
1586   if (BYTES_BIG_ENDIAN)
1587     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1588   else
1589     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1590   DONE;
1594 ;; Move operand1 to the high architectural bits of the register, keeping
1595 ;; the low architectural bits of operand2.
1596 ;; For little-endian this is { operand2, operand1 }
1597 ;; For big-endian this is { operand1, operand2 }
;; The low half is read back from the destination itself (match_dup 0 via
;; a vec_select parameterised by the lo-half lane mask in operand 2), so
;; only the high half changes: a single INS into d[1].
1599 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1600   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1601         (vec_concat:VQMOV
1602           (vec_select:<VHALF>
1603                 (match_dup 0)
1604                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1605           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1606   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1607   "@
1608    ins\\t%0.d[1], %1.d[0]
1609    ins\\t%0.d[1], %1"
1610   [(set_attr "type" "neon_ins")]
;; Big-endian version: vec_concat operand order swapped, same instructions.
1613 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1614   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1615         (vec_concat:VQMOV
1616           (match_operand:<VHALF> 1 "register_operand" "w,r")
1617           (vec_select:<VHALF>
1618                 (match_dup 0)
1619                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1620   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1621   "@
1622    ins\\t%0.d[1], %1.d[0]
1623    ins\\t%0.d[1], %1"
1624   [(set_attr "type" "neon_ins")]
;; Build the lo-half lane-selection parallel and dispatch to the
;; endian-correct pattern above.
1627 (define_expand "move_hi_quad_<mode>"
1628  [(match_operand:VQMOV 0 "register_operand")
1629   (match_operand:<VHALF> 1 "register_operand")]
1630  "TARGET_SIMD"
1632   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1633   if (BYTES_BIG_ENDIAN)
1634     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1635                     operands[1], p));
1636   else
1637     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1638                     operands[1], p));
1639   DONE;
1642 ;; Narrowing operations.
1644 ;; For doubles.
;; XTN: truncate each element of a 128-bit vector to half its width,
;; producing a 64-bit result.
1645 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1646  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1647        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1648  "TARGET_SIMD"
1649  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1650   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit inputs: assemble them into one 128-bit temporary (which
;; half goes where depends on endianness), then narrow it with XTN.
1653 (define_expand "vec_pack_trunc_<mode>"
1654  [(match_operand:<VNARROWD> 0 "register_operand")
1655   (match_operand:VDN 1 "register_operand")
1656   (match_operand:VDN 2 "register_operand")]
1657  "TARGET_SIMD"
1659   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1660   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1661   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1663   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1664   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1665   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1666   DONE;
1669 ;; For quads.
;; Two-insn XTN/XTN2 sequence; the earlyclobber (=&w) keeps the destination
;; from overlapping the source still needed by the second instruction.  The
;; operand order of the pair is swapped for big-endian.
1671 (define_insn "vec_pack_trunc_<mode>"
1672  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1673        (vec_concat:<VNARROWQ2>
1674          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1675          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1676  "TARGET_SIMD"
1678    if (BYTES_BIG_ENDIAN)
1679      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1680    else
1681      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1683   [(set_attr "type" "multiple")
1684    (set_attr "length" "8")]
1687 ;; Widening operations.
;; Sign/zero-extend the low half of a 128-bit vector ((S|U)XTL).
1689 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1690   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1692                                (match_operand:VQW 1 "register_operand" "w")
1693                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1694                             )))]
1695   "TARGET_SIMD"
1696   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1697   [(set_attr "type" "neon_shift_imm_long")]
;; Likewise for the high half ((S|U)XTL2).
1700 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1701   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703                                (match_operand:VQW 1 "register_operand" "w")
1704                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1705                             )))]
1706   "TARGET_SIMD"
1707   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1708   [(set_attr "type" "neon_shift_imm_long")]
;; Standard-pattern expander: build the hi-half lane-selection PARALLEL and
;; emit the _hi insn above.
1711 (define_expand "vec_unpack<su>_hi_<mode>"
1712   [(match_operand:<VWIDE> 0 "register_operand")
1713    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1714   "TARGET_SIMD"
1715   {
1716     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1717     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1718                                                           operands[1], p));
1719     DONE;
1720   }
;; Standard-pattern expander for the low half.
1723 (define_expand "vec_unpack<su>_lo_<mode>"
1724   [(match_operand:<VWIDE> 0 "register_operand")
1725    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1726   "TARGET_SIMD"
1727   {
1728     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1729     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1730                                                           operands[1], p));
1731     DONE;
1732   }
1735 ;; Widening arithmetic.
;; Combiner pattern: (S|U)MLAL — widening multiply of the low halves,
;; accumulated into operand 1 (tied to the destination).
1737 (define_insn "*aarch64_<su>mlal_lo<mode>"
1738   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1739         (plus:<VWIDE>
1740           (mult:<VWIDE>
1741               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742                  (match_operand:VQW 2 "register_operand" "w")
1743                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1744               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1745                  (match_operand:VQW 4 "register_operand" "w")
1746                  (match_dup 3))))
1747           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1748   "TARGET_SIMD"
1749   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1750   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; As above for the high halves: (S|U)MLAL2.
1753 (define_insn "*aarch64_<su>mlal_hi<mode>"
1754   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1755         (plus:<VWIDE>
1756           (mult:<VWIDE>
1757               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1758                  (match_operand:VQW 2 "register_operand" "w")
1759                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1760               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1761                  (match_operand:VQW 4 "register_operand" "w")
1762                  (match_dup 3))))
1763           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1764   "TARGET_SIMD"
1765   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1766   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; (S|U)MLSL — widening multiply of the low halves subtracted from the
;; accumulator in operand 1.
1769 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1770   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771         (minus:<VWIDE>
1772           (match_operand:<VWIDE> 1 "register_operand" "0")
1773           (mult:<VWIDE>
1774               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775                  (match_operand:VQW 2 "register_operand" "w")
1776                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1777               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1778                  (match_operand:VQW 4 "register_operand" "w")
1779                  (match_dup 3))))))]
1780   "TARGET_SIMD"
1781   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1782   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; As above for the high halves: (S|U)MLSL2.
1785 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1786   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1787         (minus:<VWIDE>
1788           (match_operand:<VWIDE> 1 "register_operand" "0")
1789           (mult:<VWIDE>
1790               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1791                  (match_operand:VQW 2 "register_operand" "w")
1792                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1793               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1794                  (match_operand:VQW 4 "register_operand" "w")
1795                  (match_dup 3))))))]
1796   "TARGET_SIMD"
1797   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1798   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit (D-register) forms: the whole input vectors are widened, so no
;; half selection is needed.
1801 (define_insn "*aarch64_<su>mlal<mode>"
1802   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1803         (plus:<VWIDE>
1804           (mult:<VWIDE>
1805             (ANY_EXTEND:<VWIDE>
1806               (match_operand:VD_BHSI 1 "register_operand" "w"))
1807             (ANY_EXTEND:<VWIDE>
1808               (match_operand:VD_BHSI 2 "register_operand" "w")))
1809           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1810   "TARGET_SIMD"
1811   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1812   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit (D-register) multiply-subtract-long.
1815 (define_insn "*aarch64_<su>mlsl<mode>"
1816   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1817         (minus:<VWIDE>
1818           (match_operand:<VWIDE> 1 "register_operand" "0")
1819           (mult:<VWIDE>
1820             (ANY_EXTEND:<VWIDE>
1821               (match_operand:VD_BHSI 2 "register_operand" "w"))
1822             (ANY_EXTEND:<VWIDE>
1823               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1824   "TARGET_SIMD"
1825   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1826   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; (S|U)MULL: widening multiply of the low halves of two 128-bit vectors.
1829 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1830  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1831        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1832                            (match_operand:VQW 1 "register_operand" "w")
1833                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1834                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1835                            (match_operand:VQW 2 "register_operand" "w")
1836                            (match_dup 3)))))]
1837   "TARGET_SIMD"
1838   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1839   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; D-register form used by the vmull intrinsics: the whole 64-bit inputs
;; are widened, so no half selection is needed.
1842 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1843   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1844         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1845                          (match_operand:VD_BHSI 1 "register_operand" "w"))
1846                       (ANY_EXTEND:<VWIDE>
1847                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1848   "TARGET_SIMD"
1849   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1850   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-pattern expander: build the lo-half PARALLEL and emit the
;; _lo insn above.
1853 (define_expand "vec_widen_<su>mult_lo_<mode>"
1854   [(match_operand:<VWIDE> 0 "register_operand")
1855    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1856    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1857  "TARGET_SIMD"
1859    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1860    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1861                                                        operands[1],
1862                                                        operands[2], p));
1863    DONE;
;; (S|U)MULL2: widening multiply of the high halves.
1867 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1868  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1870                             (match_operand:VQW 1 "register_operand" "w")
1871                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1872                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1873                             (match_operand:VQW 2 "register_operand" "w")
1874                             (match_dup 3)))))]
1875   "TARGET_SIMD"
1876   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1877   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-pattern expander for the high halves.
1880 (define_expand "vec_widen_<su>mult_hi_<mode>"
1881   [(match_operand:<VWIDE> 0 "register_operand")
1882    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1883    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1884  "TARGET_SIMD"
1886    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1887    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1888                                                        operands[1],
1889                                                        operands[2], p));
1890    DONE;
1895 ;; vmull_lane_s16 intrinsics
;; Widening multiply by one lane of operand 2.  The lane number in
;; operand 3 is converted from GCC's (memory-order) numbering to the
;; architectural lane number before printing.
1896 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1897   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1898         (mult:<VWIDE>
1899           (ANY_EXTEND:<VWIDE>
1900             (match_operand:<VCOND> 1 "register_operand" "w"))
1901           (ANY_EXTEND:<VWIDE>
1902             (vec_duplicate:<VCOND>
1903               (vec_select:<VEL>
1904                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1905                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1906   "TARGET_SIMD"
1907   {
1908     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1909     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1910   }
1911   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1914 ;; vmlal_lane_s16 intrinsics
;; Widening multiply-accumulate by one lane; operand 1 is the accumulator,
;; tied to the destination.  Lane number endian-adjusted as above.
1915 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1916   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1917         (plus:<VWIDE>
1918           (mult:<VWIDE>
1919             (ANY_EXTEND:<VWIDE>
1920               (match_operand:<VCOND> 2 "register_operand" "w"))
1921             (ANY_EXTEND:<VWIDE>
1922               (vec_duplicate:<VCOND>
1923                 (vec_select:<VEL>
1924                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
1925                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1926           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1927   "TARGET_SIMD"
1928   {
1929     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1930     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1931   }
1932   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1935 ;; FP vector operations.
1936 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1937 ;; double-precision (64-bit) floating-point data types and arithmetic as
1938 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1939 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1941 ;; Floating-point operations can raise an exception.  Vectorizing such
1942 ;; operations are safe because of reasons explained below.
1944 ;; ARMv8 permits an extension to enable trapped floating-point
1945 ;; exception handling, however this is an optional feature.  In the
1946 ;; event of a floating-point exception being raised by vectorised
1947 ;; code then:
1948 ;; 1.  If trapped floating-point exceptions are available, then a trap
1949 ;;     will be taken when any lane raises an enabled exception.  A trap
1950 ;;     handler may determine which lane raised the exception.
1951 ;; 2.  Alternatively a sticky exception flag is set in the
1952 ;;     floating-point status register (FPSR).  Software may explicitly
1953 ;;     test the exception flags, in which case the tests will either
1954 ;;     prevent vectorisation, allowing precise identification of the
1955 ;;     failing operation, or if tested outside of vectorisable regions
1956 ;;     then the specific operation and lane are not of interest.
1958 ;; FP arithmetic operations.
;; Vector FP add (FADD).
1960 (define_insn "add<mode>3"
1961  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1962        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1963                    (match_operand:VHSDF 2 "register_operand" "w")))]
1964  "TARGET_SIMD"
1965  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1966   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtract (FSUB).
1969 (define_insn "sub<mode>3"
1970  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1971        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1972                     (match_operand:VHSDF 2 "register_operand" "w")))]
1973  "TARGET_SIMD"
1974  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1975   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiply (FMUL).
1978 (define_insn "mul<mode>3"
1979  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1980        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1981                    (match_operand:VHSDF 2 "register_operand" "w")))]
1982  "TARGET_SIMD"
1983  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1984   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP divide.  First try the reciprocal-approximation sequence
;; (aarch64_emit_approx_div); otherwise fall through to the *div insn.
1987 (define_expand "div<mode>3"
1988  [(set (match_operand:VHSDF 0 "register_operand")
1989        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1990                   (match_operand:VHSDF 2 "register_operand")))]
1991  "TARGET_SIMD"
1993   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1994     DONE;
1996   operands[1] = force_reg (<MODE>mode, operands[1]);
;; Exact vector FP divide (FDIV).
1999 (define_insn "*div<mode>3"
2000  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2001        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2002                  (match_operand:VHSDF 2 "register_operand" "w")))]
2003  "TARGET_SIMD"
2004  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negate (FNEG).
2008 (define_insn "neg<mode>2"
2009  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2010        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2011  "TARGET_SIMD"
2012  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2013   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (FABS).
2016 (define_insn "abs<mode>2"
2017  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2018        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2019  "TARGET_SIMD"
2020  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2021   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add (FMLA); the addend in operand 3 is tied to the
;; destination.
2024 (define_insn "fma<mode>4"
2025   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027                   (match_operand:VHSDF 2 "register_operand" "w")
2028                   (match_operand:VHSDF 3 "register_operand" "0")))]
2029   "TARGET_SIMD"
2030  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2031   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a vector lane; the lane number
;; is converted to architectural numbering before printing.
2034 (define_insn "*aarch64_fma4_elt<mode>"
2035   [(set (match_operand:VDQF 0 "register_operand" "=w")
2036     (fma:VDQF
2037       (vec_duplicate:VDQF
2038         (vec_select:<VEL>
2039           (match_operand:VDQF 1 "register_operand" "<h_con>")
2040           (parallel [(match_operand:SI 2 "immediate_operand")])))
2041       (match_operand:VDQF 3 "register_operand" "w")
2042       (match_operand:VDQF 4 "register_operand" "0")))]
2043   "TARGET_SIMD"
2044   {
2045     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2046     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2047   }
2048   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from a vector of the other (swapped) width,
;; so the lane index is adjusted in that vector's mode.
2051 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2052   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2053     (fma:VDQSF
2054       (vec_duplicate:VDQSF
2055         (vec_select:<VEL>
2056           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2057           (parallel [(match_operand:SI 2 "immediate_operand")])))
2058       (match_operand:VDQSF 3 "register_operand" "w")
2059       (match_operand:VDQSF 4 "register_operand" "0")))]
2060   "TARGET_SIMD"
2061   {
2062     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2063     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2064   }
2065   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with the multiplicand duplicated from a scalar register; printed as
;; element [0] of the scalar's vector view.
2068 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2069   [(set (match_operand:VMUL 0 "register_operand" "=w")
2070     (fma:VMUL
2071       (vec_duplicate:VMUL
2072           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2073       (match_operand:VMUL 2 "register_operand" "w")
2074       (match_operand:VMUL 3 "register_operand" "0")))]
2075   "TARGET_SIMD"
2076   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2077   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma where one multiplicand is a selected lane of a V2DF value.
2080 (define_insn "*aarch64_fma4_elt_to_64v2df"
2081   [(set (match_operand:DF 0 "register_operand" "=w")
2082     (fma:DF
2083         (vec_select:DF
2084           (match_operand:V2DF 1 "register_operand" "w")
2085           (parallel [(match_operand:SI 2 "immediate_operand")]))
2086       (match_operand:DF 3 "register_operand" "w")
2087       (match_operand:DF 4 "register_operand" "0")))]
2088   "TARGET_SIMD"
2089   {
2090     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2091     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2092   }
2093   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (FMLS): fma with one multiplicand negated,
;; accumulator in operand 3 tied to the destination.
2096 (define_insn "fnma<mode>4"
2097   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2098         (fma:VHSDF
2099           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2100           (match_operand:VHSDF 2 "register_operand" "w")
2101           (match_operand:VHSDF 3 "register_operand" "0")))]
2102   "TARGET_SIMD"
2103   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2104   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with one multiplicand broadcast from a vector lane (endian-adjusted
;; lane index, as in the fma variants above).
2107 (define_insn "*aarch64_fnma4_elt<mode>"
2108   [(set (match_operand:VDQF 0 "register_operand" "=w")
2109     (fma:VDQF
2110       (neg:VDQF
2111         (match_operand:VDQF 3 "register_operand" "w"))
2112       (vec_duplicate:VDQF
2113         (vec_select:<VEL>
2114           (match_operand:VDQF 1 "register_operand" "<h_con>")
2115           (parallel [(match_operand:SI 2 "immediate_operand")])))
2116       (match_operand:VDQF 4 "register_operand" "0")))]
2117   "TARGET_SIMD"
2118   {
2119     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2120     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2121   }
2122   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above with the lane taken from the swapped-width vector mode.
2125 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2126   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2127     (fma:VDQSF
2128       (neg:VDQSF
2129         (match_operand:VDQSF 3 "register_operand" "w"))
2130       (vec_duplicate:VDQSF
2131         (vec_select:<VEL>
2132           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2133           (parallel [(match_operand:SI 2 "immediate_operand")])))
2134       (match_operand:VDQSF 4 "register_operand" "0")))]
2135   "TARGET_SIMD"
2136   {
2137     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2138     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2139   }
2140   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with the multiplicand duplicated from a scalar register.
2143 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2144   [(set (match_operand:VMUL 0 "register_operand" "=w")
2145     (fma:VMUL
2146       (neg:VMUL
2147         (match_operand:VMUL 2 "register_operand" "w"))
2148       (vec_duplicate:VMUL
2149         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2150       (match_operand:VMUL 3 "register_operand" "0")))]
2151   "TARGET_SIMD"
2152   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2153   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fmls where one multiplicand is a selected lane of a V2DF value.
2156 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2157   [(set (match_operand:DF 0 "register_operand" "=w")
2158     (fma:DF
2159       (vec_select:DF
2160         (match_operand:V2DF 1 "register_operand" "w")
2161         (parallel [(match_operand:SI 2 "immediate_operand")]))
2162       (neg:DF
2163         (match_operand:DF 3 "register_operand" "w"))
2164       (match_operand:DF 4 "register_operand" "0")))]
2165   "TARGET_SIMD"
2166   {
2167     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2168     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2169   }
2170   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2173 ;; Vector versions of the floating-point frint patterns.
2174 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2175 (define_insn "<frint_pattern><mode>2"
2176   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2177         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2178                        FRINT))]
2179   "TARGET_SIMD"
2180   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2181   [(set_attr "type" "neon_fp_round_<stype><q>")]
2184 ;; Vector versions of the fcvt standard patterns.
2185 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT iterator) then convert to signed/unsigned integer.
2186 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2187   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2188         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2189                                [(match_operand:VHSDF 1 "register_operand" "w")]
2190                                FCVT)))]
2191   "TARGET_SIMD"
2192   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2193   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2196 ;; HF Scalar variants of related SIMD instructions.
;; Needs the half-precision FP16 instruction set (ARMv8.2-A).
2197 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2198   [(set (match_operand:HI 0 "register_operand" "=w")
2199         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2200                       FCVT)))]
2201   "TARGET_SIMD_F16INST"
2202   "fcvt<frint_suffix><su>\t%h0, %h1"
2203   [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion (FCVTZS/FCVTZU).
2206 (define_insn "<optab>_trunchfhi2"
2207   [(set (match_operand:HI 0 "register_operand" "=w")
2208         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2209   "TARGET_SIMD_F16INST"
2210   "fcvtz<su>\t%h0, %h1"
2211   [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF conversion (SCVTF/UCVTF).
2214 (define_insn "<optab>hihf2"
2215   [(set (match_operand:HF 0 "register_operand" "=w")
2216         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2217   "TARGET_SIMD_F16INST"
2218   "<su_optab>cvtf\t%h0, %h1"
2219   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with a truncating convert into a
;; single fixed-point FCVTZS/FCVTZU with the #fbits immediate.  The range
;; check limits fbits to [1, element width].
2222 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2223   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2224         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2225                                [(mult:VDQF
2226          (match_operand:VDQF 1 "register_operand" "w")
2227          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2228                                UNSPEC_FRINTZ)))]
2229   "TARGET_SIMD
2230    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2231                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2232   {
2233     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2234     char buf[64];
2235     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2236     output_asm_insn (buf, operands);
2237     return "";
2238   }
2239   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; fix/fixuns standard pattern: round-towards-zero then convert; matched
;; directly by the insn above or split into FRINTZ + FCVTZ.
2242 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2243   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2244         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2245                                [(match_operand:VHSDF 1 "register_operand")]
2246                                 UNSPEC_FRINTZ)))]
2247   "TARGET_SIMD"
2248   {})
;; fix_trunc variant of the same expansion.
2250 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2251   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2252         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2253                                [(match_operand:VHSDF 1 "register_operand")]
2254                                 UNSPEC_FRINTZ)))]
2255   "TARGET_SIMD"
2256   {})
;; ftrunc standard pattern: round towards zero (FRINTZ).
2258 (define_expand "ftrunc<VHSDF:mode>2"
2259   [(set (match_operand:VHSDF 0 "register_operand")
2260         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2261                        UNSPEC_FRINTZ))]
2262   "TARGET_SIMD"
2263   {})
;; Integer to FP conversion (SCVTF/UCVTF).
2265 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2266   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2267         (FLOATUORS:VHSDF
2268           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2269   "TARGET_SIMD"
2270   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2271   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2274 ;; Conversions between vectors of floats and doubles.
2275 ;; Contains a mix of patterns to match standard pattern names
2276 ;; and those for intrinsics.
2278 ;; Float widening operations.
;; FCVTL: widen the low half of a vector of HF/SF to SF/DF.
2280 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2281   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2282         (float_extend:<VWIDE> (vec_select:<VHALF>
2283                                (match_operand:VQ_HSF 1 "register_operand" "w")
2284                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2285                             )))]
2286   "TARGET_SIMD"
2287   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2288   [(set_attr "type" "neon_fp_cvt_widen_s")]
2291 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits immediate in operand 2.
2293 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2294   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2295         (unspec:<VHSDF:FCVT_TARGET>
2296           [(match_operand:VHSDF 1 "register_operand" "w")
2297            (match_operand:SI 2 "immediate_operand" "i")]
2298          FCVT_F2FIXED))]
2299   "TARGET_SIMD"
2300   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2301   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits immediate in operand 2.
2304 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2305   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2306         (unspec:<VDQ_HSDI:FCVT_TARGET>
2307           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2308            (match_operand:SI 2 "immediate_operand" "i")]
2309          FCVT_FIXED2F))]
2310   "TARGET_SIMD"
2311   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2312   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2315 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2316 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2317 ;; the meaning of HI and LO changes depending on the target endianness.
2318 ;; While elsewhere we map the higher numbered elements of a vector to
2319 ;; the lower architectural lanes of the vector, for these patterns we want
2320 ;; to always treat "hi" as referring to the higher architectural lanes.
2321 ;; Consequently, while the patterns below look inconsistent with our
2322 ;; other big-endian patterns their behavior is as required.
;; Standard-pattern expander: FCVTL on the low architectural half.
2324 (define_expand "vec_unpacks_lo_<mode>"
2325   [(match_operand:<VWIDE> 0 "register_operand")
2326    (match_operand:VQ_HSF 1 "register_operand")]
2327   "TARGET_SIMD"
2328   {
2329     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2330     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2331                                                        operands[1], p));
2332     DONE;
2333   }
;; FCVTL2: widen the high half.
2336 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2337   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2338         (float_extend:<VWIDE> (vec_select:<VHALF>
2339                                (match_operand:VQ_HSF 1 "register_operand" "w")
2340                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2341                             )))]
2342   "TARGET_SIMD"
2343   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2344   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-pattern expander: FCVTL2 on the high architectural half.
;; Build the hi-half lane-selection PARALLEL and emit the _hi insn.
;; Previously this invoked the gen_..._lo_ generator with the hi-half
;; PARALLEL; that happened to produce structurally identical RTL (gen
;; functions do not check operand predicates) which recog then matched
;; against the _hi_ pattern, but it was misleading and fragile.  Use the
;; hi generator directly, matching vec_unpack<su>_hi_<mode> above.
2347 (define_expand "vec_unpacks_hi_<mode>"
2348   [(match_operand:<VWIDE> 0 "register_operand")
2349    (match_operand:VQ_HSF 1 "register_operand")]
2350   "TARGET_SIMD"
2351   {
2352     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2353     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2354                                                        operands[1], p));
2355     DONE;
2356   }
2358 (define_insn "aarch64_float_extend_lo_<Vwide>"
2359   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2360         (float_extend:<VWIDE>
2361           (match_operand:VDF 1 "register_operand" "w")))]
2362   "TARGET_SIMD"
2363   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2364   [(set_attr "type" "neon_fp_cvt_widen_s")]
2367 ;; Float narrowing operations.
2369 (define_insn "aarch64_float_truncate_lo_<mode>"
2370   [(set (match_operand:VDF 0 "register_operand" "=w")
2371       (float_truncate:VDF
2372         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2373   "TARGET_SIMD"
2374   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2375   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2378 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2379   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2380     (vec_concat:<VDBL>
2381       (match_operand:VDF 1 "register_operand" "0")
2382       (float_truncate:VDF
2383         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2384   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2385   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2386   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2389 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2390   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2391     (vec_concat:<VDBL>
2392       (float_truncate:VDF
2393         (match_operand:<VWIDE> 2 "register_operand" "w"))
2394       (match_operand:VDF 1 "register_operand" "0")))]
2395   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2396   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2397   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the endian-correct FCVTN2 pattern above.
2400 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2401   [(match_operand:<VDBL> 0 "register_operand")
2402    (match_operand:VDF 1 "register_operand")
2403    (match_operand:<VWIDE> 2 "register_operand")]
2404   "TARGET_SIMD"
2406   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2407                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2408                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2409   emit_insn (gen (operands[0], operands[1], operands[2]));
2410   DONE;
;; Pack two V2DF vectors into one V4SF: FCVTN narrows the first into the
;; low half of a temporary, FCVTN2 narrows the second into the high half.
;; Which source operand is "lo" vs "hi" is swapped for big-endian.
2414 (define_expand "vec_pack_trunc_v2df"
2415   [(set (match_operand:V4SF 0 "register_operand")
2416       (vec_concat:V4SF
2417         (float_truncate:V2SF
2418             (match_operand:V2DF 1 "register_operand"))
2419         (float_truncate:V2SF
2420             (match_operand:V2DF 2 "register_operand"))
2421           ))]
2422   "TARGET_SIMD"
2423   {
2424     rtx tmp = gen_reg_rtx (V2SFmode);
2425     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2426     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2428     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2429     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2430                                                    tmp, operands[hi]));
2431     DONE;
2432   }
;; Pack two scalar DFmode values into one V2SF vector: assemble the two
;; scalars into a V2DF temporary (lo/hi swapped for big-endian), then
;; narrow with a single FCVTN.  The temporary must be V2DFmode: it is the
;; destination of the move_{lo,hi}_quad_v2df inserts and the <VWIDE>
;; source of aarch64_float_truncate_lo_v2sf.
2435 (define_expand "vec_pack_trunc_df"
2436   [(set (match_operand:V2SF 0 "register_operand")
2437       (vec_concat:V2SF
2438         (float_truncate:SF
2439             (match_operand:DF 1 "register_operand"))
2440         (float_truncate:SF
2441             (match_operand:DF 2 "register_operand"))
2442           ))]
2443   "TARGET_SIMD"
2444   {
2445     rtx tmp = gen_reg_rtx (V2DFmode);
2446     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2447     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2449     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2450     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2451     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2452     DONE;
2453   }
2456 ;; FP Max/Min
2457 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2458 ;; expression like:
2459 ;;      a = (b < c) ? b : c;
2460 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2461 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2462 ;; -ffast-math.
2464 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2465 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2466 ;; operand will be returned when both operands are zero (i.e. they may not
2467 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2468 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2469 ;; NaNs.
;; smax/smin standard names, implemented with FMAXNM/FMINNM (legal here
;; because GCC only generates these in fast-math-style modes; see the
;; comment block above).
2471 (define_insn "<su><maxmin><mode>3"
2472   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2473         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2474                        (match_operand:VHSDF 2 "register_operand" "w")))]
2475   "TARGET_SIMD"
2476   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2477   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2480 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2481 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2482 ;; which implement the IEEE fmax ()/fmin () functions.
;; Unspec-based fmax/fmin/fmaxnm/fminnm vector forms; the unspec keeps
;; the exact IEEE semantics opaque to the optimizers.
2483 (define_insn "<maxmin_uns><mode>3"
2484   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2485        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2486                       (match_operand:VHSDF 2 "register_operand" "w")]
2487                       FMAXMIN_UNS))]
2488   "TARGET_SIMD"
2489   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2490   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2493 ;; 'across lanes' add.
;; Integer add-across-lanes: ADDV into a scratch vector, then extract
;; lane 0 (endian-adjusted) as the scalar result.
2495 (define_expand "reduc_plus_scal_<mode>"
2496   [(match_operand:<VEL> 0 "register_operand")
2497    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2498                UNSPEC_ADDV)]
2499   "TARGET_SIMD"
2500   {
2501     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2502     rtx scratch = gen_reg_rtx (<MODE>mode);
2503     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2504     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2505     DONE;
2506   }
;; FADDP: pairwise FP add of operands 1 and 2.
2509 (define_insn "aarch64_faddp<mode>"
2510  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2511        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2512                       (match_operand:VHSDF 2 "register_operand" "w")]
2513         UNSPEC_FADDV))]
2514  "TARGET_SIMD"
2515  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2516   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV/ADDP reduction; the scalar sum lands in element 0 of operand 0.
2519 (define_insn "aarch64_reduc_plus_internal<mode>"
2520  [(set (match_operand:VDQV 0 "register_operand" "=w")
2521        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2522                     UNSPEC_ADDV))]
2523  "TARGET_SIMD"
2524  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2525   [(set_attr "type" "neon_reduc_add<q>")]
2528 ;; ADDV with result zero-extended to SI/DImode (for popcount).
;; ADDV whose result is modelled as zero-extended to SI/DImode (used for
;; popcount).  The "=w" constraint keeps the result in a SIMD register;
;; ADDV zeroes the upper bits, so no extra extend instruction is needed.
2529 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2530  [(set (match_operand:GPI 0 "register_operand" "=w")
2531        (zero_extend:GPI
2532         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2533                              UNSPEC_ADDV)))]
2534  "TARGET_SIMD"
2535  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2536   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
;; V2SI has no ADDV form; a pairwise ADDP of the vector with itself
;; produces the lane sum in every element.
2539 (define_insn "aarch64_reduc_plus_internalv2si"
2540  [(set (match_operand:V2SI 0 "register_operand" "=w")
2541        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2542                     UNSPEC_ADDV))]
2543  "TARGET_SIMD"
2544  "addp\\t%0.2s, %1.2s, %1.2s"
2545   [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar-output FADDP.
2548 (define_insn "reduc_plus_scal_<mode>"
2549  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2550        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2551                    UNSPEC_FADDV))]
2552  "TARGET_SIMD"
2553  "faddp\\t%<Vetype>0, %1.<Vtype>"
2554   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add-across-lanes: two pairwise FADDPs fold four lanes down to
;; one sum (replicated), then lane 0 (endian-adjusted) is extracted.
2557 (define_expand "reduc_plus_scal_v4sf"
2558  [(set (match_operand:SF 0 "register_operand")
2559        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2560                     UNSPEC_FADDV))]
2561  "TARGET_SIMD"
2563   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2564   rtx scratch = gen_reg_rtx (V4SFmode);
2565   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2566   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2567   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2568   DONE;
;; Count leading redundant sign bits per element (CLS).
2571 (define_insn "clrsb<mode>2"
2572   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2573         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2574   "TARGET_SIMD"
2575   "cls\\t%0.<Vtype>, %1.<Vtype>"
2576   [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (CLZ).
2579 (define_insn "clz<mode>2"
2580  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2581        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2582  "TARGET_SIMD"
2583  "clz\\t%0.<Vtype>, %1.<Vtype>"
2584   [(set_attr "type" "neon_cls<q>")]
;; Population count per byte (CNT); only byte-element modes exist.
2587 (define_insn "popcount<mode>2"
2588   [(set (match_operand:VB 0 "register_operand" "=w")
2589         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2590   "TARGET_SIMD"
2591   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2592   [(set_attr "type" "neon_cnt<q>")]
2595 ;; 'across lanes' max and min ops.
2597 ;; Template for outputting a scalar, so we can create __builtins which can be
2598 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; FP max/min-across-lanes: reduce into a scratch vector, then extract
;; lane 0 (endian-adjusted) as the scalar result.
2599 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2600   [(match_operand:<VEL> 0 "register_operand")
2601    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2602                   FMAXMINV)]
2603   "TARGET_SIMD"
2604   {
2605     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2606     rtx scratch = gen_reg_rtx (<MODE>mode);
2607     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2608                                                               operands[1]));
2609     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2610     DONE;
2611   }
2614 ;; Likewise for integer cases, signed and unsigned.
;; Integer (signed and unsigned) max/min-across-lanes, same reduce-then-
;; extract-lane-0 shape as the FP expander above.
2615 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2616   [(match_operand:<VEL> 0 "register_operand")
2617    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2618                     MAXMINV)]
2619   "TARGET_SIMD"
2620   {
2621     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2622     rtx scratch = gen_reg_rtx (<MODE>mode);
2623     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2624                                                               operands[1]));
2625     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2626     DONE;
2627   }
;; SMAXV/SMINV/UMAXV/UMINV reduction insn.
2630 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2631  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2632        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2633                     MAXMINV))]
2634  "TARGET_SIMD"
2635  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2636   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; the pairwise op with both sources the
;; same vector yields the reduction in every element.
2639 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2640  [(set (match_operand:V2SI 0 "register_operand" "=w")
2641        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2642                     MAXMINV))]
2643  "TARGET_SIMD"
2644  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2645   [(set_attr "type" "neon_reduc_minmax")]
;; FP max/min reduction insn (FMAXV/FMINV/FMAXNMV/FMINNMV, or the
;; pairwise <vp> form for two-element modes).
2648 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2649  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2650        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2651                       FMAXMINV))]
2652  "TARGET_SIMD"
2653  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2654   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2657 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2658 ;; allocation.
2659 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2660 ;; to select.
2662 ;; Thus our BSL is of the form:
2663 ;;   op0 = bsl (mask, op2, op3)
2664 ;; We can use any of:
2666 ;;   if (op0 = mask)
2667 ;;     bsl mask, op1, op2
2668 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2669 ;;     bit op0, op2, mask
2670 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2671 ;;     bif op0, op1, mask
2673 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2674 ;; Some forms of straight-line code may generate the equivalent form
2675 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical BSL RTL: op0 = ((op2 ^ op3) & op1) ^ op3.  Which of
;; BSL/BIT/BIF is emitted depends on which input the register allocator
;; ties to the destination (the "0" constraint in each alternative).
2677 (define_insn "aarch64_simd_bsl<mode>_internal"
2678   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2679         (xor:VDQ_I
2680            (and:VDQ_I
2681              (xor:VDQ_I
2682                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2683                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2684              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2685           (match_dup:<V_INT_EQUIV> 3)
2686         ))]
2687   "TARGET_SIMD"
2688   "@
2689   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2690   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2691   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2692   [(set_attr "type" "neon_bsl<q>")]
2695 ;; We need this form in addition to the above pattern to match the case
2696 ;; when combine tries merging three insns such that the second operand of
2697 ;; the outer XOR matches the second operand of the inner XOR rather than
2698 ;; the first.  The two are equivalent but since recog doesn't try all
2699 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted form of the pattern above: the outer XOR repeats the FIRST
;; inner-XOR operand instead of the second, so op0 = ((op3^op2)&op1)^op2
;; selects op3 where the mask is set.  Operand roles in the templates
;; swap accordingly.
2701 (define_insn "*aarch64_simd_bsl<mode>_alt"
2702   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2703         (xor:VDQ_I
2704            (and:VDQ_I
2705              (xor:VDQ_I
2706                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2707                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2708               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2709           (match_dup:<V_INT_EQUIV> 2)))]
2710   "TARGET_SIMD"
2711   "@
2712   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2713   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2714   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2715   [(set_attr "type" "neon_bsl<q>")]
2718 ;; DImode is special, we want to avoid computing operations which are
2719 ;; more naturally computed in general purpose registers in the vector
2720 ;; registers.  If we do that, we need to move all three operands from general
2721 ;; purpose registers to vector registers, then back again.  However, we
2722 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2723 ;; optimizations based on the component operations of a BSL.
2725 ;; That means we need a splitter back to the individual operations, if they
2726 ;; would be better calculated on the integer side.
;; DImode BSL.  The fourth alternative keeps all operands in general
;; registers; the split turns that case into the three component
;; EOR/AND/EOR instructions so no vector<->GP moves are needed.
2728 (define_insn_and_split "aarch64_simd_bsldi_internal"
2729   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2730         (xor:DI
2731            (and:DI
2732              (xor:DI
2733                (match_operand:DI 3 "register_operand" "w,0,w,r")
2734                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2735              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2736           (match_dup:DI 3)
2737         ))]
2738   "TARGET_SIMD"
2739   "@
2740   bsl\\t%0.8b, %2.8b, %3.8b
2741   bit\\t%0.8b, %2.8b, %1.8b
2742   bif\\t%0.8b, %3.8b, %1.8b
2743   #"
2744   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2745   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2747   /* Split back to individual operations.  If we're before reload, and
2748      able to create a temporary register, do so.  If we're after reload,
2749      we've got an early-clobber destination register, so use that.
2750      Otherwise, we can't create pseudos and we can't yet guarantee that
2751      operands[0] is safe to write, so FAIL to split.  */
2753   rtx scratch;
2754   if (reload_completed)
2755     scratch = operands[0];
2756   else if (can_create_pseudo_p ())
2757     scratch = gen_reg_rtx (DImode);
2758   else
2759     FAIL;
2761   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2762   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2763   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2764   DONE;
2766   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2767    (set_attr "length" "4,4,4,12")]
;; DImode counterpart of *aarch64_simd_bsl<mode>_alt: the outer XOR
;; repeats operand 2, so the final split EOR uses operands[2] rather
;; than operands[3].  Split strategy is the same as bsldi_internal.
2770 (define_insn_and_split "aarch64_simd_bsldi_alt"
2771   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2772         (xor:DI
2773            (and:DI
2774              (xor:DI
2775                (match_operand:DI 3 "register_operand" "w,w,0,r")
2776                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2777              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2778           (match_dup:DI 2)
2779         ))]
2780   "TARGET_SIMD"
2781   "@
2782   bsl\\t%0.8b, %3.8b, %2.8b
2783   bit\\t%0.8b, %3.8b, %1.8b
2784   bif\\t%0.8b, %2.8b, %1.8b
2785   #"
2786   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2787   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2789   /* Split back to individual operations.  If we're before reload, and
2790      able to create a temporary register, do so.  If we're after reload,
2791      we've got an early-clobber destination register, so use that.
2792      Otherwise, we can't create pseudos and we can't yet guarantee that
2793      operands[0] is safe to write, so FAIL to split.  */
2795   rtx scratch;
2796   if (reload_completed)
2797     scratch = operands[0];
2798   else if (can_create_pseudo_p ())
2799     scratch = gen_reg_rtx (DImode);
2800   else
2801     FAIL;
2803   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2804   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2805   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2806   DONE;
2808   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2809    (set_attr "length" "4,4,4,12")]
;; Public BSL expander: op0 = (op2 & op1) | (op3 & ~op1).  The internal
;; pattern works on integer modes only, so FP operands are rewritten as
;; their integer-equivalent lowparts and the result copied back.
2812 (define_expand "aarch64_simd_bsl<mode>"
2813   [(match_operand:VALLDIF 0 "register_operand")
2814    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2815    (match_operand:VALLDIF 2 "register_operand")
2816    (match_operand:VALLDIF 3 "register_operand")]
2817  "TARGET_SIMD"
2819   /* We can't alias operands together if they have different modes.  */
2820   rtx tmp = operands[0];
2821   if (FLOAT_MODE_P (<MODE>mode))
2822     {
2823       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2824       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2825       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2826     }
2827   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2828   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2829                                                          operands[1],
2830                                                          operands[2],
2831                                                          operands[3]));
2832   if (tmp != operands[0])
2833     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2835   DONE;
;; Select between operands 1 and 2 under the mask operand 3.  The
;; all-ones/all-zeros constant cases degenerate to a move or a NOT of
;; the mask; everything else becomes a BSL.
2838 (define_expand "vcond_mask_<mode><v_int_equiv>"
2839   [(match_operand:VALLDI 0 "register_operand")
2840    (match_operand:VALLDI 1 "nonmemory_operand")
2841    (match_operand:VALLDI 2 "nonmemory_operand")
2842    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2843   "TARGET_SIMD"
2845   /* If we have (a = (P) ? -1 : 0);
2846      Then we can simply move the generated mask (result must be int).  */
2847   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2848       && operands[2] == CONST0_RTX (<MODE>mode))
2849     emit_move_insn (operands[0], operands[3]);
2850   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2851   else if (operands[1] == CONST0_RTX (<MODE>mode)
2852            && operands[2] == CONSTM1_RTX (<MODE>mode))
2853     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2854   else
2855     {
2856       if (!REG_P (operands[1]))
2857         operands[1] = force_reg (<MODE>mode, operands[1]);
2858       if (!REG_P (operands[2]))
2859         operands[2] = force_reg (<MODE>mode, operands[2]);
2860       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2861                                              operands[1], operands[2]));
2862     }
2864   DONE;
2867 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.  Only
;; signed comparisons against zero have immediate forms, so other
;; constants are forced to registers first.  LTU/LEU are emitted as the
;; swapped GTU/GEU; NE is EQ followed by NOT.
2869 (define_expand "vec_cmp<mode><mode>"
2870   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2871           (match_operator 1 "comparison_operator"
2872             [(match_operand:VSDQ_I_DI 2 "register_operand")
2873              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2874   "TARGET_SIMD"
2876   rtx mask = operands[0];
2877   enum rtx_code code = GET_CODE (operands[1]);
2879   switch (code)
2880     {
2881     case NE:
2882     case LE:
2883     case LT:
2884     case GE:
2885     case GT:
2886     case EQ:
2887       if (operands[3] == CONST0_RTX (<MODE>mode))
2888         break;
2890       /* Fall through.  */
2891     default:
2892       if (!REG_P (operands[3]))
2893         operands[3] = force_reg (<MODE>mode, operands[3]);
2895       break;
2896     }
2898   switch (code)
2899     {
2900     case LT:
2901       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2902       break;
2904     case GE:
2905       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2906       break;
2908     case LE:
2909       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2910       break;
2912     case GT:
2913       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2914       break;
2916     case LTU:
2917       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2918       break;
2920     case GEU:
2921       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2922       break;
2924     case LEU:
2925       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2926       break;
2928     case GTU:
2929       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2930       break;
2932     case NE:
2933       /* Handle NE as !EQ.  */
2934       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2935       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2936       break;
2938     case EQ:
2939       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2940       break;
2942     default:
2943       gcc_unreachable ();
2944     }
2946   DONE;
;; FP vector compare producing an integer mask.  The first switch notes
;; whether the zero-immediate compare forms apply; the second picks the
;; FCM generator (swapping operands for LT/LE/UNLT/UNLE); the third
;; emits the code, with special sequences for the unordered-tolerant
;; codes (UNGE/UNGT/UNLE/UNLT), LTGT, and ORDERED/UNORDERED/UNEQ.
2949 (define_expand "vec_cmp<mode><v_int_equiv>"
2950   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2951         (match_operator 1 "comparison_operator"
2952             [(match_operand:VDQF 2 "register_operand")
2953              (match_operand:VDQF 3 "nonmemory_operand")]))]
2954   "TARGET_SIMD"
2956   int use_zero_form = 0;
2957   enum rtx_code code = GET_CODE (operands[1]);
2958   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2960   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2962   switch (code)
2963     {
2964     case LE:
2965     case LT:
2966     case GE:
2967     case GT:
2968     case EQ:
2969       if (operands[3] == CONST0_RTX (<MODE>mode))
2970         {
2971           use_zero_form = 1;
2972           break;
2973         }
2974       /* Fall through.  */
2975     default:
2976       if (!REG_P (operands[3]))
2977         operands[3] = force_reg (<MODE>mode, operands[3]);
2979       break;
2980     }
2982   switch (code)
2983     {
2984     case LT:
2985       if (use_zero_form)
2986         {
2987           comparison = gen_aarch64_cmlt<mode>;
2988           break;
2989         }
2990       /* Fall through.  */
2991     case UNLT:
2992       std::swap (operands[2], operands[3]);
2993       /* Fall through.  */
2994     case UNGT:
2995     case GT:
2996       comparison = gen_aarch64_cmgt<mode>;
2997       break;
2998     case LE:
2999       if (use_zero_form)
3000         {
3001           comparison = gen_aarch64_cmle<mode>;
3002           break;
3003         }
3004       /* Fall through.  */
3005     case UNLE:
3006       std::swap (operands[2], operands[3]);
3007       /* Fall through.  */
3008     case UNGE:
3009     case GE:
3010       comparison = gen_aarch64_cmge<mode>;
3011       break;
3012     case NE:
3013     case EQ:
3014       comparison = gen_aarch64_cmeq<mode>;
3015       break;
3016     case UNEQ:
3017     case ORDERED:
3018     case UNORDERED:
3019     case LTGT:
3020       break;
3021     default:
3022       gcc_unreachable ();
3023     }
3025   switch (code)
3026     {
3027     case UNGE:
3028     case UNGT:
3029     case UNLE:
3030     case UNLT:
3031       {
3032         /* All of the above must not raise any FP exceptions.  Thus we first
3033            check each operand for NaNs and force any elements containing NaN to
3034            zero before using them in the compare.
3035            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3036                                      (cm<cc> (isnan (a) ? 0.0 : a,
3037                                               isnan (b) ? 0.0 : b))
3038            We use the following transformations for doing the comparisions:
3039            a UNGE b -> a GE b
3040            a UNGT b -> a GT b
3041            a UNLE b -> b GE a
3042            a UNLT b -> b GT a.  */
3044         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3045         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3046         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3047         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3048         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3049         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3050         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3051                                           lowpart_subreg (<V_INT_EQUIV>mode,
3052                                                           operands[2],
3053                                                           <MODE>mode)));
3054         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3055                                           lowpart_subreg (<V_INT_EQUIV>mode,
3056                                                           operands[3],
3057                                                           <MODE>mode)));
3058         gcc_assert (comparison != NULL);
3059         emit_insn (comparison (operands[0],
3060                                lowpart_subreg (<MODE>mode,
3061                                                tmp0, <V_INT_EQUIV>mode),
3062                                lowpart_subreg (<MODE>mode,
3063                                                tmp1, <V_INT_EQUIV>mode)));
3064         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3065       }
3066       break;
3068     case LT:
3069     case LE:
3070     case GT:
3071     case GE:
3072     case EQ:
3073     case NE:
3074       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
3075          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
3076          a GE b -> a GE b
3077          a GT b -> a GT b
3078          a LE b -> b GE a
3079          a LT b -> b GT a
3080          a EQ b -> a EQ b
3081          a NE b -> ~(a EQ b)  */
3082       gcc_assert (comparison != NULL);
3083       emit_insn (comparison (operands[0], operands[2], operands[3]));
3084       if (code == NE)
3085         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3086       break;
3088     case LTGT:
3089       /* LTGT is not guranteed to not generate a FP exception.  So let's
3090          go the faster way : ((a > b) || (b > a)).  */
3091       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3092                                          operands[2], operands[3]));
3093       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3094       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3095       break;
3097     case ORDERED:
3098     case UNORDERED:
3099     case UNEQ:
3100       /* cmeq (a, a) & cmeq (b, b).  */
3101       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3102                                          operands[2], operands[2]));
3103       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3104       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3106       if (code == UNORDERED)
3107         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3108       else if (code == UNEQ)
3109         {
3110           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3111           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3112         }
3113       break;
3115     default:
3116       gcc_unreachable ();
3117     }
3119   DONE;
;; Unsigned vector compare: identical expansion to vec_cmp, which keys
;; off the rtx code (GTU/GEU/...) in operand 1 rather than the pattern
;; name.
3122 (define_expand "vec_cmpu<mode><mode>"
3123   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3124           (match_operator 1 "comparison_operator"
3125             [(match_operand:VSDQ_I_DI 2 "register_operand")
3126              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3127   "TARGET_SIMD"
3129   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3130                                       operands[2], operands[3]));
3131   DONE;
;; vcond: compute the comparison mask, then select with vcond_mask.
;; NE would cost an extra NOT in vec_cmp, so it is rewritten as EQ with
;; the selected values swapped.
3134 (define_expand "vcond<mode><mode>"
3135   [(set (match_operand:VALLDI 0 "register_operand")
3136         (if_then_else:VALLDI
3137           (match_operator 3 "comparison_operator"
3138             [(match_operand:VALLDI 4 "register_operand")
3139              (match_operand:VALLDI 5 "nonmemory_operand")])
3140           (match_operand:VALLDI 1 "nonmemory_operand")
3141           (match_operand:VALLDI 2 "nonmemory_operand")))]
3142   "TARGET_SIMD"
3144   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3145   enum rtx_code code = GET_CODE (operands[3]);
3147   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3148      it as well as switch operands 1/2 in order to avoid the additional
3149      NOT instruction.  */
3150   if (code == NE)
3151     {
3152       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3153                                     operands[4], operands[5]);
3154       std::swap (operands[1], operands[2]);
3155     }
3156   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3157                                              operands[4], operands[5]));
3158   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3159                                                  operands[2], mask));
3161   DONE;
;; Mixed-mode vcond: FP comparison (operands 4/5) selecting values of
;; the corresponding same-size integer mode.  Same NE->EQ+swap trick as
;; the plain vcond expander.
3164 (define_expand "vcond<v_cmp_mixed><mode>"
3165   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3166         (if_then_else:<V_cmp_mixed>
3167           (match_operator 3 "comparison_operator"
3168             [(match_operand:VDQF_COND 4 "register_operand")
3169              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3170           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3171           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3172   "TARGET_SIMD"
3174   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3175   enum rtx_code code = GET_CODE (operands[3]);
3177   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3178      it as well as switch operands 1/2 in order to avoid the additional
3179      NOT instruction.  */
3180   if (code == NE)
3181     {
3182       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3183                                     operands[4], operands[5]);
3184       std::swap (operands[1], operands[2]);
3185     }
3186   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3187                                              operands[4], operands[5]));
3188   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3189                                                 operands[0], operands[1],
3190                                                 operands[2], mask));
3192   DONE;
;; Unsigned vcond on integer modes; the mask mode equals the data mode,
;; so vec_cmp<mode><mode> is used directly.
3195 (define_expand "vcondu<mode><mode>"
3196   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3197         (if_then_else:VSDQ_I_DI
3198           (match_operator 3 "comparison_operator"
3199             [(match_operand:VSDQ_I_DI 4 "register_operand")
3200              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3201           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3202           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3203   "TARGET_SIMD"
3205   rtx mask = gen_reg_rtx (<MODE>mode);
3206   enum rtx_code code = GET_CODE (operands[3]);
3208   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3209      it as well as switch operands 1/2 in order to avoid the additional
3210      NOT instruction.  */
3211   if (code == NE)
3212     {
3213       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3214                                     operands[4], operands[5]);
3215       std::swap (operands[1], operands[2]);
3216     }
3217   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3218                                       operands[4], operands[5]));
3219   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3220                                                  operands[2], mask));
3221   DONE;
;; Mixed-mode vcondu: unsigned integer comparison (operands 4/5)
;; selecting FP values of the corresponding same-size vector mode.
3224 (define_expand "vcondu<mode><v_cmp_mixed>"
3225   [(set (match_operand:VDQF 0 "register_operand")
3226         (if_then_else:VDQF
3227           (match_operator 3 "comparison_operator"
3228             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3229              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3230           (match_operand:VDQF 1 "nonmemory_operand")
3231           (match_operand:VDQF 2 "nonmemory_operand")))]
3232   "TARGET_SIMD"
3234   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3235   enum rtx_code code = GET_CODE (operands[3]);
3237   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3238      it as well as switch operands 1/2 in order to avoid the additional
3239      NOT instruction.  */
3240   if (code == NE)
3241     {
3242       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3243                                     operands[4], operands[5]);
3244       std::swap (operands[1], operands[2]);
3245     }
3246   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3247                                                   mask, operands[3],
3248                                                   operands[4], operands[5]));
3249   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3250                                                  operands[2], mask));
3251   DONE;
3254 ;; Patterns for AArch64 SIMD Intrinsics.
3256 ;; Lane extraction with sign extension to general purpose register.
3257 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3258   [(set (match_operand:GPI 0 "register_operand" "=r")
3259         (sign_extend:GPI
3260           (vec_select:<VDQQH:VEL>
3261             (match_operand:VDQQH 1 "register_operand" "w")
3262             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3263   "TARGET_SIMD"
3264   {
    /* Convert the GCC-vector-extension lane number to the architectural
       (endian-dependent) lane number before printing.  */
3265     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3266                                            INTVAL (operands[2]));
3267     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3268   }
3269   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; Lane extraction with zero extension to general purpose register.
;; The umov always targets the 32-bit %w register: an AArch64 write to Wn
;; zero-extends into Xn, so this also implements the DImode zero_extend.
3272 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3273   [(set (match_operand:GPI 0 "register_operand" "=r")
3274         (zero_extend:GPI
3275           (vec_select:<VDQQH:VEL>
3276             (match_operand:VDQQH 1 "register_operand" "w")
3277             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3278   "TARGET_SIMD"
3279   {
3280     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3281                                            INTVAL (operands[2]));
3282     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3283   }
3284   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3287 ;; Lane extraction of a value, neither sign nor zero extension
3288 ;; is guaranteed so upper bits should be considered undefined.
3289 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3290 (define_insn "aarch64_get_lane<mode>"
3291   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3292         (vec_select:<VEL>
3293           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3294           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3295   "TARGET_SIMD"
3296   {
3297     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3298     switch (which_alternative)
3299       {
        /* General-purpose register destination.  */
3300         case 0:
3301           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
        /* SIMD register destination: dup the selected lane.  */
3302         case 1:
3303           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
        /* Memory destination: store the single lane.  */
3304         case 2:
3305           return "st1\\t{%1.<Vetype>}[%2], %0";
3306         default:
3307           gcc_unreachable ();
3308       }
3309   }
3310   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two consecutive D-sized memory operands as one Q-register load.
;; The insn condition checks that operand 2's address is exactly operand
;; 1's address plus one half-vector, so a single ldr covers both halves.
3313 (define_insn "load_pair_lanes<mode>"
3314   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3315         (vec_concat:<VDBL>
3316            (match_operand:VDC 1 "memory_operand" "Utq")
3317            (match_operand:VDC 2 "memory_operand" "m")))]
3318   "TARGET_SIMD && !STRICT_ALIGNMENT
3319    && rtx_equal_p (XEXP (operands[2], 0),
3320                    plus_constant (Pmode,
3321                                   XEXP (operands[1], 0),
3322                                   GET_MODE_SIZE (<MODE>mode)))"
3323   "ldr\\t%q0, %1"
3324   [(set_attr "type" "neon_load1_1reg_q")]
;; Store both halves of a vec_concat with a single stp, from either SIMD
;; (d) registers or general-purpose (x) registers.
3327 (define_insn "store_pair_lanes<mode>"
3328   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3329         (vec_concat:<VDBL>
3330            (match_operand:VDC 1 "register_operand" "w, r")
3331            (match_operand:VDC 2 "register_operand" "w, r")))]
3332   "TARGET_SIMD"
3333   "@
3334    stp\\t%d1, %d2, %y0
3335    stp\\t%x1, %x2, %y0"
3336   [(set_attr "type" "neon_stp, store_16")]
3339 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3340 ;; dest vector.
;; Combine with a zero high half (little-endian layout).  The zero costs
;; nothing: mov/fmov/ldr writing a D register clears the upper 64 bits of
;; the full vector register.
3342 (define_insn "@aarch64_combinez<mode>"
3343   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3344         (vec_concat:<VDBL>
3345           (match_operand:VDC 1 "general_operand" "w,?r,m")
3346           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3347   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3348   "@
3349    mov\\t%0.8b, %1.8b
3350    fmov\t%d0, %1
3351    ldr\\t%d0, %1"
3352   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3353    (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterpart of aarch64_combinez: the zero half comes first
;; in the vec_concat, but the emitted instructions are identical.
3356 (define_insn "@aarch64_combinez_be<mode>"
3357   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3358         (vec_concat:<VDBL>
3359           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3360           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3361   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3362   "@
3363    mov\\t%0.8b, %1.8b
3364    fmov\t%d0, %1
3365    ldr\\t%d0, %1"
3366   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3367    (set_attr "arch" "simd,fp,simd")]
;; General combine of two D-sized values into one Q register; the actual
;; lowering is delegated to aarch64_split_simd_combine.
3370 (define_expand "aarch64_combine<mode>"
3371   [(match_operand:<VDBL> 0 "register_operand")
3372    (match_operand:VDC 1 "register_operand")
3373    (match_operand:VDC 2 "register_operand")]
3374   "TARGET_SIMD"
3376   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3378   DONE;
;; As aarch64_combine, but emitted as explicit low/high quad moves.
3382 (define_expand "@aarch64_simd_combine<mode>"
3383   [(match_operand:<VDBL> 0 "register_operand")
3384    (match_operand:VDC 1 "register_operand")
3385    (match_operand:VDC 2 "register_operand")]
3386   "TARGET_SIMD"
3387   {
3388     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3389     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3390     DONE;
3391   }
3392 [(set_attr "type" "multiple")]
3395 ;; <su><addsub>l<q>.
;; Widening add/sub-long of the HIGH halves of two Q registers
;; (the "l2" instruction forms).  Operand 3 must be the parallel that
;; selects the high half; the extension kind comes from ANY_EXTEND.
3397 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3398  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3399        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3400                            (match_operand:VQW 1 "register_operand" "w")
3401                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3402                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3403                            (match_operand:VQW 2 "register_operand" "w")
3404                            (match_dup 3)))))]
3405   "TARGET_SIMD"
3406   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3407   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; As above but operating on the LOW halves (the plain "l" forms).
3410 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3411  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3412        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3413                            (match_operand:VQW 1 "register_operand" "w")
3414                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3415                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3416                            (match_operand:VQW 2 "register_operand" "w")
3417                            (match_dup 3)))))]
3418   "TARGET_SIMD"
3419   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3420   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders: build the high-half lane parallel and defer to
;; the *_hi_internal patterns above.
3424 (define_expand "aarch64_saddl2<mode>"
3425   [(match_operand:<VWIDE> 0 "register_operand")
3426    (match_operand:VQW 1 "register_operand")
3427    (match_operand:VQW 2 "register_operand")]
3428   "TARGET_SIMD"
3430   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3431   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3432                                                   operands[2], p));
3433   DONE;
3436 (define_expand "aarch64_uaddl2<mode>"
3437   [(match_operand:<VWIDE> 0 "register_operand")
3438    (match_operand:VQW 1 "register_operand")
3439    (match_operand:VQW 2 "register_operand")]
3440   "TARGET_SIMD"
3442   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3443   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3444                                                   operands[2], p));
3445   DONE;
3448 (define_expand "aarch64_ssubl2<mode>"
3449   [(match_operand:<VWIDE> 0 "register_operand")
3450    (match_operand:VQW 1 "register_operand")
3451    (match_operand:VQW 2 "register_operand")]
3452   "TARGET_SIMD"
3454   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3455   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3456                                                 operands[2], p));
3457   DONE;
3460 (define_expand "aarch64_usubl2<mode>"
3461   [(match_operand:<VWIDE> 0 "register_operand")
3462    (match_operand:VQW 1 "register_operand")
3463    (match_operand:VQW 2 "register_operand")]
3464   "TARGET_SIMD"
3466   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3467   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3468                                                 operands[2], p));
3469   DONE;
;; Widening add/sub-long of whole 64-bit vectors (no half selection).
3472 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3473  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3474        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3475                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3476                        (ANY_EXTEND:<VWIDE>
3477                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3478   "TARGET_SIMD"
3479   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3480   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3483 ;; <su><addsub>w<q>.
;; widen_ssum for 128-bit inputs: accumulate the low half with saddw via
;; the _internal pattern, then the high half with saddw2.
3485 (define_expand "widen_ssum<mode>3"
3486   [(set (match_operand:<VDBLW> 0 "register_operand")
3487         (plus:<VDBLW> (sign_extend:<VDBLW> 
3488                         (match_operand:VQW 1 "register_operand"))
3489                       (match_operand:<VDBLW> 2 "register_operand")))]
3490   "TARGET_SIMD"
3491   {
3492     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3493     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3495     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3496                                                 operands[1], p));
3497     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3498     DONE;
3499   }
;; widen_ssum for 64-bit inputs: a single saddw suffices.
3502 (define_expand "widen_ssum<mode>3"
3503   [(set (match_operand:<VWIDE> 0 "register_operand")
3504         (plus:<VWIDE> (sign_extend:<VWIDE>
3505                         (match_operand:VD_BHSI 1 "register_operand"))
3506                       (match_operand:<VWIDE> 2 "register_operand")))]
3507   "TARGET_SIMD"
3509   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3510   DONE;
;; Unsigned counterparts of the two widen_ssum expanders above.
3513 (define_expand "widen_usum<mode>3"
3514   [(set (match_operand:<VDBLW> 0 "register_operand")
3515         (plus:<VDBLW> (zero_extend:<VDBLW> 
3516                         (match_operand:VQW 1 "register_operand"))
3517                       (match_operand:<VDBLW> 2 "register_operand")))]
3518   "TARGET_SIMD"
3519   {
3520     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3521     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3523     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3524                                                  operands[1], p));
3525     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3526     DONE;
3527   }
3530 (define_expand "widen_usum<mode>3"
3531   [(set (match_operand:<VWIDE> 0 "register_operand")
3532         (plus:<VWIDE> (zero_extend:<VWIDE>
3533                         (match_operand:VD_BHSI 1 "register_operand"))
3534                       (match_operand:<VWIDE> 2 "register_operand")))]
3535   "TARGET_SIMD"
3537   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3538   DONE;
;; Widening subtract: wide operand 1 minus extended narrow operand 2.
3541 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3542   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3544           (ANY_EXTEND:<VWIDE>
3545             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3546   "TARGET_SIMD"
3547   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3548   [(set_attr "type" "neon_sub_widen")]
;; Low-half variant (used by the widen_* expanders above).
3551 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3552   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3553         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3554           (ANY_EXTEND:<VWIDE>
3555             (vec_select:<VHALF>
3556               (match_operand:VQW 2 "register_operand" "w")
3557               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3558   "TARGET_SIMD"
3559   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3560   [(set_attr "type" "neon_sub_widen")]
;; High-half variant (the "subw2" forms).
3563 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3564   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3565         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3566           (ANY_EXTEND:<VWIDE>
3567             (vec_select:<VHALF>
3568               (match_operand:VQW 2 "register_operand" "w")
3569               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3570   "TARGET_SIMD"
3571   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3572   [(set_attr "type" "neon_sub_widen")]
;; Widening add: wide operand 1 plus extended narrow operand 2.  Note the
;; narrow operand is operand 2 even though it appears first in the plus.
3575 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3576   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3577         (plus:<VWIDE>
3578           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3579           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3580   "TARGET_SIMD"
3581   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3582   [(set_attr "type" "neon_add_widen")]
3585 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3586   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3587         (plus:<VWIDE>
3588           (ANY_EXTEND:<VWIDE>
3589             (vec_select:<VHALF>
3590               (match_operand:VQW 2 "register_operand" "w")
3591               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3592           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3593   "TARGET_SIMD"
3594   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3595   [(set_attr "type" "neon_add_widen")]
3598 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3599   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3600         (plus:<VWIDE>
3601           (ANY_EXTEND:<VWIDE>
3602             (vec_select:<VHALF>
3603               (match_operand:VQW 2 "register_operand" "w")
3604               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3605           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3606   "TARGET_SIMD"
3607   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3608   [(set_attr "type" "neon_add_widen")]
;; Intrinsic expanders for the high-half ("2") widening add/sub: build
;; the hi-half lane parallel and defer to the *_internal patterns.
3611 (define_expand "aarch64_saddw2<mode>"
3612   [(match_operand:<VWIDE> 0 "register_operand")
3613    (match_operand:<VWIDE> 1 "register_operand")
3614    (match_operand:VQW 2 "register_operand")]
3615   "TARGET_SIMD"
3617   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3618   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3619                                                 operands[2], p));
3620   DONE;
3623 (define_expand "aarch64_uaddw2<mode>"
3624   [(match_operand:<VWIDE> 0 "register_operand")
3625    (match_operand:<VWIDE> 1 "register_operand")
3626    (match_operand:VQW 2 "register_operand")]
3627   "TARGET_SIMD"
3629   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3630   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3631                                                 operands[2], p));
3632   DONE;
3636 (define_expand "aarch64_ssubw2<mode>"
3637   [(match_operand:<VWIDE> 0 "register_operand")
3638    (match_operand:<VWIDE> 1 "register_operand")
3639    (match_operand:VQW 2 "register_operand")]
3640   "TARGET_SIMD"
3642   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3643   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3644                                                 operands[2], p));
3645   DONE;
3648 (define_expand "aarch64_usubw2<mode>"
3649   [(match_operand:<VWIDE> 0 "register_operand")
3650    (match_operand:<VWIDE> 1 "register_operand")
3651    (match_operand:VQW 2 "register_operand")]
3652   "TARGET_SIMD"
3654   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3655   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3656                                                 operands[2], p));
3657   DONE;
3660 ;; <su><r>h<addsub>.
;; Standard-name average expanders: the floor form maps to the HADD
;; unspecs, the ceil form to the rounding RHADD unspecs.
3662 (define_expand "<u>avg<mode>3_floor"
3663   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3664         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3665                           (match_operand:VDQ_BHSI 2 "register_operand")]
3666                          HADD))]
3667   "TARGET_SIMD"
3670 (define_expand "<u>avg<mode>3_ceil"
3671   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3672         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3673                           (match_operand:VDQ_BHSI 2 "register_operand")]
3674                          RHADD))]
3675   "TARGET_SIMD"
;; Halving add/sub instructions; the mnemonic is assembled as
;; <sur>h<addsub> from the HADDSUB iterator attributes.
3678 (define_insn "aarch64_<sur>h<addsub><mode>"
3679   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3680         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3681                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3682                      HADDSUB))]
3683   "TARGET_SIMD"
3684   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3685   [(set_attr "type" "neon_<addsub>_halve<q>")]
3688 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: result is the high half of each element
;; sum/difference, narrowed to <VNARROWQ>.
3690 (define_insn "aarch64_<sur><addsub>hn<mode>"
3691   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3692         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3693                             (match_operand:VQN 2 "register_operand" "w")]
3694                            ADDSUBHN))]
3695   "TARGET_SIMD"
3696   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3697   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; The "2" variant writes the high half of the destination; operand 1
;; (the existing low half) is tied to the output via the "0" constraint.
3700 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3701   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3702         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3703                              (match_operand:VQN 2 "register_operand" "w")
3704                              (match_operand:VQN 3 "register_operand" "w")]
3705                             ADDSUBHN2))]
3706   "TARGET_SIMD"
3707   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3708   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3711 ;; pmul.
;; Polynomial (carry-less) multiply on byte vectors.
3713 (define_insn "aarch64_pmul<mode>"
3714   [(set (match_operand:VB 0 "register_operand" "=w")
3715         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3716                     (match_operand:VB 2 "register_operand" "w")]
3717                    UNSPEC_PMUL))]
3718  "TARGET_SIMD"
3719  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3720   [(set_attr "type" "neon_mul_<Vetype><q>")]
3723 ;; fmulx.
;; FMULX: per the Arm ARM, behaves like fmul except that (0 * inf)
;; returns +/-2.0 rather than the default NaN.
3725 (define_insn "aarch64_fmulx<mode>"
3726   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3727         (unspec:VHSDF_HSDF
3728           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3729            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3730            UNSPEC_FMULX))]
3731  "TARGET_SIMD"
3732  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3733  [(set_attr "type" "neon_fp_mul_<stype>")]
3736 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by one lane selected from a vector of the opposite width
;; (64-bit <-> 128-bit, via <VSWAP_WIDTH>) and broadcast.
3738 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3739   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3740         (unspec:VDQSF
3741          [(match_operand:VDQSF 1 "register_operand" "w")
3742           (vec_duplicate:VDQSF
3743            (vec_select:<VEL>
3744             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3745             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3746          UNSPEC_FMULX))]
3747   "TARGET_SIMD"
3748   {
3749     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3750     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3751   }
3752   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3755 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by one lane selected from a same-width vector.
3757 (define_insn "*aarch64_mulx_elt<mode>"
3758   [(set (match_operand:VDQF 0 "register_operand" "=w")
3759         (unspec:VDQF
3760          [(match_operand:VDQF 1 "register_operand" "w")
3761           (vec_duplicate:VDQF
3762            (vec_select:<VEL>
3763             (match_operand:VDQF 2 "register_operand" "w")
3764             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3765          UNSPEC_FMULX))]
3766   "TARGET_SIMD"
3767   {
3768     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3769     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3770   }
3771   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3774 ;; vmulxq_lane
;; FMULX by a duplicated scalar, printed as lane 0 of the scalar's
;; vector view.
;; NOTE(review): the trailing ';' after the output template below is
;; harmless (it starts an empty md comment) but looks unintentional.
3776 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3777   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3778         (unspec:VHSDF
3779          [(match_operand:VHSDF 1 "register_operand" "w")
3780           (vec_duplicate:VHSDF
3781             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3782          UNSPEC_FMULX))]
3783   "TARGET_SIMD"
3784   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3785   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3788 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3789 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3790 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar by one lane of a vector.
3792 (define_insn "*aarch64_vgetfmulx<mode>"
3793   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3794         (unspec:<VEL>
3795          [(match_operand:<VEL> 1 "register_operand" "w")
3796           (vec_select:<VEL>
3797            (match_operand:VDQF 2 "register_operand" "w")
3798             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3799          UNSPEC_FMULX))]
3800   "TARGET_SIMD"
3801   {
3802     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3803     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3804   }
3805   [(set_attr "type" "fmul<Vetype>")]
3807 ;; <su>q<addsub>
;; Saturating add/subtract; mnemonic formed as <su_optab>q<addsub>.
3809 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3810   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3811         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3812                         (match_operand:VSDQ_I 2 "register_operand" "w")))]
3813   "TARGET_SIMD"
3814   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3815   [(set_attr "type" "neon_q<addsub><q>")]
3818 ;; suqadd and usqadd
;; Saturating accumulate: operand 1 is tied to the destination ("0"),
;; matching the instructions' two-operand accumulator form.
3820 (define_insn "aarch64_<sur>qadd<mode>"
3821   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3822         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3823                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3824                        USSUQADD))]
3825   "TARGET_SIMD"
3826   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3827   [(set_attr "type" "neon_qadd<q>")]
3830 ;; sqmovun
;; Saturating extract-narrow, signed source to unsigned result.
3832 (define_insn "aarch64_sqmovun<mode>"
3833   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3834         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3835                             UNSPEC_SQXTUN))]
3836    "TARGET_SIMD"
3837    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3838    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3841 ;; sqmovn and uqmovn
;; Saturating extract-narrow preserving signedness (sqxtn/uqxtn).
3843 (define_insn "aarch64_<sur>qmovn<mode>"
3844   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3845         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3846                             SUQMOVN))]
3847   "TARGET_SIMD"
3848   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3849    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3852 ;; <su>q<absneg>
;; Saturating unary ops; mnemonic formed as s<optab> from UNQOPS.
3854 (define_insn "aarch64_s<optab><mode>"
3855   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3856         (UNQOPS:VSDQ_I
3857           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3858   "TARGET_SIMD"
3859   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3860   [(set_attr "type" "neon_<optab><q>")]
3863 ;; sq<r>dmulh.
;; Saturating doubling multiply returning high half, optionally
;; rounding (the <r> iterator attribute).
3865 (define_insn "aarch64_sq<r>dmulh<mode>"
3866   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3867         (unspec:VSDQ_HSI
3868           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3869            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3870          VQDMULH))]
3871   "TARGET_SIMD"
3872   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3873   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3876 ;; sq<r>dmulh_lane
;; By-lane forms: "_lane" selects from a 64-bit vector (<VCOND>),
;; "_laneq" from a 128-bit vector (<VCONQ>).
3878 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3879   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3880         (unspec:VDQHS
3881           [(match_operand:VDQHS 1 "register_operand" "w")
3882            (vec_select:<VEL>
3883              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3884              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3885          VQDMULH))]
3886   "TARGET_SIMD"
3887   "*
3888    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3889    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3890   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3893 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3894   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3895         (unspec:VDQHS
3896           [(match_operand:VDQHS 1 "register_operand" "w")
3897            (vec_select:<VEL>
3898              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3899              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3900          VQDMULH))]
3901   "TARGET_SIMD"
3902   "*
3903    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3904    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3905   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by-lane forms.
;; NOTE(review): these templates print the lane as %2.<v>[%3] whereas the
;; vector forms above use %2.<Vetype>[%3]; confirm <v> yields the intended
;; element-size suffix here.
3908 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3909   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3910         (unspec:SD_HSI
3911           [(match_operand:SD_HSI 1 "register_operand" "w")
3912            (vec_select:<VEL>
3913              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3914              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3915          VQDMULH))]
3916   "TARGET_SIMD"
3917   "*
3918    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3919    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3920   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3923 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3924   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3925         (unspec:SD_HSI
3926           [(match_operand:SD_HSI 1 "register_operand" "w")
3927            (vec_select:<VEL>
3928              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3929              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3930          VQDMULH))]
3931   "TARGET_SIMD"
3932   "*
3933    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3934    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3935   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3938 ;; sqrdml[as]h.
;; SQRDMLAH/SQRDMLSH (gated on TARGET_SIMD_RDMA): saturating rounding
;; doubling multiply accumulate/subtract returning high half.  Operand 1
;; is the accumulator, tied to the destination via "0".
3940 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3941   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3942         (unspec:VSDQ_HSI
3943           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3944            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3945            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3946           SQRDMLH_AS))]
3947    "TARGET_SIMD_RDMA"
3948    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3949    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3952 ;; sqrdml[as]h_lane.
;; Vector-by-lane form; the lane comes from a 64-bit vector (<VCOND>).
3954 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3955   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3956         (unspec:VDQHS
3957           [(match_operand:VDQHS 1 "register_operand" "0")
3958            (match_operand:VDQHS 2 "register_operand" "w")
3959            (vec_select:<VEL>
3960              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3961              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3962           SQRDMLH_AS))]
3963    "TARGET_SIMD_RDMA"
3964    {
3965      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3966      return
3967       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3968    }
3969    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane form.
3972 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3973   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3974         (unspec:SD_HSI
3975           [(match_operand:SD_HSI 1 "register_operand" "0")
3976            (match_operand:SD_HSI 2 "register_operand" "w")
3977            (vec_select:<VEL>
3978              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3979              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3980           SQRDMLH_AS))]
3981    "TARGET_SIMD_RDMA"
3982    {
3983      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3984      return
3985       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3986    }
3987    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3990 ;; sqrdml[as]h_laneq.
;; As the _lane patterns but selecting from a 128-bit vector (<VCONQ>).
3992 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3993   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3994         (unspec:VDQHS
3995           [(match_operand:VDQHS 1 "register_operand" "0")
3996            (match_operand:VDQHS 2 "register_operand" "w")
3997            (vec_select:<VEL>
3998              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3999              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4000           SQRDMLH_AS))]
4001    "TARGET_SIMD_RDMA"
4002    {
4003      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4004      return
4005       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4006    }
4007    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): this laneq scalar pattern prints %3.<v>[%4] while its
;; _lane sibling above prints %3.<Vetype>[%4]; confirm the difference is
;; intentional.
4010 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4011   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4012         (unspec:SD_HSI
4013           [(match_operand:SD_HSI 1 "register_operand" "0")
4014            (match_operand:SD_HSI 2 "register_operand" "w")
4015            (vec_select:<VEL>
4016              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4017              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4018           SQRDMLH_AS))]
4019    "TARGET_SIMD_RDMA"
4020    {
4021      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4022      return
4023       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4024    }
4025    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4028 ;; vqdml[sa]l
;; SQDMLAL/SQDMLSL: widening signed saturating doubling multiply-accumulate
;; (the doubling is modelled as ss_ashift by 1 of the widened product).
4030 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
4031   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4032         (SBINQOPS:<VWIDE>
4033           (match_operand:<VWIDE> 1 "register_operand" "0")
4034           (ss_ashift:<VWIDE>
4035               (mult:<VWIDE>
4036                 (sign_extend:<VWIDE>
4037                       (match_operand:VSD_HSI 2 "register_operand" "w"))
4038                 (sign_extend:<VWIDE>
4039                       (match_operand:VSD_HSI 3 "register_operand" "w")))
4040               (const_int 1))))]
4041   "TARGET_SIMD"
4042   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4043   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
4046 ;; vqdml[sa]l_lane
;; SQDMLAL/SQDMLSL by lane of a 64-bit vector (<VCOND>): the selected lane
;; is duplicated across the multiplicand before the widening multiply.
4048 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4049   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4050         (SBINQOPS:<VWIDE>
4051           (match_operand:<VWIDE> 1 "register_operand" "0")
4052           (ss_ashift:<VWIDE>
4053             (mult:<VWIDE>
4054               (sign_extend:<VWIDE>
4055                 (match_operand:VD_HSI 2 "register_operand" "w"))
4056               (sign_extend:<VWIDE>
4057                 (vec_duplicate:VD_HSI
4058                   (vec_select:<VEL>
4059                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4060                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4061               ))
4062             (const_int 1))))]
4063   "TARGET_SIMD"
4064   {
4065     ;; Correct the lane number for big-endian element ordering.
4066     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4067     return
4068       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4069   }
4070   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above, but the lane comes from a 128-bit vector (<VCONQ>).
4072 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4073   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074         (SBINQOPS:<VWIDE>
4075           (match_operand:<VWIDE> 1 "register_operand" "0")
4076           (ss_ashift:<VWIDE>
4077             (mult:<VWIDE>
4078               (sign_extend:<VWIDE>
4079                 (match_operand:VD_HSI 2 "register_operand" "w"))
4080               (sign_extend:<VWIDE>
4081                 (vec_duplicate:VD_HSI
4082                   (vec_select:<VEL>
4083                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4084                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4085               ))
4086             (const_int 1))))]
4087   "TARGET_SIMD"
4088   {
4089     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4090     return
4091       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4092   }
4093   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar SQDMLAL/SQDMLSL by lane of a 64-bit vector: no vec_duplicate
;; needed since the multiplicand is a single element.
4096 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4097   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4098         (SBINQOPS:<VWIDE>
4099           (match_operand:<VWIDE> 1 "register_operand" "0")
4100           (ss_ashift:<VWIDE>
4101             (mult:<VWIDE>
4102               (sign_extend:<VWIDE>
4103                 (match_operand:SD_HSI 2 "register_operand" "w"))
4104               (sign_extend:<VWIDE>
4105                 (vec_select:<VEL>
4106                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4108               )
4109             (const_int 1))))]
4110   "TARGET_SIMD"
4111   {
4112     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4113     return
4114       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4115   }
4116   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar SQDMLAL/SQDMLSL by lane of a 128-bit vector.
4119 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4120   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4121         (SBINQOPS:<VWIDE>
4122           (match_operand:<VWIDE> 1 "register_operand" "0")
4123           (ss_ashift:<VWIDE>
4124             (mult:<VWIDE>
4125               (sign_extend:<VWIDE>
4126                 (match_operand:SD_HSI 2 "register_operand" "w"))
4127               (sign_extend:<VWIDE>
4128                 (vec_select:<VEL>
4129                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4130                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4131               )
4132             (const_int 1))))]
4133   "TARGET_SIMD"
4134   {
4135     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4136     return
4137       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4138   }
4139   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4142 ;; vqdml[sa]l_n
;; SQDMLAL/SQDMLSL by a scalar: the scalar is broadcast with vec_duplicate
;; and addressed as lane [0] in the assembly output.
4144 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4145   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4146         (SBINQOPS:<VWIDE>
4147           (match_operand:<VWIDE> 1 "register_operand" "0")
4148           (ss_ashift:<VWIDE>
4149               (mult:<VWIDE>
4150                 (sign_extend:<VWIDE>
4151                       (match_operand:VD_HSI 2 "register_operand" "w"))
4152                 (sign_extend:<VWIDE>
4153                   (vec_duplicate:VD_HSI
4154                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4155               (const_int 1))))]
4156   "TARGET_SIMD"
4157   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4158   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4161 ;; sqdml[as]l2
;; SQDMLAL2/SQDMLSL2: widening multiply-accumulate on the high halves of two
;; 128-bit vectors; operand 4 is the hi-half lane-selection parallel built
;; by the expander.
;; NOTE(review): the "type" attr says _scalar_long although this is not the
;; scalar/lane form -- presumably inherited from a copy; verify intent.
4163 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4164   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4165         (SBINQOPS:<VWIDE>
4166          (match_operand:<VWIDE> 1 "register_operand" "0")
4167          (ss_ashift:<VWIDE>
4168              (mult:<VWIDE>
4169                (sign_extend:<VWIDE>
4170                  (vec_select:<VHALF>
4171                      (match_operand:VQ_HSI 2 "register_operand" "w")
4172                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4173                (sign_extend:<VWIDE>
4174                  (vec_select:<VHALF>
4175                      (match_operand:VQ_HSI 3 "register_operand" "w")
4176                      (match_dup 4))))
4177              (const_int 1))))]
4178   "TARGET_SIMD"
4179   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4180   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for SQDMLAL2: builds the hi-half selector and defers to the
;; _internal insn above.
4183 (define_expand "aarch64_sqdmlal2<mode>"
4184   [(match_operand:<VWIDE> 0 "register_operand")
4185    (match_operand:<VWIDE> 1 "register_operand")
4186    (match_operand:VQ_HSI 2 "register_operand")
4187    (match_operand:VQ_HSI 3 "register_operand")]
4188   "TARGET_SIMD"
4190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4191   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4192                                                   operands[2], operands[3], p));
4193   DONE;
;; Expander for SQDMLSL2, mirror of the sqdmlal2 expander.
4196 (define_expand "aarch64_sqdmlsl2<mode>"
4197   [(match_operand:<VWIDE> 0 "register_operand")
4198    (match_operand:<VWIDE> 1 "register_operand")
4199    (match_operand:VQ_HSI 2 "register_operand")
4200    (match_operand:VQ_HSI 3 "register_operand")]
4201   "TARGET_SIMD"
4203   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4204   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4205                                                   operands[2], operands[3], p));
4206   DONE;
4209 ;; vqdml[sa]l2_lane
;; SQDMLAL2/SQDMLSL2 by lane of a 64-bit vector: hi half of op2 times the
;; broadcast lane op4 of op3.  Operand 5 is the hi-half selector parallel.
4211 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4212   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4213         (SBINQOPS:<VWIDE>
4214           (match_operand:<VWIDE> 1 "register_operand" "0")
4215           (ss_ashift:<VWIDE>
4216               (mult:<VWIDE>
4217                 (sign_extend:<VWIDE>
4218                   (vec_select:<VHALF>
4219                     (match_operand:VQ_HSI 2 "register_operand" "w")
4220                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4221                 (sign_extend:<VWIDE>
4222                   (vec_duplicate:<VHALF>
4223                     (vec_select:<VEL>
4224                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4225                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4226                     ))))
4227               (const_int 1))))]
4228   "TARGET_SIMD"
4229   {
4230     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4231     return
4232      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4233   }
4234   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above, with the lane taken from a 128-bit vector (<VCONQ>).
4237 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4238   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4239         (SBINQOPS:<VWIDE>
4240           (match_operand:<VWIDE> 1 "register_operand" "0")
4241           (ss_ashift:<VWIDE>
4242               (mult:<VWIDE>
4243                 (sign_extend:<VWIDE>
4244                   (vec_select:<VHALF>
4245                     (match_operand:VQ_HSI 2 "register_operand" "w")
4246                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4247                 (sign_extend:<VWIDE>
4248                   (vec_duplicate:<VHALF>
4249                     (vec_select:<VEL>
4250                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4251                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4252                     ))))
4253               (const_int 1))))]
4254   "TARGET_SIMD"
4255   {
4256     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4257     return
4258      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4259   }
4260   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: supply the hi-half selector for the _lane _internal insn.
4263 (define_expand "aarch64_sqdmlal2_lane<mode>"
4264   [(match_operand:<VWIDE> 0 "register_operand")
4265    (match_operand:<VWIDE> 1 "register_operand")
4266    (match_operand:VQ_HSI 2 "register_operand")
4267    (match_operand:<VCOND> 3 "register_operand")
4268    (match_operand:SI 4 "immediate_operand")]
4269   "TARGET_SIMD"
4271   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4272   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4273                                                        operands[2], operands[3],
4274                                                        operands[4], p));
4275   DONE;
;; Expander: supply the hi-half selector for the _laneq _internal insn.
4278 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4279   [(match_operand:<VWIDE> 0 "register_operand")
4280    (match_operand:<VWIDE> 1 "register_operand")
4281    (match_operand:VQ_HSI 2 "register_operand")
4282    (match_operand:<VCONQ> 3 "register_operand")
4283    (match_operand:SI 4 "immediate_operand")]
4284   "TARGET_SIMD"
4286   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4287   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4288                                                        operands[2], operands[3],
4289                                                        operands[4], p));
4290   DONE;
;; Expander for SQDMLSL2 by lane; mirrors the sqdmlal2_lane expander.
4293 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4294   [(match_operand:<VWIDE> 0 "register_operand")
4295    (match_operand:<VWIDE> 1 "register_operand")
4296    (match_operand:VQ_HSI 2 "register_operand")
4297    (match_operand:<VCOND> 3 "register_operand")
4298    (match_operand:SI 4 "immediate_operand")]
4299   "TARGET_SIMD"
4301   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4303                                                        operands[2], operands[3],
4304                                                        operands[4], p));
4305   DONE;
;; Expander for SQDMLSL2 by 128-bit-vector lane.
4308 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4309   [(match_operand:<VWIDE> 0 "register_operand")
4310    (match_operand:<VWIDE> 1 "register_operand")
4311    (match_operand:VQ_HSI 2 "register_operand")
4312    (match_operand:<VCONQ> 3 "register_operand")
4313    (match_operand:SI 4 "immediate_operand")]
4314   "TARGET_SIMD"
4316   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4317   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4318                                                        operands[2], operands[3],
4319                                                        operands[4], p));
4320   DONE;
;; SQDMLAL2/SQDMLSL2 by scalar: hi half of op2 times the broadcast scalar
;; op3, printed as lane [0].
4323 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4324   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4325         (SBINQOPS:<VWIDE>
4326           (match_operand:<VWIDE> 1 "register_operand" "0")
4327           (ss_ashift:<VWIDE>
4328             (mult:<VWIDE>
4329               (sign_extend:<VWIDE>
4330                 (vec_select:<VHALF>
4331                   (match_operand:VQ_HSI 2 "register_operand" "w")
4332                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4333               (sign_extend:<VWIDE>
4334                 (vec_duplicate:<VHALF>
4335                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4336             (const_int 1))))]
4337   "TARGET_SIMD"
4338   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4339   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for SQDMLAL2 by scalar.
4342 (define_expand "aarch64_sqdmlal2_n<mode>"
4343   [(match_operand:<VWIDE> 0 "register_operand")
4344    (match_operand:<VWIDE> 1 "register_operand")
4345    (match_operand:VQ_HSI 2 "register_operand")
4346    (match_operand:<VEL> 3 "register_operand")]
4347   "TARGET_SIMD"
4349   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4350   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4351                                                     operands[2], operands[3],
4352                                                     p));
4353   DONE;
;; Expander for SQDMLSL2 by scalar.
4356 (define_expand "aarch64_sqdmlsl2_n<mode>"
4357   [(match_operand:<VWIDE> 0 "register_operand")
4358    (match_operand:<VWIDE> 1 "register_operand")
4359    (match_operand:VQ_HSI 2 "register_operand")
4360    (match_operand:<VEL> 3 "register_operand")]
4361   "TARGET_SIMD"
4363   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4364   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4365                                                     operands[2], operands[3],
4366                                                     p));
4367   DONE;
4370 ;; vqdmull
;; SQDMULL: widening signed saturating doubling multiply (ss_ashift by 1 of
;; the product of the sign-extended operands).
4372 (define_insn "aarch64_sqdmull<mode>"
4373   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4374         (ss_ashift:<VWIDE>
4375              (mult:<VWIDE>
4376                (sign_extend:<VWIDE>
4377                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4378                (sign_extend:<VWIDE>
4379                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4380              (const_int 1)))]
4381   "TARGET_SIMD"
4382   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4383   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4386 ;; vqdmull_lane
;; SQDMULL by lane of a 64-bit vector; the lane is broadcast before the
;; widening multiply, and renumbered for big-endian in the C fragment.
4388 (define_insn "aarch64_sqdmull_lane<mode>"
4389   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4390         (ss_ashift:<VWIDE>
4391              (mult:<VWIDE>
4392                (sign_extend:<VWIDE>
4393                  (match_operand:VD_HSI 1 "register_operand" "w"))
4394                (sign_extend:<VWIDE>
4395                  (vec_duplicate:VD_HSI
4396                    (vec_select:<VEL>
4397                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4398                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4399                ))
4400              (const_int 1)))]
4401   "TARGET_SIMD"
4402   {
4403     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4404     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4405   }
4406   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL by lane of a 128-bit vector.
4409 (define_insn "aarch64_sqdmull_laneq<mode>"
4410   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4411         (ss_ashift:<VWIDE>
4412              (mult:<VWIDE>
4413                (sign_extend:<VWIDE>
4414                  (match_operand:VD_HSI 1 "register_operand" "w"))
4415                (sign_extend:<VWIDE>
4416                  (vec_duplicate:VD_HSI
4417                    (vec_select:<VEL>
4418                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4419                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4420                ))
4421              (const_int 1)))]
4422   "TARGET_SIMD"
4423   {
4424     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4425     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4426   }
4427   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar SQDMULL by lane of a 64-bit vector (no vec_duplicate needed).
4430 (define_insn "aarch64_sqdmull_lane<mode>"
4431   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4432         (ss_ashift:<VWIDE>
4433              (mult:<VWIDE>
4434                (sign_extend:<VWIDE>
4435                  (match_operand:SD_HSI 1 "register_operand" "w"))
4436                (sign_extend:<VWIDE>
4437                  (vec_select:<VEL>
4438                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4439                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4440                ))
4441              (const_int 1)))]
4442   "TARGET_SIMD"
4443   {
4444     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4445     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4446   }
4447   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar SQDMULL by lane of a 128-bit vector.
4450 (define_insn "aarch64_sqdmull_laneq<mode>"
4451   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4452         (ss_ashift:<VWIDE>
4453              (mult:<VWIDE>
4454                (sign_extend:<VWIDE>
4455                  (match_operand:SD_HSI 1 "register_operand" "w"))
4456                (sign_extend:<VWIDE>
4457                  (vec_select:<VEL>
4458                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4459                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4460                ))
4461              (const_int 1)))]
4462   "TARGET_SIMD"
4463   {
4464     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4465     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4466   }
4467   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4470 ;; vqdmull_n
;; SQDMULL by a broadcast scalar, printed as lane [0].
4472 (define_insn "aarch64_sqdmull_n<mode>"
4473   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4474         (ss_ashift:<VWIDE>
4475              (mult:<VWIDE>
4476                (sign_extend:<VWIDE>
4477                  (match_operand:VD_HSI 1 "register_operand" "w"))
4478                (sign_extend:<VWIDE>
4479                  (vec_duplicate:VD_HSI
4480                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4481                )
4482              (const_int 1)))]
4483   "TARGET_SIMD"
4484   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4485   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4488 ;; vqdmull2
;; SQDMULL2: widening multiply of the high halves of two 128-bit vectors;
;; operand 3 is the hi-half selector supplied by the expander.
;; NOTE(review): type attr is _scalar_long though this is not the lane
;; form; presumably copied from the lane pattern -- verify intent.
4492 (define_insn "aarch64_sqdmull2<mode>_internal"
4493   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4494         (ss_ashift:<VWIDE>
4495              (mult:<VWIDE>
4496                (sign_extend:<VWIDE>
4497                  (vec_select:<VHALF>
4498                    (match_operand:VQ_HSI 1 "register_operand" "w")
4499                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4500                (sign_extend:<VWIDE>
4501                  (vec_select:<VHALF>
4502                    (match_operand:VQ_HSI 2 "register_operand" "w")
4503                    (match_dup 3)))
4504                )
4505              (const_int 1)))]
4506   "TARGET_SIMD"
4507   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4508   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for SQDMULL2: builds the hi-half selector parallel.
4511 (define_expand "aarch64_sqdmull2<mode>"
4512   [(match_operand:<VWIDE> 0 "register_operand")
4513    (match_operand:VQ_HSI 1 "register_operand")
4514    (match_operand:VQ_HSI 2 "register_operand")]
4515   "TARGET_SIMD"
4517   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4519                                                   operands[2], p));
4520   DONE;
4523 ;; vqdmull2_lane
;; SQDMULL2 by lane of a 64-bit vector: hi half of op1 times broadcast lane
;; op3 of op2; operand 4 is the hi-half selector.
4525 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4526   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4527         (ss_ashift:<VWIDE>
4528              (mult:<VWIDE>
4529                (sign_extend:<VWIDE>
4530                  (vec_select:<VHALF>
4531                    (match_operand:VQ_HSI 1 "register_operand" "w")
4532                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4533                (sign_extend:<VWIDE>
4534                  (vec_duplicate:<VHALF>
4535                    (vec_select:<VEL>
4536                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4537                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4538                ))
4539              (const_int 1)))]
4540   "TARGET_SIMD"
4541   {
4542     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4543     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4544   }
4545   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, with the lane taken from a 128-bit vector.
4548 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4549   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4550         (ss_ashift:<VWIDE>
4551              (mult:<VWIDE>
4552                (sign_extend:<VWIDE>
4553                  (vec_select:<VHALF>
4554                    (match_operand:VQ_HSI 1 "register_operand" "w")
4555                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4556                (sign_extend:<VWIDE>
4557                  (vec_duplicate:<VHALF>
4558                    (vec_select:<VEL>
4559                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4560                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4561                ))
4562              (const_int 1)))]
4563   "TARGET_SIMD"
4564   {
4565     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4566     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4567   }
4568   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for SQDMULL2 by 64-bit-vector lane.
4571 (define_expand "aarch64_sqdmull2_lane<mode>"
4572   [(match_operand:<VWIDE> 0 "register_operand")
4573    (match_operand:VQ_HSI 1 "register_operand")
4574    (match_operand:<VCOND> 2 "register_operand")
4575    (match_operand:SI 3 "immediate_operand")]
4576   "TARGET_SIMD"
4578   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4579   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4580                                                        operands[2], operands[3],
4581                                                        p));
4582   DONE;
;; Expander for SQDMULL2 by 128-bit-vector lane.
4585 (define_expand "aarch64_sqdmull2_laneq<mode>"
4586   [(match_operand:<VWIDE> 0 "register_operand")
4587    (match_operand:VQ_HSI 1 "register_operand")
4588    (match_operand:<VCONQ> 2 "register_operand")
4589    (match_operand:SI 3 "immediate_operand")]
4590   "TARGET_SIMD"
4592   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4593   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4594                                                        operands[2], operands[3],
4595                                                        p));
4596   DONE;
4599 ;; vqdmull2_n
;; SQDMULL2 by broadcast scalar, printed as lane [0]; operand 3 is the
;; hi-half selector supplied by the expander.
4601 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4602   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4603         (ss_ashift:<VWIDE>
4604              (mult:<VWIDE>
4605                (sign_extend:<VWIDE>
4606                  (vec_select:<VHALF>
4607                    (match_operand:VQ_HSI 1 "register_operand" "w")
4608                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4609                (sign_extend:<VWIDE>
4610                  (vec_duplicate:<VHALF>
4611                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4612                )
4613              (const_int 1)))]
4614   "TARGET_SIMD"
4615   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4616   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for SQDMULL2 by scalar.
4619 (define_expand "aarch64_sqdmull2_n<mode>"
4620   [(match_operand:<VWIDE> 0 "register_operand")
4621    (match_operand:VQ_HSI 1 "register_operand")
4622    (match_operand:<VEL> 2 "register_operand")]
4623   "TARGET_SIMD"
4625   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4626   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4627                                                     operands[2], p));
4628   DONE;
4631 ;; vshl
;; Per-element register shifts: SSHL/USHL/SRSHL/URSHL (iterator VSHL).
;; Fix: removed the stray ';' after the output template string -- it was
;; parsed as an empty md comment, so the output is unchanged, but it was
;; noise and inconsistent with the rest of the file.
4633 (define_insn "aarch64_<sur>shl<mode>"
4634   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4635         (unspec:VSDQ_I_DI
4636           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4637            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4638          VSHL))]
4639   "TARGET_SIMD"
4640   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4641   [(set_attr "type" "neon_shift_reg<q>")]
4645 ;; vqshl
;; Saturating (optionally rounding) register shifts: SQSHL/UQSHL/SQRSHL/
;; UQRSHL (iterator VQSHL).  Fix: removed the stray ';' after the output
;; template string (parsed as an empty md comment; output unchanged).
4647 (define_insn "aarch64_<sur>q<r>shl<mode>"
4648   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4649         (unspec:VSDQ_I
4650           [(match_operand:VSDQ_I 1 "register_operand" "w")
4651            (match_operand:VSDQ_I 2 "register_operand" "w")]
4652          VQSHL))]
4653   "TARGET_SIMD"
4654   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4655   [(set_attr "type" "neon_sat_shift_reg<q>")]
4658 ;; vshll_n
;; SSHLL/USHLL by immediate; when the shift equals the element width the
;; plain SHLL alias must be used (that form has no signed/unsigned variant).
4660 (define_insn "aarch64_<sur>shll_n<mode>"
4661   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4662         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4663                          (match_operand:SI 2
4664                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4665                          VSHLL))]
4666   "TARGET_SIMD"
4667   {
4668     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4669       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4670     else
4671       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4672   }
4673   [(set_attr "type" "neon_shift_imm_long")]
4676 ;; vshll_high_n
;; SSHLL2/USHLL2 (high-half widening shift by immediate).
;; NOTE(review): operand 2 uses plain "immediate_operand" whereas shll_n
;; above restricts it with aarch64_simd_shift_imm_bitsize_<ve_mode>;
;; presumably the same predicate is intended here -- verify.
4678 (define_insn "aarch64_<sur>shll2_n<mode>"
4679   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4680         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4681                          (match_operand:SI 2 "immediate_operand" "i")]
4682                          VSHLL))]
4683   "TARGET_SIMD"
4684   {
4685     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4686       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4687     else
4688       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4689   }
4690   [(set_attr "type" "neon_shift_imm_long")]
4693 ;; vrshr_n
;; Right shifts by immediate (iterator VRSHR_N, e.g. SRSHR/URSHR).
4695 (define_insn "aarch64_<sur>shr_n<mode>"
4696   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4697         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4698                            (match_operand:SI 2
4699                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4700                           VRSHR_N))]
4701   "TARGET_SIMD"
4702   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4703   [(set_attr "type" "neon_sat_shift_imm<q>")]
4706 ;; v(r)sra_n
;; Shift-right-and-accumulate by immediate (iterator VSRA); operand 1 is
;; the accumulator and is tied to the destination ("0").
4708 (define_insn "aarch64_<sur>sra_n<mode>"
4709   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4710         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4711                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4712                        (match_operand:SI 3
4713                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4714                       VSRA))]
4715   "TARGET_SIMD"
4716   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4717   [(set_attr "type" "neon_shift_acc<q>")]
4720 ;; vs<lr>i_n
;; Shift-and-insert by immediate: SLI/SRI (iterator VSLRI); operand 1 holds
;; the bits preserved by the insert and is tied to the destination.
4722 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4723   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4724         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4725                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4726                        (match_operand:SI 3
4727                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4728                       VSLRI))]
4729   "TARGET_SIMD"
4730   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4731   [(set_attr "type" "neon_shift_imm<q>")]
4734 ;; vqshl(u)
;; Saturating left shifts by immediate: SQSHL/UQSHL/SQSHLU (iterator
;; VQSHL_N).
4736 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4737   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4738         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4739                        (match_operand:SI 2
4740                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4741                       VQSHL_N))]
4742   "TARGET_SIMD"
4743   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4744   [(set_attr "type" "neon_sat_shift_imm<q>")]
4748 ;; vq(r)shr(u)n_n
;; Saturating (rounding) shift-right-narrow by immediate (iterator
;; VQSHRN_N, e.g. SQSHRN/UQRSHRN/SQSHRUN); result is the narrow mode.
4750 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4751   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4752         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4753                             (match_operand:SI 2
4754                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4755                            VQSHRN_N))]
4756   "TARGET_SIMD"
4757   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4758   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4762 ;; cm(eq|ge|gt|lt|le)
4763 ;; Note, we have constraints for Dz and Z as different expanders
4764 ;; have different ideas of what should be passed to this pattern.
;; Signed vector compares producing an all-ones/all-zeros mask (the neg of
;; the 0/1 comparison result).  Alternative 2 compares against #0 via the
;; ZDz constraint; <cmp_1>/<cmp_2> may swap the operands so only the
;; architecturally available direction of each condition is emitted.
4766 (define_insn "aarch64_cm<optab><mode>"
4767   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4768         (neg:<V_INT_EQUIV>
4769           (COMPARISONS:<V_INT_EQUIV>
4770             (match_operand:VDQ_I 1 "register_operand" "w,w")
4771             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4772           )))]
4773   "TARGET_SIMD"
4774   "@
4775   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4776   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4777   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode signed compare: may live in FP regs (NEON cmXX) or GP regs.
;; Always split after reload: the GP case becomes a compare + cstore
;; sequence (hence the CC clobber); the FP case re-matches the
;; clobber-free *aarch64_cm<optab>di pattern below.
4780 (define_insn_and_split "aarch64_cm<optab>di"
4781   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4782         (neg:DI
4783           (COMPARISONS:DI
4784             (match_operand:DI 1 "register_operand" "w,w,r")
4785             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4786           )))
4787      (clobber (reg:CC CC_REGNUM))]
4788   "TARGET_SIMD"
4789   "#"
4790   "&& reload_completed"
4791   [(set (match_operand:DI 0 "register_operand")
4792         (neg:DI
4793           (COMPARISONS:DI
4794             (match_operand:DI 1 "register_operand")
4795             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4796           )))]
4797   {
4798     /* If we are in the general purpose register file,
4799        we split to a sequence of comparison and store.  */
4800     if (GP_REGNUM_P (REGNO (operands[0]))
4801         && GP_REGNUM_P (REGNO (operands[1])))
4802       {
4803         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4804         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4805         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4806         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4807         DONE;
4808       }
4809     /* Otherwise, we expand to a similar pattern which does not
4810        clobber CC_REGNUM.  */
4811   }
4812   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DImode signed compare: no CC
;; clobber, so it can only match once register allocation has proven the
;; operands live in vector registers (hence "reload_completed").
4815 (define_insn "*aarch64_cm<optab>di"
4816   [(set (match_operand:DI 0 "register_operand" "=w,w")
4817         (neg:DI
4818           (COMPARISONS:DI
4819             (match_operand:DI 1 "register_operand" "w,w")
4820             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4821           )))]
4822   "TARGET_SIMD && reload_completed"
4823   "@
4824   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4825   cm<optab>\t%d0, %d1, #0"
4826   [(set_attr "type" "neon_compare, neon_compare_zero")]
4829 ;; cm(hs|hi)
;; Unsigned integer vector compare cm(hs|hi).  Unlike the signed variant
;; there is no compare-against-zero alternative (an unsigned >= 0 is
;; trivially true), so both operands must be registers.
4831 (define_insn "aarch64_cm<optab><mode>"
4832   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4833         (neg:<V_INT_EQUIV>
4834           (UCOMPARISONS:<V_INT_EQUIV>
4835             (match_operand:VDQ_I 1 "register_operand" "w")
4836             (match_operand:VDQ_I 2 "register_operand" "w")
4837           )))]
4838   "TARGET_SIMD"
4839   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4840   [(set_attr "type" "neon_compare<q>")]
;; Scalar DImode unsigned compare; same splitting strategy as the signed
;; DImode pattern above: after reload either a GP compare + negated cset,
;; or the CC-free SIMD pattern below.  Unsigned compares always use plain
;; CCmode rather than consulting SELECT_CC_MODE.
4843 (define_insn_and_split "aarch64_cm<optab>di"
4844   [(set (match_operand:DI 0 "register_operand" "=w,r")
4845         (neg:DI
4846           (UCOMPARISONS:DI
4847             (match_operand:DI 1 "register_operand" "w,r")
4848             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4849           )))
4850     (clobber (reg:CC CC_REGNUM))]
4851   "TARGET_SIMD"
4852   "#"
4853   "&& reload_completed"
4854   [(set (match_operand:DI 0 "register_operand")
4855         (neg:DI
4856           (UCOMPARISONS:DI
4857             (match_operand:DI 1 "register_operand")
4858             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4859           )))]
4860   {
4861     /* If we are in the general purpose register file,
4862        we split to a sequence of comparison and store.  */
4863     if (GP_REGNUM_P (REGNO (operands[0]))
4864         && GP_REGNUM_P (REGNO (operands[1])))
4865       {
4866         machine_mode mode = CCmode;
4867         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4868         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4869         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4870         DONE;
4871       }
4872     /* Otherwise, we expand to a similar pattern which does not
4873        clobber CC_REGNUM.  */
4874   }
4875   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload SIMD-register form of the DImode unsigned compare
;; (no CC clobber); counterpart of the split pattern above.
4878 (define_insn "*aarch64_cm<optab>di"
4879   [(set (match_operand:DI 0 "register_operand" "=w")
4880         (neg:DI
4881           (UCOMPARISONS:DI
4882             (match_operand:DI 1 "register_operand" "w")
4883             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4884           )))]
4885   "TARGET_SIMD && reload_completed"
4886   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4887   [(set_attr "type" "neon_compare")]
4890 ;; cmtst
4892 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4893 ;; we don't have any insns using ne, and aarch64_vcond outputs
4894 ;; not (neg (eq (and x y) 0))
4895 ;; which is rewritten by simplify_rtx as
4896 ;; plus (eq (and x y) 0) -1.
;; Vector test-bits (CMTST): lane is all-ones iff (x & y) != 0.  Matched
;; in the simplify_rtx-canonical form (plus (eq (and x y) 0) -1) as
;; explained in the comment block above this pattern.
4898 (define_insn "aarch64_cmtst<mode>"
4899   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4900         (plus:<V_INT_EQUIV>
4901           (eq:<V_INT_EQUIV>
4902             (and:VDQ_I
4903               (match_operand:VDQ_I 1 "register_operand" "w")
4904               (match_operand:VDQ_I 2 "register_operand" "w"))
4905             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4906           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4907   ]
4908   "TARGET_SIMD"
4909   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4910   [(set_attr "type" "neon_tst<q>")]
;; Scalar DImode test-bits.  Note this form uses (neg (ne ...)) directly,
;; unlike the vector pattern above.  Splits after reload: GP registers get
;; TST-style compare + negated cset (via gen_cstoredi_neg on NE of the AND);
;; SIMD registers fall through to *aarch64_cmtstdi below.
4913 (define_insn_and_split "aarch64_cmtstdi"
4914   [(set (match_operand:DI 0 "register_operand" "=w,r")
4915         (neg:DI
4916           (ne:DI
4917             (and:DI
4918               (match_operand:DI 1 "register_operand" "w,r")
4919               (match_operand:DI 2 "register_operand" "w,r"))
4920             (const_int 0))))
4921     (clobber (reg:CC CC_REGNUM))]
4922   "TARGET_SIMD"
4923   "#"
4924   "&& reload_completed"
4925   [(set (match_operand:DI 0 "register_operand")
4926         (neg:DI
4927           (ne:DI
4928             (and:DI
4929               (match_operand:DI 1 "register_operand")
4930               (match_operand:DI 2 "register_operand"))
4931             (const_int 0))))]
4932   {
4933     /* If we are in the general purpose register file,
4934        we split to a sequence of comparison and store.  */
4935     if (GP_REGNUM_P (REGNO (operands[0]))
4936         && GP_REGNUM_P (REGNO (operands[1])))
4937       {
4938         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4939         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4940         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4941         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4942         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4943         DONE;
4944       }
4945     /* Otherwise, we expand to a similar pattern which does not
4946        clobber CC_REGNUM.  */
4947   }
4948   [(set_attr "type" "neon_tst,multiple")]
;; SIMD-register form of the DImode test-bits insn (no CC clobber).
;; NOTE(review): unlike the *aarch64_cm<optab>di patterns this one does
;; not require reload_completed in its condition — presumably safe because
;; it has no CC clobber to lose, but worth confirming against history.
4951 (define_insn "*aarch64_cmtstdi"
4952   [(set (match_operand:DI 0 "register_operand" "=w")
4953         (neg:DI
4954           (ne:DI
4955             (and:DI
4956               (match_operand:DI 1 "register_operand" "w")
4957               (match_operand:DI 2 "register_operand" "w"))
4958             (const_int 0))))]
4959   "TARGET_SIMD"
4960   "cmtst\t%d0, %d1, %d2"
4961   [(set_attr "type" "neon_tst")]
4964 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector compare fcm(eq|ge|gt|le|lt).  Alternative 2
;; (constraint YDz) is the compare-against-zero form, printed with a
;; bare "0" immediate as FCM expects.
4966 (define_insn "aarch64_cm<optab><mode>"
4967   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4968         (neg:<V_INT_EQUIV>
4969           (COMPARISONS:<V_INT_EQUIV>
4970             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4971             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4972           )))]
4973   "TARGET_SIMD"
4974   "@
4975   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4976   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4977   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4980 ;; fac(ge|gt)
4981 ;; Note we can also handle what would be fac(le|lt) by
4982 ;; generating fac(ge|gt).
;; Absolute compare fac(ge|gt): compares |op1| with |op2|.  The le/lt
;; cases are handled by the <cmp_1>/<cmp_2> operand swap, so only
;; facge/facgt are ever emitted (see comment above).
4984 (define_insn "aarch64_fac<optab><mode>"
4985   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4986         (neg:<V_INT_EQUIV>
4987           (FAC_COMPARISONS:<V_INT_EQUIV>
4988             (abs:VHSDF_HSDF
4989               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4990             (abs:VHSDF_HSDF
4991               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4992   )))]
4993   "TARGET_SIMD"
4994   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4995   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4998 ;; addp
;; Pairwise add (ADDP) on 64-bit integer vectors, modelled as an unspec.
5000 (define_insn "aarch64_addp<mode>"
5001   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5002         (unspec:VD_BHSI
5003           [(match_operand:VD_BHSI 1 "register_operand" "w")
5004            (match_operand:VD_BHSI 2 "register_operand" "w")]
5005           UNSPEC_ADDP))]
5006   "TARGET_SIMD"
5007   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5008   [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar ADDP: reduce the two DI lanes of a V2DI into a single D register.
5011 (define_insn "aarch64_addpdi"
5012   [(set (match_operand:DI 0 "register_operand" "=w")
5013         (unspec:DI
5014           [(match_operand:V2DI 1 "register_operand" "w")]
5015           UNSPEC_ADDP))]
5016   "TARGET_SIMD"
5017   "addp\t%d0, %1.2d"
5018   [(set_attr "type" "neon_reduc_add")]
5021 ;; sqrt
;; Vector square root.  May expand to an approximate Newton-series
;; sequence (aarch64_emit_approx_sqrt) when tuning allows; otherwise
;; falls through to the FSQRT insn below.
5023 (define_expand "sqrt<mode>2"
5024   [(set (match_operand:VHSDF 0 "register_operand")
5025         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5026   "TARGET_SIMD"
5028   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
5029     DONE;
;; Exact vector square root instruction (FSQRT).
5032 (define_insn "*sqrt<mode>2"
5033   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5034         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5035   "TARGET_SIMD"
5036   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5037   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5040 ;; Patterns for vector struct loads and stores.
;; Two-register interleaved structure load (LD2) into an OImode
;; register pair; the inner VQ unspec only records the element mode.
5042 (define_insn "aarch64_simd_ld2<mode>"
5043   [(set (match_operand:OI 0 "register_operand" "=w")
5044         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5045                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5046                    UNSPEC_LD2))]
5047   "TARGET_SIMD"
5048   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5049   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one structure and replicate it to all lanes of two vectors.
;; The memory operand is BLKmode; its size is set by the ldNr expander.
5052 (define_insn "aarch64_simd_ld2r<mode>"
5053   [(set (match_operand:OI 0 "register_operand" "=w")
5054        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5055                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5056                   UNSPEC_LD2_DUP))]
5057   "TARGET_SIMD"
5058   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5059   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 into a single lane; operand 2 is the input register pair being
;; updated in place ("0" match).  The lane number is remapped for
;; endianness at output time.
5062 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5063   [(set (match_operand:OI 0 "register_operand" "=w")
5064         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5065                     (match_operand:OI 2 "register_operand" "0")
5066                     (match_operand:SI 3 "immediate_operand" "i")
5067                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5068                    UNSPEC_LD2_LANE))]
5069   "TARGET_SIMD"
5070   {
5071     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5072     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5073   }
5074   [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander for 2-vector lane loads.  On big-endian the
;; loaded register list is permuted afterwards (TBL via rev_reglist) so
;; that the RTL-level lane numbering stays in GCC vector-extension order.
5077 (define_expand "vec_load_lanesoi<mode>"
5078   [(set (match_operand:OI 0 "register_operand")
5079         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5080                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081                    UNSPEC_LD2))]
5082   "TARGET_SIMD"
5084   if (BYTES_BIG_ENDIAN)
5085     {
5086       rtx tmp = gen_reg_rtx (OImode);
5087       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5089       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5090     }
5091   else
5092     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
5093   DONE;
;; Two-register interleaved structure store (ST2).
5096 (define_insn "aarch64_simd_st2<mode>"
5097   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5098         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5099                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5100                    UNSPEC_ST2))]
5101   "TARGET_SIMD"
5102   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5103   [(set_attr "type" "neon_store2_2reg<q>")]
5106 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 from a single lane of a register pair; lane index is remapped
;; for endianness when the assembly is printed (see comment above).
5107 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5108   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5109         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5110                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5111                     (match_operand:SI 2 "immediate_operand" "i")]
5112                    UNSPEC_ST2_LANE))]
5113   "TARGET_SIMD"
5114   {
5115     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5116     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5117   }
5118   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-name expander for 2-vector lane stores; on big-endian the
;; register list is permuted into memory order before the ST2.
5121 (define_expand "vec_store_lanesoi<mode>"
5122   [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5123         (unspec:OI [(match_operand:OI 1 "register_operand")
5124                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125                    UNSPEC_ST2))]
5126   "TARGET_SIMD"
5128   if (BYTES_BIG_ENDIAN)
5129     {
5130       rtx tmp = gen_reg_rtx (OImode);
5131       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5132       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5133       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5134     }
5135   else
5136     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5137   DONE;
;; Three-register interleaved structure load (LD3) into a CImode triple.
5140 (define_insn "aarch64_simd_ld3<mode>"
5141   [(set (match_operand:CI 0 "register_operand" "=w")
5142         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5143                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144                    UNSPEC_LD3))]
5145   "TARGET_SIMD"
5146   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5147   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure and replicate to all lanes.
5150 (define_insn "aarch64_simd_ld3r<mode>"
5151   [(set (match_operand:CI 0 "register_operand" "=w")
5152        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5153                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5154                   UNSPEC_LD3_DUP))]
5155   "TARGET_SIMD"
5156   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5157   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 into a single lane, updating the input register triple in place;
;; lane number is endian-remapped at output time.
5160 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5161   [(set (match_operand:CI 0 "register_operand" "=w")
5162         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5163                     (match_operand:CI 2 "register_operand" "0")
5164                     (match_operand:SI 3 "immediate_operand" "i")
5165                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5166                    UNSPEC_LD3_LANE))]
5167   "TARGET_SIMD"
5169     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5170     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5172   [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name expander for 3-vector lane loads; permutes the register
;; list on big-endian, mirroring vec_load_lanesoi above.
5175 (define_expand "vec_load_lanesci<mode>"
5176   [(set (match_operand:CI 0 "register_operand")
5177         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5178                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5179                    UNSPEC_LD3))]
5180   "TARGET_SIMD"
5182   if (BYTES_BIG_ENDIAN)
5183     {
5184       rtx tmp = gen_reg_rtx (CImode);
5185       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5186       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5187       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5188     }
5189   else
5190     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5191   DONE;
;; Three-register interleaved structure store (ST3).
5194 (define_insn "aarch64_simd_st3<mode>"
5195   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5196         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5197                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198                    UNSPEC_ST3))]
5199   "TARGET_SIMD"
5200   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5201   [(set_attr "type" "neon_store3_3reg<q>")]
5204 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 from a single lane of a register triple; lane index is
;; endian-remapped when printing the assembly.
5205 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5206   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5207         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5208                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5209                      (match_operand:SI 2 "immediate_operand" "i")]
5210                     UNSPEC_ST3_LANE))]
5211   "TARGET_SIMD"
5212   {
5213     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5214     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5215   }
5216   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name expander for 3-vector lane stores; permutes the register
;; list into memory order on big-endian before the ST3.
5219 (define_expand "vec_store_lanesci<mode>"
5220   [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5221         (unspec:CI [(match_operand:CI 1 "register_operand")
5222                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223                    UNSPEC_ST3))]
5224   "TARGET_SIMD"
5226   if (BYTES_BIG_ENDIAN)
5227     {
5228       rtx tmp = gen_reg_rtx (CImode);
5229       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5230       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5231       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5232     }
5233   else
5234     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5235   DONE;
;; Four-register interleaved structure load (LD4) into an XImode quad.
5238 (define_insn "aarch64_simd_ld4<mode>"
5239   [(set (match_operand:XI 0 "register_operand" "=w")
5240         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5241                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242                    UNSPEC_LD4))]
5243   "TARGET_SIMD"
5244   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245   [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate to all lanes.
5248 (define_insn "aarch64_simd_ld4r<mode>"
5249   [(set (match_operand:XI 0 "register_operand" "=w")
5250        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5252                   UNSPEC_LD4_DUP))]
5253   "TARGET_SIMD"
5254   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5255   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 into a single lane, updating the input register quad in place;
;; lane number is endian-remapped at output time.
5258 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5259   [(set (match_operand:XI 0 "register_operand" "=w")
5260         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5261                     (match_operand:XI 2 "register_operand" "0")
5262                     (match_operand:SI 3 "immediate_operand" "i")
5263                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5264                    UNSPEC_LD4_LANE))]
5265   "TARGET_SIMD"
5267     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5268     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5270   [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander for 4-vector lane loads; permutes the register
;; list on big-endian, as for the 2- and 3-vector variants.
5273 (define_expand "vec_load_lanesxi<mode>"
5274   [(set (match_operand:XI 0 "register_operand")
5275         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5276                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5277                    UNSPEC_LD4))]
5278   "TARGET_SIMD"
5280   if (BYTES_BIG_ENDIAN)
5281     {
5282       rtx tmp = gen_reg_rtx (XImode);
5283       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5284       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5285       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5286     }
5287   else
5288     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5289   DONE;
;; Four-register interleaved structure store (ST4).
5292 (define_insn "aarch64_simd_st4<mode>"
5293   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5294         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5295                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5296                    UNSPEC_ST4))]
5297   "TARGET_SIMD"
5298   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5299   [(set_attr "type" "neon_store4_4reg<q>")]
5302 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 from a single lane of a register quad; lane index is
;; endian-remapped when the assembly is printed.
5303 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5304   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5305         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5306                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5307                      (match_operand:SI 2 "immediate_operand" "i")]
5308                     UNSPEC_ST4_LANE))]
5309   "TARGET_SIMD"
5310   {
5311     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5312     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5313   }
5314   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander for 4-vector lane stores; permutes the register
;; list into memory order on big-endian before the ST4.
5317 (define_expand "vec_store_lanesxi<mode>"
5318   [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5319         (unspec:XI [(match_operand:XI 1 "register_operand")
5320                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5321                    UNSPEC_ST4))]
5322   "TARGET_SIMD"
5324   if (BYTES_BIG_ENDIAN)
5325     {
5326       rtx tmp = gen_reg_rtx (XImode);
5327       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5328       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5329       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5330     }
5331   else
5332     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5333   DONE;
;; Big-endian register-list permutation used by the vec_load/store_lanes
;; expanders.  After reload it splits into one TBL (aarch64_tbl1v16qi) per
;; 128-bit register in the tuple, applying the byte-permute mask in
;; operand 2.  Earlyclobber ("=&w") keeps the destination tuple disjoint
;; from the source while the per-register TBLs are emitted.
5336 (define_insn_and_split "aarch64_rev_reglist<mode>"
5337 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5338         (unspec:VSTRUCT
5339                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5340                     (match_operand:V16QI 2 "register_operand" "w")]
5341                    UNSPEC_REV_REGLIST))]
5342   "TARGET_SIMD"
5343   "#"
5344   "&& reload_completed"
5345   [(const_int 0)]
5347   int i;
5348   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5349   for (i = 0; i < nregs; i++)
5350     {
5351       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5352       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5353       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5354     }
5355   DONE;
5357   [(set_attr "type" "neon_tbl1_q")
5358    (set_attr "length" "<insn_count>")]
5361 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for AdvSIMD register-tuple (VSTRUCT) modes.  Before
;; register allocation, force memory/constant sources into a register
;; whenever the destination is not already a register.
5363 (define_expand "mov<mode>"
5364   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5365         (match_operand:VSTRUCT 1 "general_operand"))]
5366   "TARGET_SIMD"
5368   if (can_create_pseudo_p ())
5369     {
5370       if (GET_CODE (operands[0]) != REG)
5371         operands[1] = force_reg (<MODE>mode, operands[1]);
5372     }
;; Builtin expander for ld1x3: wrap the base address (operand 1) in a
;; CImode MEM and emit the three-register LD1 insn below.
5376 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5377   [(match_operand:CI 0 "register_operand")
5378    (match_operand:DI 1 "register_operand")
5379    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380   "TARGET_SIMD"
5382   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5383   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5384   DONE;
;; Non-interleaving load of three consecutive vectors (LD1 with a
;; 3-register list).
5387 (define_insn "aarch64_ld1_x3_<mode>"
5388   [(set (match_operand:CI 0 "register_operand" "=w")
5389         (unspec:CI
5390           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5391            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5392   "TARGET_SIMD"
5393   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5394   [(set_attr "type" "neon_load1_3reg<q>")]
;; Builtin expander for ld1x4: wrap the base address in an XImode MEM
;; and emit the four-register LD1 insn below.
5397 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5398   [(match_operand:XI 0 "register_operand" "=w")
5399    (match_operand:DI 1 "register_operand" "r")
5400    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5401   "TARGET_SIMD"
5403   rtx mem = gen_rtx_MEM (XImode, operands[1]);
5404   emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5405   DONE;
;; Non-interleaving load of four consecutive vectors (LD1 with a
;; 4-register list).
5408 (define_insn "aarch64_ld1_x4_<mode>"
5409   [(set (match_operand:XI 0 "register_operand" "=w")
5410         (unspec:XI
5411           [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5412            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5413         UNSPEC_LD1))]
5414   "TARGET_SIMD"
5415   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5416   [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for st1x2: wrap the base address (operand 0) in an
;; OImode MEM and emit the two-register ST1 insn below.
5419 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5420   [(match_operand:DI 0 "register_operand")
5421    (match_operand:OI 1 "register_operand")
5422    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5423   "TARGET_SIMD"
5425   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5426   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5427   DONE;
;; Non-interleaving store of two consecutive vectors (ST1, 2-reg list).
5430 (define_insn "aarch64_st1_x2_<mode>"
5431    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5432          (unspec:OI
5433           [(match_operand:OI 1 "register_operand" "w")
5434           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5435   "TARGET_SIMD"
5436   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5437   [(set_attr "type" "neon_store1_2reg<q>")]
;; Builtin expander for st1x3: wrap the base address in a CImode MEM and
;; emit the three-register ST1 insn below.
5440 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5441   [(match_operand:DI 0 "register_operand")
5442    (match_operand:CI 1 "register_operand")
5443    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444   "TARGET_SIMD"
5446   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5447   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5448   DONE;
;; Non-interleaving store of three consecutive vectors (ST1, 3-reg list).
5451 (define_insn "aarch64_st1_x3_<mode>"
5452    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5453         (unspec:CI
5454          [(match_operand:CI 1 "register_operand" "w")
5455           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5456   "TARGET_SIMD"
5457   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5458   [(set_attr "type" "neon_store1_3reg<q>")]
;; Builtin expander for st1x4: wrap the base address in an XImode MEM and
;; emit the four-register ST1 insn below.
5461 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5462   [(match_operand:DI 0 "register_operand" "")
5463    (match_operand:XI 1 "register_operand" "")
5464    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5465   "TARGET_SIMD"
5467   rtx mem = gen_rtx_MEM (XImode, operands[0]);
5468   emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5469   DONE;
;; Non-interleaving store of four consecutive vectors (ST1, 4-reg list).
5472 (define_insn "aarch64_st1_x4_<mode>"
5473   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5474         (unspec:XI
5475            [(match_operand:XI 1 "register_operand" "w")
5476            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5477         UNSPEC_ST1))]
5478   "TARGET_SIMD"
5479   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5480   [(set_attr "type" "neon_store1_4reg<q>")]
;; Little-endian VSTRUCT moves: register-to-register moves are left as
;; "#" and handled by the post-reload splits further down; memory
;; transfers use multi-register ST1/LD1 with a .16b arrangement.
5483 (define_insn "*aarch64_mov<mode>"
5484   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5485         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5486   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5487    && (register_operand (operands[0], <MODE>mode)
5488        || register_operand (operands[1], <MODE>mode))"
5489   "@
5490    #
5491    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5492    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5493   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5494                      neon_load<nregs>_<nregs>reg_q")
5495    (set_attr "length" "<insn_count>,4,4")]
;; Single-register LD1 kept as an unspec so big-endian lane layout is
;; preserved exactly as loaded.
5498 (define_insn "aarch64_be_ld1<mode>"
5499   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5500         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5501                              "aarch64_simd_struct_operand" "Utv")]
5502         UNSPEC_LD1))]
5503   "TARGET_SIMD"
5504   "ld1\\t{%0<Vmtype>}, %1"
5505   [(set_attr "type" "neon_load1_1reg<q>")]
;; Single-register ST1 counterpart of aarch64_be_ld1 above.
5508 (define_insn "aarch64_be_st1<mode>"
5509   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5510         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5511         UNSPEC_ST1))]
5512   "TARGET_SIMD"
5513   "st1\\t{%1<Vmtype>}, %0"
5514   [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OImode (2 x 128-bit) move: reg-reg is "#" (split later),
;; memory transfers use STP/LDP of Q registers.
5517 (define_insn "*aarch64_be_movoi"
5518   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5519         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5520   "TARGET_SIMD && BYTES_BIG_ENDIAN
5521    && (register_operand (operands[0], OImode)
5522        || register_operand (operands[1], OImode))"
5523   "@
5524    #
5525    stp\\t%q1, %R1, %0
5526    ldp\\t%q0, %R0, %1"
5527   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5528    (set_attr "length" "8,4,4")]
;; Big-endian CImode (3 x 128-bit) move: always "#", decomposed by the
;; CI define_split below; memory uses an offsettable ("o") constraint.
5531 (define_insn "*aarch64_be_movci"
5532   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5533         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5534   "TARGET_SIMD && BYTES_BIG_ENDIAN
5535    && (register_operand (operands[0], CImode)
5536        || register_operand (operands[1], CImode))"
5537   "#"
5538   [(set_attr "type" "multiple")
5539    (set_attr "length" "12,4,4")]
;; Big-endian XImode (4 x 128-bit) move: always "#", decomposed by the
;; XI define_split below.
5542 (define_insn "*aarch64_be_movxi"
5543   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5544         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5545   "TARGET_SIMD && BYTES_BIG_ENDIAN
5546    && (register_operand (operands[0], XImode)
5547        || register_operand (operands[1], XImode))"
5548   "#"
5549   [(set_attr "type" "multiple")
5550    (set_attr "length" "16,4,4")]
;; Post-reload split of an OImode reg-reg move into two TImode
;; register moves, ordered so overlap is handled correctly.
5553 (define_split
5554   [(set (match_operand:OI 0 "register_operand")
5555         (match_operand:OI 1 "register_operand"))]
5556   "TARGET_SIMD && reload_completed"
5557   [(const_int 0)]
5559   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5560   DONE;
;; Post-reload split of CImode moves: reg-reg becomes three TImode moves;
;; big-endian memory moves become an OImode move for the first 32 bytes
;; plus a V16QI/TImode move for the trailing 16 bytes; anything else FAILs
;; back to the generic move machinery.
5563 (define_split
5564   [(set (match_operand:CI 0 "nonimmediate_operand")
5565         (match_operand:CI 1 "general_operand"))]
5566   "TARGET_SIMD && reload_completed"
5567   [(const_int 0)]
5569   if (register_operand (operands[0], CImode)
5570       && register_operand (operands[1], CImode))
5571     {
5572       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5573       DONE;
5574     }
5575   else if (BYTES_BIG_ENDIAN)
5576     {
5577       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5578                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5579       emit_move_insn (gen_lowpart (V16QImode,
5580                                    simplify_gen_subreg (TImode, operands[0],
5581                                                         CImode, 32)),
5582                       gen_lowpart (V16QImode,
5583                                    simplify_gen_subreg (TImode, operands[1],
5584                                                         CImode, 32)));
5585       DONE;
5586     }
5587   else
5588     FAIL;
;; Post-reload split of XImode moves: reg-reg becomes four TImode moves;
;; big-endian memory moves become two OImode moves (bytes 0-31 and
;; 32-63); anything else FAILs back to the generic move machinery.
5591 (define_split
5592   [(set (match_operand:XI 0 "nonimmediate_operand")
5593         (match_operand:XI 1 "general_operand"))]
5594   "TARGET_SIMD && reload_completed"
5595   [(const_int 0)]
5597   if (register_operand (operands[0], XImode)
5598       && register_operand (operands[1], XImode))
5599     {
5600       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5601       DONE;
5602     }
5603   else if (BYTES_BIG_ENDIAN)
5604     {
5605       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5606                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5607       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5608                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5609       DONE;
5610     }
5611   else
5612     FAIL;
;; Generic expander for the ld<N>r builtins: build a BLKmode MEM whose
;; size is one structure (element size * nregs) so alias analysis knows
;; exactly how much memory the replicating load reads, then emit the
;; matching aarch64_simd_ld<N>r insn.
5615 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5616   [(match_operand:VSTRUCT 0 "register_operand")
5617    (match_operand:DI 1 "register_operand")
5618    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619   "TARGET_SIMD"
5621   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5622   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5623                      * <VSTRUCT:nregs>);
5625   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5626                                                                 mem));
5627   DONE;
;; 64-bit-vector (D-register) LD2 pair.  For the VD modes a real
;; interleaving LD2 is used; for DX (single-element 64-bit) modes there is
;; nothing to interleave, so a plain LD1 of two .1d registers suffices.
5630 (define_insn "aarch64_ld2<mode>_dreg"
5631   [(set (match_operand:OI 0 "register_operand" "=w")
5632         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5633                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5634                    UNSPEC_LD2_DREG))]
5635   "TARGET_SIMD"
5636   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5637   [(set_attr "type" "neon_load2_2reg<q>")]
5640 (define_insn "aarch64_ld2<mode>_dreg"
5641   [(set (match_operand:OI 0 "register_operand" "=w")
5642         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5643                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5644                    UNSPEC_LD2_DREG))]
5645   "TARGET_SIMD"
5646   "ld1\\t{%S0.1d - %T0.1d}, %1"
5647   [(set_attr "type" "neon_load1_2reg<q>")]
5650 (define_insn "aarch64_ld3<mode>_dreg"
5651   [(set (match_operand:CI 0 "register_operand" "=w")
5652         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5653                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5654                    UNSPEC_LD3_DREG))]
5655   "TARGET_SIMD"
5656   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5657   [(set_attr "type" "neon_load3_3reg<q>")]
5660 (define_insn "aarch64_ld3<mode>_dreg"
5661   [(set (match_operand:CI 0 "register_operand" "=w")
5662         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5663                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5664                    UNSPEC_LD3_DREG))]
5665   "TARGET_SIMD"
5666   "ld1\\t{%S0.1d - %U0.1d}, %1"
5667   [(set_attr "type" "neon_load1_3reg<q>")]
5670 (define_insn "aarch64_ld4<mode>_dreg"
5671   [(set (match_operand:XI 0 "register_operand" "=w")
5672         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5673                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5674                    UNSPEC_LD4_DREG))]
5675   "TARGET_SIMD"
5676   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5677   [(set_attr "type" "neon_load4_4reg<q>")]
5680 (define_insn "aarch64_ld4<mode>_dreg"
5681   [(set (match_operand:XI 0 "register_operand" "=w")
5682         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5683                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5684                    UNSPEC_LD4_DREG))]
5685   "TARGET_SIMD"
5686   "ld1\\t{%S0.1d - %V0.1d}, %1"
5687   [(set_attr "type" "neon_load1_4reg<q>")]
;; Expander for vldN on 64-bit (VDC) vectors: build a BLKmode MEM of
;; <nregs> * 8 bytes at the address in operand 1 and defer to the matching
;; *_dreg instruction pattern above.
5690 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5691  [(match_operand:VSTRUCT 0 "register_operand")
5692   (match_operand:DI 1 "register_operand")
5693   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5694   "TARGET_SIMD"
5696   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5697   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5699   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5700   DONE;
;; Single-register vld1: an ordinary vector move on little-endian; on
;; big-endian a dedicated pattern is used so the lane order matches what
;; LD1 produces rather than what a plain register move would give.
5703 (define_expand "aarch64_ld1<VALL_F16:mode>"
5704  [(match_operand:VALL_F16 0 "register_operand")
5705   (match_operand:DI 1 "register_operand")]
5706   "TARGET_SIMD"
5708   machine_mode mode = <VALL_F16:MODE>mode;
5709   rtx mem = gen_rtx_MEM (mode, operands[1]);
5711   if (BYTES_BIG_ENDIAN)
5712     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5713   else
5714     emit_move_insn (operands[0], mem);
5715   DONE;
;; Expander for vldN on 128-bit (VQ) vectors: the structure mode itself is
;; a valid MEM mode here, so no explicit memory size needs recording.
5718 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5719  [(match_operand:VSTRUCT 0 "register_operand")
5720   (match_operand:DI 1 "register_operand")
5721   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722   "TARGET_SIMD"
5724   machine_mode mode = <VSTRUCT:MODE>mode;
5725   rtx mem = gen_rtx_MEM (mode, operands[1]);
5727   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5728   DONE;
;; vld1_x2 on 128-bit vectors: LD1 of two consecutive registers into an
;; OImode destination.
5731 (define_expand "aarch64_ld1x2<VQ:mode>"
5732  [(match_operand:OI 0 "register_operand")
5733   (match_operand:DI 1 "register_operand")
5734   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5735   "TARGET_SIMD"
5737   machine_mode mode = OImode;
5738   rtx mem = gen_rtx_MEM (mode, operands[1]);
5740   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5741   DONE;
;; vld1_x2 on 64-bit vectors.
5744 (define_expand "aarch64_ld1x2<VDC:mode>"
5745  [(match_operand:OI 0 "register_operand")
5746   (match_operand:DI 1 "register_operand")
5747   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5748   "TARGET_SIMD"
5750   machine_mode mode = OImode;
5751   rtx mem = gen_rtx_MEM (mode, operands[1]);
5753   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5754   DONE;
;; vldN_lane: load one element per structure register into lane operand 3,
;; taking the remaining lanes from operand 2.  The lane index is bounds
;; checked before the load-lanes pattern is emitted.
5758 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5759   [(match_operand:VSTRUCT 0 "register_operand")
5760         (match_operand:DI 1 "register_operand")
5761         (match_operand:VSTRUCT 2 "register_operand")
5762         (match_operand:SI 3 "immediate_operand")
5763         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5764   "TARGET_SIMD"
5766   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5767   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5768                      * <VSTRUCT:nregs>);
5770   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5771   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5772         operands[0], mem, operands[2], operands[3]));
5773   DONE;
5776 ;; Expanders for builtins to extract vector registers from large
5777 ;; opaque integer modes.
5779 ;; D-register list.
;; Extract D register number <part> from a structure value: copy the
;; containing 128-bit chunk (byte offset part * 16) into a temporary, then
;; take its low 64 bits.
5781 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5782  [(match_operand:VDC 0 "register_operand")
5783   (match_operand:VSTRUCT 1 "register_operand")
5784   (match_operand:SI 2 "immediate_operand")]
5785   "TARGET_SIMD"
5787   int part = INTVAL (operands[2]);
5788   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5789   int offset = part * 16;
5791   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5792   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5793   DONE;
5796 ;; Q-register list.
;; Extract Q register number <part> from a structure value with a single
;; subreg move from byte offset part * 16.
5798 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5799  [(match_operand:VQ 0 "register_operand")
5800   (match_operand:VSTRUCT 1 "register_operand")
5801   (match_operand:SI 2 "immediate_operand")]
5802   "TARGET_SIMD"
5804   int part = INTVAL (operands[2]);
5805   int offset = part * 16;
5807   emit_move_insn (operands[0],
5808                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5809   DONE;
5812 ;; Permuted-store expanders for neon intrinsics.
5814 ;; Permute instructions
5816 ;; vec_perm support
;; Standard vec_perm pattern with a variable selector (operand 3); the
;; target-specific lowering is done in aarch64_expand_vec_perm.
5818 (define_expand "vec_perm<mode>"
5819   [(match_operand:VB 0 "register_operand")
5820    (match_operand:VB 1 "register_operand")
5821    (match_operand:VB 2 "register_operand")
5822    (match_operand:VB 3 "register_operand")]
5823   "TARGET_SIMD"
5825   aarch64_expand_vec_perm (operands[0], operands[1],
5826                            operands[2], operands[3], <nunits>);
5827   DONE;
;; Single-table TBL lookup: index into one 128-bit table register
;; (operand 1) using the byte indices in operand 2.
5830 (define_insn "aarch64_tbl1<mode>"
5831   [(set (match_operand:VB 0 "register_operand" "=w")
5832         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5833                     (match_operand:VB 2 "register_operand" "w")]
5834                    UNSPEC_TBL))]
5835   "TARGET_SIMD"
5836   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5837   [(set_attr "type" "neon_tbl1<q>")]
5840 ;; Two source registers.
;; TBL with a two-register (OImode) table and a V16QI result.
5842 (define_insn "aarch64_tbl2v16qi"
5843   [(set (match_operand:V16QI 0 "register_operand" "=w")
5844         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5845                        (match_operand:V16QI 2 "register_operand" "w")]
5846                       UNSPEC_TBL))]
5847   "TARGET_SIMD"
5848   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5849   [(set_attr "type" "neon_tbl2_q")]
;; TBL over a two-register (OImode) table, for any VB result mode.
5852 (define_insn "aarch64_tbl3<mode>"
5853   [(set (match_operand:VB 0 "register_operand" "=w")
5854         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5855                       (match_operand:VB 2 "register_operand" "w")]
5856                       UNSPEC_TBL))]
5857   "TARGET_SIMD"
5858   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5859   [(set_attr "type" "neon_tbl3")]
;; TBX over a two-register (OImode) table: out-of-range indices keep the
;; corresponding byte of operand 1 (tied to the destination).
5862 (define_insn "aarch64_tbx4<mode>"
5863   [(set (match_operand:VB 0 "register_operand" "=w")
5864         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5865                       (match_operand:OI 2 "register_operand" "w")
5866                       (match_operand:VB 3 "register_operand" "w")]
5867                       UNSPEC_TBX))]
5868   "TARGET_SIMD"
5869   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5870   [(set_attr "type" "neon_tbl4")]
5873 ;; Three source registers.
;; TBL over a three-register (CImode) table.
5875 (define_insn "aarch64_qtbl3<mode>"
5876   [(set (match_operand:VB 0 "register_operand" "=w")
5877         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5878                       (match_operand:VB 2 "register_operand" "w")]
5879                       UNSPEC_TBL))]
5880   "TARGET_SIMD"
5881   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5882   [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register (CImode) table; operand 1 is tied to the
;; destination to supply the fallback bytes.
5885 (define_insn "aarch64_qtbx3<mode>"
5886   [(set (match_operand:VB 0 "register_operand" "=w")
5887         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5888                       (match_operand:CI 2 "register_operand" "w")
5889                       (match_operand:VB 3 "register_operand" "w")]
5890                       UNSPEC_TBX))]
5891   "TARGET_SIMD"
5892   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5893   [(set_attr "type" "neon_tbl3")]
5896 ;; Four source registers.
;; TBL over a four-register (XImode) table.
5898 (define_insn "aarch64_qtbl4<mode>"
5899   [(set (match_operand:VB 0 "register_operand" "=w")
5900         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5901                       (match_operand:VB 2 "register_operand" "w")]
5902                       UNSPEC_TBL))]
5903   "TARGET_SIMD"
5904   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5905   [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register (XImode) table; operand 1 is tied to the
;; destination to supply the fallback bytes.
5908 (define_insn "aarch64_qtbx4<mode>"
5909   [(set (match_operand:VB 0 "register_operand" "=w")
5910         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5911                       (match_operand:XI 2 "register_operand" "w")
5912                       (match_operand:VB 3 "register_operand" "w")]
5913                       UNSPEC_TBX))]
5914   "TARGET_SIMD"
5915   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5916   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into an OImode register pair.  Emitted
;; as "#" and split after reload (when hard register numbers are known) by
;; aarch64_split_combinev16qi.
5919 (define_insn_and_split "aarch64_combinev16qi"
5920   [(set (match_operand:OI 0 "register_operand" "=w")
5921         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5922                     (match_operand:V16QI 2 "register_operand" "w")]
5923                    UNSPEC_CONCAT))]
5924   "TARGET_SIMD"
5925   "#"
5926   "&& reload_completed"
5927   [(const_int 0)]
5929   aarch64_split_combinev16qi (operands);
5930   DONE;
5932 [(set_attr "type" "multiple")]
5935 ;; This instruction's pattern is generated directly by
5936 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5937 ;; need corresponding changes there.
;; Two-operand permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 via the PERMUTE
;; iterator.
5938 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5939   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5940         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5941                           (match_operand:VALL_F16 2 "register_operand" "w")]
5942          PERMUTE))]
5943   "TARGET_SIMD"
5944   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5945   [(set_attr "type" "neon_permute<q>")]
5948 ;; This instruction's pattern is generated directly by
5949 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5950 ;; need corresponding changes there.  Note that the immediate (third)
5951 ;; operand is a lane index not a byte index.
5952 (define_insn "aarch64_ext<mode>"
5953   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5954         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5955                           (match_operand:VALL_F16 2 "register_operand" "w")
5956                           (match_operand:SI 3 "immediate_operand" "i")]
5957          UNSPEC_EXT))]
5958   "TARGET_SIMD"
;; Scale the lane index to the byte offset EXT expects before printing.
5960   operands[3] = GEN_INT (INTVAL (operands[3])
5961       * GET_MODE_UNIT_SIZE (<MODE>mode));
5962   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5964   [(set_attr "type" "neon_ext<q>")]
5967 ;; This instruction's pattern is generated directly by
5968 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5969 ;; need corresponding changes there.
;; Element reversal: REV16/REV32/REV64 via the REVERSE iterator.
5970 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5971   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5972         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5973                     REVERSE))]
5974   "TARGET_SIMD"
5975   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5976   [(set_attr "type" "neon_rev<q>")]
;; Store counterparts of the ldN_dreg patterns above: real STN for VD
;; element modes, plain ST1 for DX (single-element 64-bit) modes.
5979 (define_insn "aarch64_st2<mode>_dreg"
5980   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5981         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5982                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5983                    UNSPEC_ST2))]
5984   "TARGET_SIMD"
5985   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5986   [(set_attr "type" "neon_store2_2reg")]
;; DX variant: ST1 of two .1d registers.
5989 (define_insn "aarch64_st2<mode>_dreg"
5990   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5991         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5992                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5993                    UNSPEC_ST2))]
5994   "TARGET_SIMD"
5995   "st1\\t{%S1.1d - %T1.1d}, %0"
5996   [(set_attr "type" "neon_store1_2reg")]
;; Three-register structure store (ST3) for VD modes.
5999 (define_insn "aarch64_st3<mode>_dreg"
6000   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6001         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6002                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6003                    UNSPEC_ST3))]
6004   "TARGET_SIMD"
6005   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6006   [(set_attr "type" "neon_store3_3reg")]
;; DX variant: ST1 of three .1d registers.
6009 (define_insn "aarch64_st3<mode>_dreg"
6010   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6011         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6012                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6013                    UNSPEC_ST3))]
6014   "TARGET_SIMD"
6015   "st1\\t{%S1.1d - %U1.1d}, %0"
6016   [(set_attr "type" "neon_store1_3reg")]
;; Four-register structure store (ST4) for VD modes.
6019 (define_insn "aarch64_st4<mode>_dreg"
6020   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6021         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6022                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6023                    UNSPEC_ST4))]
6024   "TARGET_SIMD"
6025   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6026   [(set_attr "type" "neon_store4_4reg")]
;; DX variant: ST1 of four .1d registers.
6029 (define_insn "aarch64_st4<mode>_dreg"
6030   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6031         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6032                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6033                    UNSPEC_ST4))]
6034   "TARGET_SIMD"
6035   "st1\\t{%S1.1d - %V1.1d}, %0"
6036   [(set_attr "type" "neon_store1_4reg")]
;; Expander for vstN on 64-bit (VDC) vectors: mirror of the load expander,
;; storing <nregs> * 8 bytes to the address in operand 0.
6039 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6040  [(match_operand:DI 0 "register_operand")
6041   (match_operand:VSTRUCT 1 "register_operand")
6042   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6043   "TARGET_SIMD"
6045   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6046   set_mem_size (mem, <VSTRUCT:nregs> * 8);
6048   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
6049   DONE;
;; Expander for vstN on 128-bit (VQ) vectors.
6052 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6053  [(match_operand:DI 0 "register_operand")
6054   (match_operand:VSTRUCT 1 "register_operand")
6055   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6056   "TARGET_SIMD"
6058   machine_mode mode = <VSTRUCT:MODE>mode;
6059   rtx mem = gen_rtx_MEM (mode, operands[0]);
6061   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
6062   DONE;
;; vstN_lane: store one element per structure register, taken from lane
;; operand 2 of the registers in operand 1.
6065 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6066  [(match_operand:DI 0 "register_operand")
6067   (match_operand:VSTRUCT 1 "register_operand")
6068   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6069   (match_operand:SI 2 "immediate_operand")]
6070   "TARGET_SIMD"
6072   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6073   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6074                      * <VSTRUCT:nregs>);
6076   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6077                 mem, operands[1], operands[2]));
6078   DONE;
;; Single-register vst1: a plain vector store on little-endian, a
;; dedicated ST1 pattern on big-endian (mirror of aarch64_ld1 above).
6081 (define_expand "aarch64_st1<VALL_F16:mode>"
6082  [(match_operand:DI 0 "register_operand")
6083   (match_operand:VALL_F16 1 "register_operand")]
6084   "TARGET_SIMD"
6086   machine_mode mode = <VALL_F16:MODE>mode;
6087   rtx mem = gen_rtx_MEM (mode, operands[0]);
6089   if (BYTES_BIG_ENDIAN)
6090     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6091   else
6092     emit_move_insn (mem, operands[1]);
6093   DONE;
6096 ;; Expander for builtins to insert vector registers into large
6097 ;; opaque integer modes.
6099 ;; Q-register list.  We don't need a D-reg inserter as we zero
6100 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole structure value, then overwrite Q register number
;; <part> (byte offset part * 16) with operand 2.
6102 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6103  [(match_operand:VSTRUCT 0 "register_operand")
6104   (match_operand:VSTRUCT 1 "register_operand")
6105   (match_operand:VQ 2 "register_operand")
6106   (match_operand:SI 3 "immediate_operand")]
6107   "TARGET_SIMD"
6109   int part = INTVAL (operands[3]);
6110   int offset = part * 16;
6112   emit_move_insn (operands[0], operands[1]);
6113   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6114                   operands[2]);
6115   DONE;
6118 ;; Standard pattern name vec_init<mode><Vel>.
;; Initialise a vector from element values; all the work is done in
;; aarch64_expand_vector_init.
6120 (define_expand "vec_init<mode><Vel>"
6121   [(match_operand:VALL_F16 0 "register_operand")
6122    (match_operand 1 "" "")]
6123   "TARGET_SIMD"
6125   aarch64_expand_vector_init (operands[0], operands[1]);
6126   DONE;
;; Initialise a 128-bit vector from two half-width vectors.
6129 (define_expand "vec_init<mode><Vhalf>"
6130   [(match_operand:VQ_NO2E 0 "register_operand")
6131    (match_operand 1 "" "")]
6132   "TARGET_SIMD"
6134   aarch64_expand_vector_init (operands[0], operands[1]);
6135   DONE;
;; LD1R: load a single element from memory and duplicate it into every
;; lane of the destination, expressed as a vec_duplicate of a scalar MEM.
6138 (define_insn "*aarch64_simd_ld1r<mode>"
6139   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6140         (vec_duplicate:VALL_F16
6141           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6142   "TARGET_SIMD"
6143   "ld1r\\t{%0.<Vtype>}, %1"
6144   [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 of two consecutive registers (vld1_x2) for 128-bit vector modes.
6147 (define_insn "aarch64_simd_ld1<mode>_x2"
6148   [(set (match_operand:OI 0 "register_operand" "=w")
6149         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6150                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6151                    UNSPEC_LD1))]
6152   "TARGET_SIMD"
6153   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6154   [(set_attr "type" "neon_load1_2reg<q>")]
;; Same for 64-bit vector modes.
6157 (define_insn "aarch64_simd_ld1<mode>_x2"
6158   [(set (match_operand:OI 0 "register_operand" "=w")
6159         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6160                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6161                    UNSPEC_LD1))]
6162   "TARGET_SIMD"
6163   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6164   [(set_attr "type" "neon_load1_2reg<q>")]
;; FRECPE: floating-point reciprocal estimate, vector and scalar forms.
6168 (define_insn "@aarch64_frecpe<mode>"
6169   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6170         (unspec:VHSDF_HSDF
6171          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6172          UNSPEC_FRECPE))]
6173   "TARGET_SIMD"
6174   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6175   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; FRECPX: floating-point reciprocal exponent, scalar only.
6178 (define_insn "aarch64_frecpx<mode>"
6179   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6180         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6181          UNSPEC_FRECPX))]
6182   "TARGET_SIMD"
6183   "frecpx\t%<s>0, %<s>1"
6184   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
;; FRECPS: Newton-Raphson reciprocal step, vector and scalar forms.
6187 (define_insn "@aarch64_frecps<mode>"
6188   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6189         (unspec:VHSDF_HSDF
6190           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6191           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6192           UNSPEC_FRECPS))]
6193   "TARGET_SIMD"
6194   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6195   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; URECPE: unsigned integer reciprocal estimate.
6198 (define_insn "aarch64_urecpe<mode>"
6199   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6200         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6201                 UNSPEC_URECPE))]
6202  "TARGET_SIMD"
6203  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6204   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6206 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract the scalar element at constant index operand 2; defers to the
;; aarch64_get_lane pattern.
6208 (define_expand "vec_extract<mode><Vel>"
6209   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6210    (match_operand:VALL_F16 1 "register_operand")
6211    (match_operand:SI 2 "immediate_operand")]
6212   "TARGET_SIMD"
6214     emit_insn
6215       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6216     DONE;
6219 ;; Extract a 64-bit vector from one half of a 128-bit vector.
;; Only extraction of the low half (start 0) or the high half
;; (start nunits/2) is supported; anything else FAILs so the generic
;; fallback expansion is used instead.
6220 (define_expand "vec_extract<mode><Vhalf>"
6221   [(match_operand:<VHALF> 0 "register_operand")
6222    (match_operand:VQMOV_NO2E 1 "register_operand")
6223    (match_operand 2 "immediate_operand")]
6224   "TARGET_SIMD"
6226   int start = INTVAL (operands[2]);
6227   if (start != 0 && start != <nunits> / 2)
6228     FAIL;
6229   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
6230   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
6231   DONE;
6234 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
6235 (define_expand "vec_extractv2dfv1df"
6236   [(match_operand:V1DF 0 "register_operand")
6237    (match_operand:V2DF 1 "register_operand")
6238    (match_operand 2 "immediate_operand")]
6239   "TARGET_SIMD"
6241   /* V1DF is rarely used by other patterns, so it is better to hide
6242      it in a subreg destination of a normal DF op.  */
6243   rtx scalar0 = gen_lowpart (DFmode, operands[0]);
6244   emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
6245   DONE;
6248 ;; aes
;; AESE/AESD round: both instructions first XOR the state with the round
;; key, which is modelled explicitly so combine can fold a preceding XOR.
;; The XOR is commutative, hence the "%0" constraint on operand 1.
6250 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6251   [(set (match_operand:V16QI 0 "register_operand" "=w")
6252         (unspec:V16QI
6253                 [(xor:V16QI
6254                  (match_operand:V16QI 1 "register_operand" "%0")
6255                  (match_operand:V16QI 2 "register_operand" "w"))]
6256          CRYPTO_AES))]
6257   "TARGET_SIMD && TARGET_AES"
6258   "aes<aes_op>\\t%0.16b, %2.16b"
6259   [(set_attr "type" "crypto_aese")]
;; AESMC/AESIMC: AES (inverse) mix columns.
6262 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6263   [(set (match_operand:V16QI 0 "register_operand" "=w")
6264         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6265          CRYPTO_AESMC))]
6266   "TARGET_SIMD && TARGET_AES"
6267   "aes<aesmc_op>\\t%0.16b, %1.16b"
6268   [(set_attr "type" "crypto_aesmc")]
6271 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6272 ;; and enforce the register dependency without scheduling or register
6273 ;; allocation messing up the order or introducing moves in between.
6274 ;;  Mash the two together during combine.
6276 (define_insn "*aarch64_crypto_aese_fused"
6277   [(set (match_operand:V16QI 0 "register_operand" "=w")
6278         (unspec:V16QI
6279           [(unspec:V16QI
6280            [(xor:V16QI
6281                 (match_operand:V16QI 1 "register_operand" "%0")
6282                 (match_operand:V16QI 2 "register_operand" "w"))]
6283              UNSPEC_AESE)]
6284         UNSPEC_AESMC))]
6285   "TARGET_SIMD && TARGET_AES
6286    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6287   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6288   [(set_attr "type" "crypto_aese")
6289    (set_attr "length" "8")]
6292 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6293 ;; and enforce the register dependency without scheduling or register
6294 ;; allocation messing up the order or introducing moves in between.
6295 ;;  Mash the two together during combine.
6297 (define_insn "*aarch64_crypto_aesd_fused"
6298   [(set (match_operand:V16QI 0 "register_operand" "=w")
6299         (unspec:V16QI
6300           [(unspec:V16QI
6301                     [(xor:V16QI
6302                         (match_operand:V16QI 1 "register_operand" "%0")
6303                         (match_operand:V16QI 2 "register_operand" "w"))]
6304                 UNSPEC_AESD)]
6305           UNSPEC_AESIMC))]
6306   "TARGET_SIMD && TARGET_AES
6307    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6308   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6309   [(set_attr "type" "crypto_aese")
6310    (set_attr "length" "8")]
6313 ;; sha1
;; SHA1H on a plain SImode value.
6315 (define_insn "aarch64_crypto_sha1hsi"
6316   [(set (match_operand:SI 0 "register_operand" "=w")
6317         (unspec:SI [(match_operand:SI 1
6318                        "register_operand" "w")]
6319          UNSPEC_SHA1H))]
6320   "TARGET_SIMD && TARGET_SHA2"
6321   "sha1h\\t%s0, %s1"
6322   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H taking lane 0 of a V4SI source; little-endian lane numbering.
6325 (define_insn "aarch64_crypto_sha1hv4si"
6326   [(set (match_operand:SI 0 "register_operand" "=w")
6327         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6328                      (parallel [(const_int 0)]))]
6329          UNSPEC_SHA1H))]
6330   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6331   "sha1h\\t%s0, %s1"
6332   [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart: the architectural lane 0 is RTL lane 3.
6335 (define_insn "aarch64_be_crypto_sha1hv4si"
6336   [(set (match_operand:SI 0 "register_operand" "=w")
6337         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6338                      (parallel [(const_int 3)]))]
6339          UNSPEC_SHA1H))]
6340   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6341   "sha1h\\t%s0, %s1"
6342   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 schedule update; operand 1 is tied to the destination.
6345 (define_insn "aarch64_crypto_sha1su1v4si"
6346   [(set (match_operand:V4SI 0 "register_operand" "=w")
6347         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6348                       (match_operand:V4SI 2 "register_operand" "w")]
6349          UNSPEC_SHA1SU1))]
6350   "TARGET_SIMD && TARGET_SHA2"
6351   "sha1su1\\t%0.4s, %2.4s"
6352   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update via the CRYPTO_SHA1 iterator.
6355 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6356   [(set (match_operand:V4SI 0 "register_operand" "=w")
6357         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6358                       (match_operand:SI 2 "register_operand" "w")
6359                       (match_operand:V4SI 3 "register_operand" "w")]
6360          CRYPTO_SHA1))]
6361   "TARGET_SIMD && TARGET_SHA2"
6362   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6363   [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 schedule update.
6366 (define_insn "aarch64_crypto_sha1su0v4si"
6367   [(set (match_operand:V4SI 0 "register_operand" "=w")
6368         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6369                       (match_operand:V4SI 2 "register_operand" "w")
6370                       (match_operand:V4SI 3 "register_operand" "w")]
6371          UNSPEC_SHA1SU0))]
6372   "TARGET_SIMD && TARGET_SHA2"
6373   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6374   [(set_attr "type" "crypto_sha1_xor")]
6377 ;; sha256
;; SHA256H/SHA256H2 hash update via the CRYPTO_SHA256 iterator; operand 1
;; is tied to the destination.
6379 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6380   [(set (match_operand:V4SI 0 "register_operand" "=w")
6381         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6382                       (match_operand:V4SI 2 "register_operand" "w")
6383                       (match_operand:V4SI 3 "register_operand" "w")]
6384          CRYPTO_SHA256))]
6385   "TARGET_SIMD && TARGET_SHA2"
6386   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6387   [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 schedule update.
6390 (define_insn "aarch64_crypto_sha256su0v4si"
6391   [(set (match_operand:V4SI 0 "register_operand" "=w")
6392         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6393                       (match_operand:V4SI 2 "register_operand" "w")]
6394          UNSPEC_SHA256SU0))]
6395   "TARGET_SIMD && TARGET_SHA2"
6396   "sha256su0\\t%0.4s, %2.4s"
6397   [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 schedule update.
6400 (define_insn "aarch64_crypto_sha256su1v4si"
6401   [(set (match_operand:V4SI 0 "register_operand" "=w")
6402         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6403                       (match_operand:V4SI 2 "register_operand" "w")
6404                       (match_operand:V4SI 3 "register_operand" "w")]
6405          UNSPEC_SHA256SU1))]
6406   "TARGET_SIMD && TARGET_SHA2"
6407   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6408   [(set_attr "type" "crypto_sha256_slow")]
6411 ;; sha512
;; SHA512H/SHA512H2 hash update via the CRYPTO_SHA512 iterator; operand 1
;; is tied to the destination.
6413 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6414   [(set (match_operand:V2DI 0 "register_operand" "=w")
6415         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6416                       (match_operand:V2DI 2 "register_operand" "w")
6417                       (match_operand:V2DI 3 "register_operand" "w")]
6418          CRYPTO_SHA512))]
6419   "TARGET_SIMD && TARGET_SHA3"
6420   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6421   [(set_attr "type" "crypto_sha512")]
;; SHA512SU0 schedule update.
6424 (define_insn "aarch64_crypto_sha512su0qv2di"
6425   [(set (match_operand:V2DI 0 "register_operand" "=w")
6426         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6427                       (match_operand:V2DI 2 "register_operand" "w")]
6428          UNSPEC_SHA512SU0))]
6429   "TARGET_SIMD && TARGET_SHA3"
6430   "sha512su0\\t%0.2d, %2.2d"
6431   [(set_attr "type" "crypto_sha512")]
;; SHA512SU1 schedule update.
6434 (define_insn "aarch64_crypto_sha512su1qv2di"
6435   [(set (match_operand:V2DI 0 "register_operand" "=w")
6436         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6437                       (match_operand:V2DI 2 "register_operand" "w")
6438                       (match_operand:V2DI 3 "register_operand" "w")]
6439          UNSPEC_SHA512SU1))]
6440   "TARGET_SIMD && TARGET_SHA3"
6441   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6442   [(set_attr "type" "crypto_sha512")]
6445 ;; sha3
;; EOR3: three-way exclusive OR, expressed with plain XOR RTL so it can
;; match generic combinations as well as the intrinsic.
6447 (define_insn "eor3q<mode>4"
6448   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6449         (xor:VQ_I
6450          (xor:VQ_I
6451           (match_operand:VQ_I 2 "register_operand" "w")
6452           (match_operand:VQ_I 3 "register_operand" "w"))
6453          (match_operand:VQ_I 1 "register_operand" "w")))]
6454   "TARGET_SIMD && TARGET_SHA3"
6455   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6456   [(set_attr "type" "crypto_sha3")]
;; RAX1: XOR with operand 2 rotated left by one.
6459 (define_insn "aarch64_rax1qv2di"
6460   [(set (match_operand:V2DI 0 "register_operand" "=w")
6461         (xor:V2DI
6462          (rotate:V2DI
6463           (match_operand:V2DI 2 "register_operand" "w")
6464           (const_int 1))
6465          (match_operand:V2DI 1 "register_operand" "w")))]
6466   "TARGET_SIMD && TARGET_SHA3"
6467   "rax1\\t%0.2d, %1.2d, %2.2d"
6468   [(set_attr "type" "crypto_sha3")]
;; XAR: XOR then rotate right by the immediate in operand 3.
6471 (define_insn "aarch64_xarqv2di"
6472   [(set (match_operand:V2DI 0 "register_operand" "=w")
6473         (rotatert:V2DI
6474          (xor:V2DI
6475           (match_operand:V2DI 1 "register_operand" "%w")
6476           (match_operand:V2DI 2 "register_operand" "w"))
6477          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6478   "TARGET_SIMD && TARGET_SHA3"
6479   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6480   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit clear and XOR, i.e. op1 ^ (op2 & ~op3).
6483 (define_insn "bcaxq<mode>4"
6484   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6485         (xor:VQ_I
6486          (and:VQ_I
6487           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6488           (match_operand:VQ_I 2 "register_operand" "w"))
6489          (match_operand:VQ_I 1 "register_operand" "w")))]
6490   "TARGET_SIMD && TARGET_SHA3"
6491   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6492   [(set_attr "type" "crypto_sha3")]
6495 ;; SM3

;; SM3 hash instructions.  The operations are cryptographic primitives with
;; no useful generic-RTL form, so they are modeled as opaque unspecs.
6497 (define_insn "aarch64_sm3ss1qv4si"
6498   [(set (match_operand:V4SI 0 "register_operand" "=w")
6499         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6500                       (match_operand:V4SI 2 "register_operand" "w")
6501                       (match_operand:V4SI 3 "register_operand" "w")]
6502          UNSPEC_SM3SS1))]
6503   "TARGET_SIMD && TARGET_SM4"
6504   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6505   [(set_attr "type" "crypto_sm3")]

;; SM3TT1A/1B/2A/2B (selected by the CRYPTO_SM3TT iterator).  Operand 1 is
;; tied to the destination ("0" constraint); operand 4 is a 2-bit immediate
;; selecting the element of operand 3.
6509 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6510   [(set (match_operand:V4SI 0 "register_operand" "=w")
6511         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6512                       (match_operand:V4SI 2 "register_operand" "w")
6513                       (match_operand:V4SI 3 "register_operand" "w")
6514                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6515          CRYPTO_SM3TT))]
6516   "TARGET_SIMD && TARGET_SM4"
6517   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6518   [(set_attr "type" "crypto_sm3")]

;; SM3PARTW1/SM3PARTW2 message-expansion steps; operand 1 is tied to the
;; destination register.
6521 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6522   [(set (match_operand:V4SI 0 "register_operand" "=w")
6523         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6524                       (match_operand:V4SI 2 "register_operand" "w")
6525                       (match_operand:V4SI 3 "register_operand" "w")]
6526          CRYPTO_SM3PART))]
6527   "TARGET_SIMD && TARGET_SM4"
6528   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6529   [(set_attr "type" "crypto_sm3")]
6532 ;; SM4

;; SM4 block-cipher round: operand 1 (the data, tied to the destination)
;; is encrypted with the round key in operand 2.
6534 (define_insn "aarch64_sm4eqv4si"
6535   [(set (match_operand:V4SI 0 "register_operand" "=w")
6536         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6537                       (match_operand:V4SI 2 "register_operand" "w")]
6538          UNSPEC_SM4E))]
6539   "TARGET_SIMD && TARGET_SM4"
6540   "sm4e\\t%0.4s, %2.4s"
6541   [(set_attr "type" "crypto_sm4")]

;; SM4 key schedule: derive the next round key from operands 1 and 2.
;; Unlike sm4e the destination is not tied to an input.
6544 (define_insn "aarch64_sm4ekeyqv4si"
6545   [(set (match_operand:V4SI 0 "register_operand" "=w")
6546         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6547                       (match_operand:V4SI 2 "register_operand" "w")]
6548          UNSPEC_SM4EKEY))]
6549   "TARGET_SIMD && TARGET_SM4"
6550   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6551   [(set_attr "type" "crypto_sm4")]
6554 ;; fp16fml

;; Expanders for the FP16 fused multiply-add-long intrinsics.  The unspec
;; form used by the intrinsic is rewritten into the concrete
;; aarch64_simd_fml*l insn, which selects the low half of each HF input
;; explicitly via parallel rtxes.  f16mac1 chooses fmlal (add) vs fmlsl
;; (subtract); VFMLA_W is the half-float mode twice as wide as the
;; single-float result mode.
6556 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6557   [(set (match_operand:VDQSF 0 "register_operand")
6558         (unspec:VDQSF
6559          [(match_operand:VDQSF 1 "register_operand")
6560           (match_operand:<VFMLA_W> 2 "register_operand")
6561           (match_operand:<VFMLA_W> 3 "register_operand")]
6562          VFMLA16_LOW))]
6563   "TARGET_F16FML"
;; Two selectors are built because the insn pattern matches a separate
;; vec_select for each multiplicand; both pick the same (low) half.
6565   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6566                                             <nunits> * 2, false);
6567   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6568                                             <nunits> * 2, false);
6570   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6571                                                                 operands[1],
6572                                                                 operands[2],
6573                                                                 operands[3],
6574                                                                 p1, p2));
6575   DONE;

;; As above, but selects the high half of each HF input (true => hi half),
;; mapping onto the FMLAL2/FMLSL2 forms.
6579 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6580   [(set (match_operand:VDQSF 0 "register_operand")
6581         (unspec:VDQSF
6582          [(match_operand:VDQSF 1 "register_operand")
6583           (match_operand:<VFMLA_W> 2 "register_operand")
6584           (match_operand:<VFMLA_W> 3 "register_operand")]
6585          VFMLA16_HIGH))]
6586   "TARGET_F16FML"
6588   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6589   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6591   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6592                                                                  operands[1],
6593                                                                  operands[2],
6594                                                                  operands[3],
6595                                                                  p1, p2));
6596   DONE;
;; FMLAL (low): widen the low half of each HF vector to SF and fuse-multiply-
;; accumulate into operand 1 (tied to the destination).  Modeled as real
;; (fma (float_extend (vec_select lo)) ...) RTL so it is combinable.
6599 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6600   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6601         (fma:VDQSF
6602          (float_extend:VDQSF
6603           (vec_select:<VFMLA_SEL_W>
6604            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6605            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6606          (float_extend:VDQSF
6607           (vec_select:<VFMLA_SEL_W>
6608            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6609            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6610          (match_operand:VDQSF 1 "register_operand" "0")))]
6611   "TARGET_F16FML"
6612   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6613   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL (low): as fmlal_low but the first multiplicand is negated before
;; the extend, i.e. acc - a*b on the low halves.
6616 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6617   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6618         (fma:VDQSF
6619          (float_extend:VDQSF
6620           (neg:<VFMLA_SEL_W>
6621            (vec_select:<VFMLA_SEL_W>
6622             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6623             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6624          (float_extend:VDQSF
6625           (vec_select:<VFMLA_SEL_W>
6626            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6627            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6628          (match_operand:VDQSF 1 "register_operand" "0")))]
6629   "TARGET_F16FML"
6630   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6631   [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2: same operation on the high halves of the HF inputs.
6634 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6635   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6636         (fma:VDQSF
6637          (float_extend:VDQSF
6638           (vec_select:<VFMLA_SEL_W>
6639            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6640            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6641          (float_extend:VDQSF
6642           (vec_select:<VFMLA_SEL_W>
6643            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6644            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6645          (match_operand:VDQSF 1 "register_operand" "0")))]
6646   "TARGET_F16FML"
6647   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6648   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2: multiply-subtract-long on the high halves.
6651 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6652   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6653         (fma:VDQSF
6654          (float_extend:VDQSF
6655           (neg:<VFMLA_SEL_W>
6656            (vec_select:<VFMLA_SEL_W>
6657             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6658             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6659          (float_extend:VDQSF
6660           (vec_select:<VFMLA_SEL_W>
6661            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6662            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6663          (match_operand:VDQSF 1 "register_operand" "0")))]
6664   "TARGET_F16FML"
6665   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6666   [(set_attr "type" "neon_fp_mul_s")]
;; By-element expanders, V2SF result from a V4HF vector and one V4HF
;; element.  The unspec intrinsic form is lowered to the concrete lane
;; insn: a low-half selector plus an endian-corrected 2-bit lane index.
6669 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6670   [(set (match_operand:V2SF 0 "register_operand")
6671         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6672                            (match_operand:V4HF 2 "register_operand")
6673                            (match_operand:V4HF 3 "register_operand")
6674                            (match_operand:SI 4 "aarch64_imm2")]
6675          VFMLA16_LOW))]
6676   "TARGET_F16FML"
6678     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6679     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6681     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6682                                                             operands[1],
6683                                                             operands[2],
6684                                                             operands[3],
6685                                                             p1, lane));
6686     DONE;

;; As above but selecting the high half of operand 2 (FMLAL2/FMLSL2 forms).
6690 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6691   [(set (match_operand:V2SF 0 "register_operand")
6692         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6693                            (match_operand:V4HF 2 "register_operand")
6694                            (match_operand:V4HF 3 "register_operand")
6695                            (match_operand:SI 4 "aarch64_imm2")]
6696          VFMLA16_HIGH))]
6697   "TARGET_F16FML"
6699     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6700     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6702     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6703                                                              operands[1],
6704                                                              operands[2],
6705                                                              operands[3],
6706                                                              p1, lane));
6707     DONE;
;; FMLAL by element (low half): widen the low V2HF half of operand 2 and a
;; broadcast element of operand 3 to SF, then fma into operand 1.  The "x"
;; constraint restricts the indexed register to V0-V15 as required by the
;; by-element encoding; operand 5 is the 2-bit lane index.
6710 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6711   [(set (match_operand:V2SF 0 "register_operand" "=w")
6712         (fma:V2SF
6713          (float_extend:V2SF
6714            (vec_select:V2HF
6715             (match_operand:V4HF 2 "register_operand" "w")
6716             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6717          (float_extend:V2SF
6718            (vec_duplicate:V2HF
6719             (vec_select:HF
6720              (match_operand:V4HF 3 "register_operand" "x")
6721              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6722          (match_operand:V2SF 1 "register_operand" "0")))]
6723   "TARGET_F16FML"
6724   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6725   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL by element (low half): as above with the vector multiplicand
;; negated, i.e. acc - a*b.
6728 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6729   [(set (match_operand:V2SF 0 "register_operand" "=w")
6730         (fma:V2SF
6731          (float_extend:V2SF
6732           (neg:V2HF
6733            (vec_select:V2HF
6734             (match_operand:V4HF 2 "register_operand" "w")
6735             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6736          (float_extend:V2SF
6737           (vec_duplicate:V2HF
6738            (vec_select:HF
6739             (match_operand:V4HF 3 "register_operand" "x")
6740             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741          (match_operand:V2SF 1 "register_operand" "0")))]
6742   "TARGET_F16FML"
6743   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6744   [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2 by element: same as fmlal_lane_low but using the high V2HF half
;; of operand 2.
6747 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6748   [(set (match_operand:V2SF 0 "register_operand" "=w")
6749         (fma:V2SF
6750          (float_extend:V2SF
6751            (vec_select:V2HF
6752             (match_operand:V4HF 2 "register_operand" "w")
6753             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6754          (float_extend:V2SF
6755            (vec_duplicate:V2HF
6756             (vec_select:HF
6757              (match_operand:V4HF 3 "register_operand" "x")
6758              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6759          (match_operand:V2SF 1 "register_operand" "0")))]
6760   "TARGET_F16FML"
6761   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6762   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2 by element: high half, negated multiplicand.
6765 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6766   [(set (match_operand:V2SF 0 "register_operand" "=w")
6767         (fma:V2SF
6768          (float_extend:V2SF
6769            (neg:V2HF
6770             (vec_select:V2HF
6771              (match_operand:V4HF 2 "register_operand" "w")
6772              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6773          (float_extend:V2SF
6774            (vec_duplicate:V2HF
6775             (vec_select:HF
6776              (match_operand:V4HF 3 "register_operand" "x")
6777              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778          (match_operand:V2SF 1 "register_operand" "0")))]
6779   "TARGET_F16FML"
6780   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6781   [(set_attr "type" "neon_fp_mul_s")]
;; Quad-form by-element expanders: V4SF accumulator, V8HF vector and a V8HF
;; element selected by a 3-bit index (aarch64_lane_imm3).  Lowered to the
;; concrete laneq insn with a low-half selector and endian-corrected lane.
6784 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6785   [(set (match_operand:V4SF 0 "register_operand")
6786         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6787                            (match_operand:V8HF 2 "register_operand")
6788                            (match_operand:V8HF 3 "register_operand")
6789                            (match_operand:SI 4 "aarch64_lane_imm3")]
6790          VFMLA16_LOW))]
6791   "TARGET_F16FML"
6793     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6794     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6796     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6797                                                               operands[1],
6798                                                               operands[2],
6799                                                               operands[3],
6800                                                               p1, lane));
6801     DONE;

;; High-half variant of the above (FMLAL2/FMLSL2 forms).
6804 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6805   [(set (match_operand:V4SF 0 "register_operand")
6806         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6807                            (match_operand:V8HF 2 "register_operand")
6808                            (match_operand:V8HF 3 "register_operand")
6809                            (match_operand:SI 4 "aarch64_lane_imm3")]
6810          VFMLA16_HIGH))]
6811   "TARGET_F16FML"
6813     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6814     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6816     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6817                                                                operands[1],
6818                                                                operands[2],
6819                                                                operands[3],
6820                                                                p1, lane));
6821     DONE;
;; Quad-form FMLAL by element (low): widen the low V4HF half of operand 2
;; and a broadcast element of the V8HF operand 3 (3-bit index, register
;; restricted to V0-V15 by "x") to SF, fma into the tied accumulator.
6824 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6825   [(set (match_operand:V4SF 0 "register_operand" "=w")
6826         (fma:V4SF
6827          (float_extend:V4SF
6828           (vec_select:V4HF
6829             (match_operand:V8HF 2 "register_operand" "w")
6830             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6831          (float_extend:V4SF
6832           (vec_duplicate:V4HF
6833            (vec_select:HF
6834             (match_operand:V8HF 3 "register_operand" "x")
6835             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6836          (match_operand:V4SF 1 "register_operand" "0")))]
6837   "TARGET_F16FML"
6838   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6839   [(set_attr "type" "neon_fp_mul_s")]

;; Quad-form FMLSL by element (low): negated multiplicand, acc - a*b.
6842 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6843   [(set (match_operand:V4SF 0 "register_operand" "=w")
6844         (fma:V4SF
6845           (float_extend:V4SF
6846            (neg:V4HF
6847             (vec_select:V4HF
6848              (match_operand:V8HF 2 "register_operand" "w")
6849              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6850          (float_extend:V4SF
6851           (vec_duplicate:V4HF
6852            (vec_select:HF
6853             (match_operand:V8HF 3 "register_operand" "x")
6854             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6855          (match_operand:V4SF 1 "register_operand" "0")))]
6856   "TARGET_F16FML"
6857   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6858   [(set_attr "type" "neon_fp_mul_s")]

;; Quad-form FMLAL2 by element: uses the high V4HF half of operand 2.
6861 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6862   [(set (match_operand:V4SF 0 "register_operand" "=w")
6863         (fma:V4SF
6864          (float_extend:V4SF
6865           (vec_select:V4HF
6866             (match_operand:V8HF 2 "register_operand" "w")
6867             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6868          (float_extend:V4SF
6869           (vec_duplicate:V4HF
6870            (vec_select:HF
6871             (match_operand:V8HF 3 "register_operand" "x")
6872             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6873          (match_operand:V4SF 1 "register_operand" "0")))]
6874   "TARGET_F16FML"
6875   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6876   [(set_attr "type" "neon_fp_mul_s")]

;; Quad-form FMLSL2 by element: high half, negated multiplicand.
6879 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6880   [(set (match_operand:V4SF 0 "register_operand" "=w")
6881         (fma:V4SF
6882          (float_extend:V4SF
6883           (neg:V4HF
6884            (vec_select:V4HF
6885             (match_operand:V8HF 2 "register_operand" "w")
6886             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6887          (float_extend:V4SF
6888           (vec_duplicate:V4HF
6889            (vec_select:HF
6890             (match_operand:V8HF 3 "register_operand" "x")
6891             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6892          (match_operand:V4SF 1 "register_operand" "0")))]
6893   "TARGET_F16FML"
6894   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6895   [(set_attr "type" "neon_fp_mul_s")]
;; Mixed-width by-element expanders: V2SF result, V4HF vector input but the
;; element comes from a full V8HF register (3-bit index).  Note the half
;; selector is built in V4HF mode while the lane index is corrected in
;; V8HF mode, matching the two different source widths.
6898 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6899   [(set (match_operand:V2SF 0 "register_operand")
6900         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6901                       (match_operand:V4HF 2 "register_operand")
6902                       (match_operand:V8HF 3 "register_operand")
6903                       (match_operand:SI 4 "aarch64_lane_imm3")]
6904          VFMLA16_LOW))]
6905   "TARGET_F16FML"
6907     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6908     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6910     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6911                                                              operands[1],
6912                                                              operands[2],
6913                                                              operands[3],
6914                                                              p1, lane));
6915     DONE;

;; High-half variant (FMLAL2/FMLSL2 forms).
6919 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6920   [(set (match_operand:V2SF 0 "register_operand")
6921         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6922                       (match_operand:V4HF 2 "register_operand")
6923                       (match_operand:V8HF 3 "register_operand")
6924                       (match_operand:SI 4 "aarch64_lane_imm3")]
6925          VFMLA16_HIGH))]
6926   "TARGET_F16FML"
6928     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6929     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6931     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6932                                                               operands[1],
6933                                                               operands[2],
6934                                                               operands[3],
6935                                                               p1, lane));
6936     DONE;
;; FMLAL by element, V2SF result: low V2HF half of operand 2 times a
;; broadcast element taken from a full V8HF register (operand 3, 3-bit
;; index, restricted to V0-V15 by "x"), accumulated into operand 1.
6940 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6941   [(set (match_operand:V2SF 0 "register_operand" "=w")
6942         (fma:V2SF
6943          (float_extend:V2SF
6944            (vec_select:V2HF
6945             (match_operand:V4HF 2 "register_operand" "w")
6946             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6947          (float_extend:V2SF
6948           (vec_duplicate:V2HF
6949            (vec_select:HF
6950             (match_operand:V8HF 3 "register_operand" "x")
6951             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6952          (match_operand:V2SF 1 "register_operand" "0")))]
6953   "TARGET_F16FML"
6954   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6955   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL variant: negated multiplicand, acc - a*b.
6958 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6959   [(set (match_operand:V2SF 0 "register_operand" "=w")
6960         (fma:V2SF
6961          (float_extend:V2SF
6962           (neg:V2HF
6963            (vec_select:V2HF
6964             (match_operand:V4HF 2 "register_operand" "w")
6965             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6966          (float_extend:V2SF
6967           (vec_duplicate:V2HF
6968            (vec_select:HF
6969             (match_operand:V8HF 3 "register_operand" "x")
6970             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6971          (match_operand:V2SF 1 "register_operand" "0")))]
6972   "TARGET_F16FML"
6973   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6974   [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2 variant: high V2HF half of operand 2.
6977 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6978   [(set (match_operand:V2SF 0 "register_operand" "=w")
6979         (fma:V2SF
6980          (float_extend:V2SF
6981            (vec_select:V2HF
6982             (match_operand:V4HF 2 "register_operand" "w")
6983             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6984          (float_extend:V2SF
6985           (vec_duplicate:V2HF
6986            (vec_select:HF
6987             (match_operand:V8HF 3 "register_operand" "x")
6988             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6989          (match_operand:V2SF 1 "register_operand" "0")))]
6990   "TARGET_F16FML"
6991   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6992   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2 variant: high half, negated multiplicand.
6995 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6996   [(set (match_operand:V2SF 0 "register_operand" "=w")
6997         (fma:V2SF
6998          (float_extend:V2SF
6999           (neg:V2HF
7000            (vec_select:V2HF
7001             (match_operand:V4HF 2 "register_operand" "w")
7002             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7003          (float_extend:V2SF
7004           (vec_duplicate:V2HF
7005            (vec_select:HF
7006             (match_operand:V8HF 3 "register_operand" "x")
7007             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7008          (match_operand:V2SF 1 "register_operand" "0")))]
7009   "TARGET_F16FML"
7010   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7011   [(set_attr "type" "neon_fp_mul_s")]
;; Mixed-width quad expanders: V4SF result, V8HF vector input with the
;; element taken from a 64-bit V4HF register (2-bit index).  The half
;; selector is built in V8HF mode, the lane correction in V4HF mode.
7014 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
7015   [(set (match_operand:V4SF 0 "register_operand")
7016         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7017                       (match_operand:V8HF 2 "register_operand")
7018                       (match_operand:V4HF 3 "register_operand")
7019                       (match_operand:SI 4 "aarch64_imm2")]
7020          VFMLA16_LOW))]
7021   "TARGET_F16FML"
7023     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7024     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7026     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
7027                                                              operands[1],
7028                                                              operands[2],
7029                                                              operands[3],
7030                                                              p1, lane));
7031     DONE;

;; High-half variant (FMLAL2/FMLSL2 forms).
7034 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
7035   [(set (match_operand:V4SF 0 "register_operand")
7036         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7037                       (match_operand:V8HF 2 "register_operand")
7038                       (match_operand:V4HF 3 "register_operand")
7039                       (match_operand:SI 4 "aarch64_imm2")]
7040          VFMLA16_HIGH))]
7041   "TARGET_F16FML"
7043     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7044     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7046     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
7047                                                               operands[1],
7048                                                               operands[2],
7049                                                               operands[3],
7050                                                               p1, lane));
7051     DONE;
;; Quad-form FMLAL by element with a 64-bit index register: low V4HF half
;; of operand 2 times a broadcast element of the V4HF operand 3 (2-bit
;; index, restricted to V0-V15 by "x"), accumulated into operand 1.
7054 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
7055   [(set (match_operand:V4SF 0 "register_operand" "=w")
7056         (fma:V4SF
7057          (float_extend:V4SF
7058           (vec_select:V4HF
7059            (match_operand:V8HF 2 "register_operand" "w")
7060            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7061          (float_extend:V4SF
7062           (vec_duplicate:V4HF
7063            (vec_select:HF
7064             (match_operand:V4HF 3 "register_operand" "x")
7065             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7066          (match_operand:V4SF 1 "register_operand" "0")))]
7067   "TARGET_F16FML"
7068   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7069   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL variant: negated multiplicand, acc - a*b.
7072 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
7073   [(set (match_operand:V4SF 0 "register_operand" "=w")
7074         (fma:V4SF
7075          (float_extend:V4SF
7076           (neg:V4HF
7077            (vec_select:V4HF
7078             (match_operand:V8HF 2 "register_operand" "w")
7079             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7080          (float_extend:V4SF
7081           (vec_duplicate:V4HF
7082            (vec_select:HF
7083             (match_operand:V4HF 3 "register_operand" "x")
7084             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7085          (match_operand:V4SF 1 "register_operand" "0")))]
7086   "TARGET_F16FML"
7087   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7088   [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2 variant: high V4HF half of operand 2.
7091 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
7092   [(set (match_operand:V4SF 0 "register_operand" "=w")
7093         (fma:V4SF
7094          (float_extend:V4SF
7095           (vec_select:V4HF
7096            (match_operand:V8HF 2 "register_operand" "w")
7097            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7098          (float_extend:V4SF
7099           (vec_duplicate:V4HF
7100            (vec_select:HF
7101             (match_operand:V4HF 3 "register_operand" "x")
7102             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7103          (match_operand:V4SF 1 "register_operand" "0")))]
7104   "TARGET_F16FML"
7105   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7106   [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2 variant: high half, negated multiplicand.
7109 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
7110   [(set (match_operand:V4SF 0 "register_operand" "=w")
7111         (fma:V4SF
7112          (float_extend:V4SF
7113           (neg:V4HF
7114            (vec_select:V4HF
7115             (match_operand:V8HF 2 "register_operand" "w")
7116             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7117          (float_extend:V4SF
7118           (vec_duplicate:V4HF
7119            (vec_select:HF
7120             (match_operand:V4HF 3 "register_operand" "x")
7121             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7122          (match_operand:V4SF 1 "register_operand" "0")))]
7123   "TARGET_F16FML"
7124   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7125   [(set_attr "type" "neon_fp_mul_s")]
7128 ;; pmull

;; Polynomial (carry-less) multiply: two 64-bit DI operands produce one
;; 128-bit (TI) product.  Gated on the AES extension, which provides the
;; 64x64->128 PMULL form.
7130 (define_insn "aarch64_crypto_pmulldi"
7131   [(set (match_operand:TI 0 "register_operand" "=w")
7132         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
7133                      (match_operand:DI 2 "register_operand" "w")]
7134                     UNSPEC_PMULL))]
7135  "TARGET_SIMD && TARGET_AES"
7136  "pmull\\t%0.1q, %1.1d, %2.1d"
7137   [(set_attr "type" "crypto_pmull")]

;; PMULL2: carry-less multiply of the upper 64-bit elements of two V2DI
;; vectors, again yielding a 128-bit product.
7140 (define_insn "aarch64_crypto_pmullv2di"
7141  [(set (match_operand:TI 0 "register_operand" "=w")
7142        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7143                    (match_operand:V2DI 2 "register_operand" "w")]
7144                   UNSPEC_PMULL2))]
7145   "TARGET_SIMD && TARGET_AES"
7146   "pmull2\\t%0.1q, %1.2d, %2.2d"
7147   [(set_attr "type" "crypto_pmull")]
7150 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; ANY_EXTEND selects sign_extend (SXTL) or zero_extend (UXTL); the
;; standard <optab>...2 name lets the middle end use it for widening.
7151 (define_insn "<optab><Vnarrowq><mode>2"
7152   [(set (match_operand:VQN 0 "register_operand" "=w")
7153         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7154   "TARGET_SIMD"
7155   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7156   [(set_attr "type" "neon_shift_imm_long")]
7159 ;; Truncate a 128-bit integer vector to a 64-bit vector.
;; Standard trunc...2 name mapped onto XTN (extract narrow).
7160 (define_insn "trunc<mode><Vnarrowq>2"
7161   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7162         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7163   "TARGET_SIMD"
7164   "xtn\t%0.<Vntype>, %1.<Vtype>"
7165   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; BFloat16 dot product: the BF multiply/sum is an opaque unspec, but the
;; accumulation into operand 1 (tied to the destination) is a real plus so
;; it can combine with surrounding additions.
7168 (define_insn "aarch64_bfdot<mode>"
7169   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7170         (plus:VDQSF
7171           (unspec:VDQSF
7172            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
7173             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
7174             UNSPEC_BFDOT)
7175           (match_operand:VDQSF 1 "register_operand" "0")))]
7176   "TARGET_BF16_SIMD"
7177   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
7178   [(set_attr "type" "neon_dot<q>")]

;; BFDOT by element.  The lane index counts 2H pairs (the instruction's
;; ".2h[%4]" element), hence the nunits / 2 when applying the big-endian
;; lane correction to the intrinsic's index.
7181 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
7182   [(set (match_operand:VDQSF 0 "register_operand" "=w")
7183         (plus:VDQSF
7184           (unspec:VDQSF
7185            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
7186             (match_operand:VBF 3 "register_operand" "w")
7187             (match_operand:SI 4 "const_int_operand" "n")]
7188             UNSPEC_BFDOT)
7189           (match_operand:VDQSF 1 "register_operand" "0")))]
7190   "TARGET_BF16_SIMD"
7192   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
7193   int lane = INTVAL (operands[4]);
7194   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
7195   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
7197   [(set_attr "type" "neon_dot<VDQSF:q>")]
7200 ;; bfmmla
;; BFloat16 matrix multiply-accumulate into a 4x single-float accumulator
;; (operand 1, tied to the destination).
7201 (define_insn "aarch64_bfmmlaqv4sf"
7202   [(set (match_operand:V4SF 0 "register_operand" "=w")
7203         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
7204                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7205                                  (match_operand:V8BF 3 "register_operand" "w")]
7206                     UNSPEC_BFMMLA)))]
7207   "TARGET_BF16_SIMD"
7208   "bfmmla\\t%0.4s, %2.8h, %3.8h"
7209   [(set_attr "type" "neon_fp_mla_s_q")]

7212 ;; bfmlal<bt>
;; BFMLALB/BFMLALT (bottom/top even-odd elements, selected by the BF_MLA
;; iterator's <bt>): widening BF16 multiply-accumulate into V4SF.
7213 (define_insn "aarch64_bfmlal<bt>v4sf"
7214   [(set (match_operand:V4SF 0 "register_operand" "=w")
7215         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7216                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7217                                   (match_operand:V8BF 3 "register_operand" "w")]
7218                      BF_MLA)))]
7219   "TARGET_BF16_SIMD"
7220   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
7221   [(set_attr "type" "neon_fp_mla_s_q")]

;; By-element BFMLALB/BFMLALT: operand 4 is the constant lane index,
;; corrected for big-endian lane numbering at output time.
7224 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
7225   [(set (match_operand:V4SF 0 "register_operand" "=w")
7226         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7227                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7228                                   (match_operand:VBF 3 "register_operand" "w")
7229                                   (match_operand:SI 4 "const_int_operand" "n")]
7230                      BF_MLA)))]
7231   "TARGET_BF16_SIMD"
7233   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
7234   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
7236   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
7239 ;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (chosen by the MATMUL iterator's <sur>): multiply
;; 16-byte operands and accumulate into the tied V4SI accumulator.
7240 (define_insn "aarch64_simd_<sur>mmlav16qi"
7241   [(set (match_operand:V4SI 0 "register_operand" "=w")
7242         (plus:V4SI
7243          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
7244                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
7245          (match_operand:V4SI 1 "register_operand" "0")))]
7246   "TARGET_I8MM"
7247   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
7248   [(set_attr "type" "neon_mla_s_q")]
7251 ;; bfcvtn
;; Narrowing convert V4SF -> BF16.  V4SF_TO_BF covers the plain and "2"
;; destination modes; the written element layout is always 4h from 4s.
7252 (define_insn "aarch64_bfcvtn<q><mode>"
7253   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
7254         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
7255                             UNSPEC_BFCVTN))]
7256   "TARGET_BF16_SIMD"
7257   "bfcvtn\\t%0.4h, %1.4s"
7258   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]

;; BFCVTN2 writes the converted results into the upper half of the
;; destination; operand 1 ("0" tie) supplies the preserved lower half.
7261 (define_insn "aarch64_bfcvtn2v8bf"
7262   [(set (match_operand:V8BF 0 "register_operand" "=w")
7263         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
7264                       (match_operand:V4SF 2 "register_operand" "w")]
7265                       UNSPEC_BFCVTN2))]
7266   "TARGET_BF16_SIMD"
7267   "bfcvtn2\\t%0.8h, %2.4s"
7268   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]

;; Scalar SF -> BF16 convert; only requires the scalar BF16 FP extension,
;; not the SIMD one.
7271 (define_insn "aarch64_bfcvtbf"
7272   [(set (match_operand:BF 0 "register_operand" "=w")
7273         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
7274                     UNSPEC_BFCVT))]
7275   "TARGET_BF16_FP"
7276   "bfcvt\\t%h0, %s1"
7277   [(set_attr "type" "f_cvt")]