;; Commit: [AArch64] Fix aarch64_ira_change_pseudo_allocno_class
;; File: gcc/config/aarch64/aarch64-simd.md (official-gcc.git)
;; Blob: 962386981afb94ba4039fd19dc298e122101dbeb
;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; General vector move expander.  Legitimize memory destinations so the
;; resulting store can always be emitted directly.
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
	(match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)
;; Misaligned vector move: AArch64 AdvSIMD loads/stores tolerate
;; misalignment, so this is just a normal move after legitimization.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})
;; Duplicate a scalar into every lane of an integer vector, either from a
;; SIMD register lane or from a general-purpose register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

;; Floating-point variant: the source is always a SIMD register lane.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)
;; Duplicate one selected lane of a vector into every lane of the result.
;; The lane number is converted to the architectural (endian-corrected) lane.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite width (64 <-> 128 bit),
;; so the lane index is interpreted in the swapped-width mode.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)
;; 64-bit vector move, covering load, store (including storing zero via
;; xzr), SIMD-to-SIMD, SIMD<->GP transfers and immediate moves.
(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VD 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)
;; 128-bit vector move.  GP-register alternatives (4-6) emit "#" and are
;; split after reload into two DImode moves (see the define_splits below).
(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
		"=w, Umq,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQ 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)
;; Load a pair of adjacent 64-bit vector registers with a single LDP.
;; The condition checks that the second address really is the first
;; plus the size of the first mode.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

;; Store a pair of adjacent 64-bit vector registers with a single STP.
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)
;; After reload, split a 128-bit move between two general-register pairs
;; into two DImode register-to-register moves.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
      (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit move between an FP register and a
;; general-register pair (either direction) into lane transfers.
(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})
;; Split a 128-bit SIMD <-> general-register move into two half-width
;; transfers, choosing the direction from the register class of SRC.
(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
	(match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
	emit_insn
	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)
;; Move the low 64 bits of a 128-bit vector into a general register.
(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

;; Move the high 64 bits of a 128-bit vector into a general register.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])
;; Bitwise OR-NOT: op0 = op2 | ~op1 (note the operand order in the output).
(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Bitwise AND-NOT (bit clear): op0 = op2 & ~op1.
(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiplication (no 64-bit element MUL on AdvSIMD,
;; hence the VDQ_BHSI iterator).
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)
;; Byte-swap each element using the REV family of instructions.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
	(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)
;; Count trailing zeros: byte-swap, bit-reverse each byte (together a full
;; bit reversal of each element), then count leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
                                             <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
;; xorsign: copy the magnitude of op1 with the sign of op2, implemented as
;; op1 ^ (op2 & sign-bit-mask) on the integer view of the vectors.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  /* Mask selecting only the sign bit of each element.  */
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return result;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

;; As above, but indexing into a 128-bit (V16QI) lane operand.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)
;; copysign: take magnitude from op1 and sign from op2, implemented with a
;; bit-select (BSL) under a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
;; Multiply by a duplicated lane: combiner pattern matching MUL-by-element.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)
;; Multiply by a scalar broadcast into all lanes; emitted as a
;; multiply-by-element of lane 0.
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)
;; Reciprocal square-root estimate.
(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step.
(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Approximate reciprocal square root, expanded to an estimate plus
;; Newton-Raphson iterations.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})
;; Scalar DF multiply by a selected lane of a V2DF vector.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)
;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)
;; Signed absolute difference.
(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(abs:VDQ_BHSI (minus:VDQ_BHSI
		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference on the high halves of the inputs.
(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference and accumulate.
(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")
			 (match_operand:<VDBLW> 3 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Pairwise add-long and accumulate.
(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:<VDBLW> 2 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.
(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
					       operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
					      operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
					      operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)
;; Signed absolute difference and accumulate.
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Floating-point absolute difference.
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)
;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)
;; Vector exclusive OR.
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)
;; Insert a scalar into one lane of a vector; the lane is encoded as a
;; one-hot merge mask in operand 2 and converted back to a lane index here.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
	return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)
;; Copy one lane of a vector into a lane of another vector of the same mode.
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

;; As above, but the source vector has the opposite width.
(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)
;; Vector shifts by an immediate.  Operand 2 is a vector of identical
;; shift counts validated by the Dr/Dl constraints and the
;; aarch64_simd_[rl]shift_imm predicates (right shifts allow 1..width,
;; left shifts 0..width-1, per the AdvSIMD immediate encodings).

;; Logical shift right: USHR.
(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Arithmetic shift right: SSHR.
(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; Shift left: SHL.
(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)
;; Vector shifts by per-element register counts (SSHL/USHL).  The shift
;; expanders below feed these with a broadcast scalar count; for right
;; shifts the count is negated first, matching the SSHL/USHL convention
;; that negative counts shift right.

;; Left shift expressed directly as RTL ashift, emitted as SSHL.
(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; USHL with possibly-negative counts; kept as an unspec because no
;; single RTL shift code captures the bidirectional semantics.
(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")]
                   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; SSHL counterpart of the above (sign-filling right shifts).
(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")]
                   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
;; Standard vector shift-left pattern.  In-range immediates use SHL
;; directly; anything else is broadcast to a count vector and handled
;; with SSHL via aarch64_simd_reg_sshl<mode>.
(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SHL immediates must be in [0, element width).  */
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          /* Out-of-range constant: fall through to the register path.  */
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Broadcast the scalar count to every lane, then shift with SSHL.  */
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)
;; Standard vector logical shift-right pattern.  Immediates in
;; [1, element width] use USHR; otherwise the count is negated,
;; broadcast, and applied with USHL (negative counts shift right).
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* USHR allows shifts of 1..element width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate and broadcast the count, then shift with USHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Standard vector arithmetic shift-right pattern.  Same strategy as
;; lshr<mode>3 but with SSHR for immediates and signed SSHL (negated
;; count) for register amounts.
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI  2 "general_operand" "")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      /* SSHR allows shifts of 1..element width inclusive.  */
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      /* Negate and broadcast the count, then shift with SSHL.  */
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)
;; Vector-by-vector shift left: maps directly onto SSHL.
(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand" "")
  (match_operand:VDQ_I 1 "register_operand" "")
  (match_operand:VDQ_I 2 "register_operand" "")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
}
)
1034 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1035 ;; Negating individual lanes most certainly offsets the
1036 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane counts
;; and use SSHL (negative counts shift right).
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
}
)
1050 ;; DI vector shift
;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
;; Vector-by-vector logical shift right: negate the per-lane counts
;; and use USHL (negative counts shift right).
(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand" "")
  (match_operand:VDQ_BHSI 1 "register_operand" "")
  (match_operand:VDQ_BHSI 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
}
)
;; Scalar-DI logical shift right for the SIMD intrinsics.  A shift by 64
;; is defined to produce zero (the standard lshrdi3 pattern cannot
;; express it), so emit a zero move for that case.
(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
1094 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift toward element zero by an immediate number of bits.
;; NOTE(review): on big-endian the element order within the 64-bit
;; register is reversed, which presumably is why SHL is used there —
;; confirm against the vec_shr optab documentation.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)
;; Standard vec_set pattern: insert scalar operand 1 into lane
;; operands[2] of vector operand 0.  The lane index is converted to the
;; one-hot vec_merge mask expected by aarch64_simd_vec_set<mode>.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                          GEN_INT (elem), operands[0]));
    DONE;
  }
)
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3 (MLA).
;; The accumulator is tied to the output ("0" constraint).
(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
                        (match_operand:VDQ_BHSI 2 "register_operand" "w")
                        (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
;; MLA with one multiplicand broadcast from lane operands[2] of
;; operand 1 (the by-element form of MLA).
(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    /* Endian-correct the lane number before printing it.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; By-element MLA where the lane source (operand 1) has the opposite
;; register width (<VSWAP_WIDTH>) to the result vector.
(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))
         (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    /* Lane number is interpreted in the swapped-width mode.  */
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; MLA where one multiplicand is a duplicated scalar register; uses the
;; by-element form with lane 0.
(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3 (MLS).
(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                              (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)
;; MLS with one multiplicand broadcast from lane operands[2] of
;; operand 1 (the by-element form of MLS).
(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    /* Endian-correct the lane number before printing it.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; By-element MLS where the lane source (operand 1) has the opposite
;; register width (<VSWAP_WIDTH>) to the result vector.
(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
         (match_operand:VDQHS 4 "register_operand" "0")
         (mult:VDQHS
           (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                  (parallel [(match_operand:SI 2 "immediate_operand")])))
           (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    /* Lane number is interpreted in the swapped-width mode.  */
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
;; MLS where one multiplicand is a duplicated scalar register; uses the
;; by-element form with lane 0.
(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)
1241 ;; Max/Min operations.
;; Max/Min operations.
;; Standard signed/unsigned max/min patterns (SMAX/SMIN/UMAX/UMIN).
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
;; V2DI has no max/min instruction, so synthesize it as a compare
;; followed by a vcond select of the original operands.
(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand" "")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                    (match_operand:V2DI 2 "register_operand" "")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  /* Map the max/min code to the comparison that selects operand 1.  */
  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
}
)
1284 ;; Pairwise Integer Max/Min operations.
;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                        MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1295 ;; Pairwise FP Max/Min operations.
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                      (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)
1306 ;; vec_concat gives a new vector with the low elements from operand 1, and
1307 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1308 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1309 ;; What that means, is that the RTL descriptions of the below patterns
1310 ;; need to change depending on endianness.
1312 ;; Move to the low architectural bits of the register.
1313 ;; On little-endian this is { operand, zeroes }
1314 ;; On big-endian this is { zeroes, operand }
;; Little-endian move of operand 1 into the low 64 bits of a Q register,
;; zeroing the high half.  All three alternatives write only the low
;; 64 bits (DUP/FMOV to a D register implicitly zero-extend).
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; As above for the two-element (V2DI/V2DF) modes, where the zero half
;; is a plain (const_int 0) rather than a vec_duplicate.
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
;; Big-endian counterparts: the zero half appears first in the
;; vec_concat because lane order is reversed, but the emitted
;; instructions are identical to the little-endian versions.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

;; Two-element big-endian variant.
(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)
;; Dispatch to the endian-specific move_lo_quad implementation.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)
1393 ;; Move operand1 to the high architectural bits of the register, keeping
1394 ;; the low architectural bits of operand2.
1395 ;; For little-endian this is { operand2, operand1 }
1396 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: insert operand 1 into the high 64 bits of operand 0,
;; keeping the low half (INS into d[1]).  Operand 2 is the parallel
;; selecting the preserved low half.
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
;; Big-endian counterpart: the new half appears first in the
;; vec_concat, but the emitted INS is the same.
(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
                (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)
;; Dispatch to the endian-specific move_hi_quad insn, supplying the
;; lo-half selector parallel it needs.
(define_expand "move_hi_quad_<mode>"
 [(match_operand:VQ 0 "register_operand" "")
  (match_operand:<VHALF> 1 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                    operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                    operands[1], p));
  DONE;
}
)
1441 ;; Narrowing operations.
1443 ;; For doubles.
;; Narrowing operations.

;; For doubles: truncate each element of a Q-reg vector to half width (XTN).
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
;; Pack-truncate two D-reg vectors: assemble them into one Q-reg vector
;; (endian-swapping which operand lands in which half) and XTN it.
(define_expand "vec_pack_trunc_<mode>"
 [(match_operand:<VNARROWD> 0 "register_operand" "")
  (match_operand:VDN 1 "register_operand" "")
  (match_operand:VDN 2 "register_operand" "")]
 "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
}
)
1468 ;; For quads.
;; For quads.
;; Truncate two Q-reg vectors into one via XTN + XTN2; the order of the
;; two narrows is swapped on big-endian.  Output is earlyclobber since
;; it is written before operand 1/2 is fully consumed.
(define_insn "vec_pack_trunc_<mode>"
 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
       (vec_concat:<VNARROWQ2>
         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
         (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
 "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
   else
     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
 }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)
1486 ;; Widening operations.
;; Widening operations.

;; Sign/zero-extend the low half of a Q-reg vector via a shift-left-long
;; by zero (SSHLL/USHLL #0).
(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                               (match_operand:VQW 1 "register_operand" "w")
                               (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; As above for the high half (SSHLL2/USHLL2 #0).
(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                               (match_operand:VQW 1 "register_operand" "w")
                               (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; Standard vec_unpack hi pattern: build the hi-half selector and defer
;; to the insn above.
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
;; Standard vec_unpack lo pattern: build the lo-half selector and defer
;; to the insn above.
(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)
1534 ;; Widening arithmetic.
;; Widening arithmetic.

;; Widening multiply-accumulate on the low halves of operands 2 and 4
;; (SMLAL/UMLAL); accumulator is the wide operand 1, tied to the output.
(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half counterpart (SMLAL2/UMLAL2).
(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-subtract on the low halves of operands 2 and 4
;; (SMLSL/UMLSL); accumulator is the wide operand 1, tied to the output.
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

;; High-half counterpart (SMLSL2/UMLSL2).
(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 2 "register_operand" "w")
                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                 (match_operand:VQW 4 "register_operand" "w")
                 (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-accumulate of whole D-reg vectors (SMLAL/UMLAL).
(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply-subtract of whole D-reg vectors (SMLSL/UMLSL).
(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
;; Widening multiply of the low halves of two Q-reg vectors
;; (SMULL/UMULL).  Operand 3 selects the low half of both inputs.
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 1 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)
;; Standard widening-multiply lo pattern: build the lo-half selector and
;; defer to the insn above.
(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
 "TARGET_SIMD"
{
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                       operands[1],
                                                       operands[2], p));
   DONE;
}
)
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658                             (match_operand:VQW 1 "register_operand" "w")
1659                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661                             (match_operand:VQW 2 "register_operand" "w")
1662                             (match_dup 3)))))]
1663   "TARGET_SIMD"
1664   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665   [(set_attr "type" "neon_mul_<Vetype>_long")]
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669   [(match_operand:<VWIDE> 0 "register_operand" "")
1670    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1672  "TARGET_SIMD"
1674    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1676                                                        operands[1],
1677                                                        operands[2], p));
1678    DONE;
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1689 ;; Floating-point operations can raise an exception.  Vectorizing such
1690 ;; operations is safe because of the reasons explained below.
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling, however this is an optional feature.  In the
1694 ;; event of a floating-point exception being raised by vectorised
1695 ;; code then:
1696 ;; 1.  If trapped floating-point exceptions are available, then a trap
1697 ;;     will be taken when any lane raises an enabled exception.  A trap
1698 ;;     handler may determine which lane raised the exception.
1699 ;; 2.  Alternatively a sticky exception flag is set in the
1700 ;;     floating-point status register (FPSR).  Software may explicitly
1701 ;;     test the exception flags, in which case the tests will either
1702 ;;     prevent vectorisation, allowing precise identification of the
1703 ;;     failing operation, or if tested outside of vectorisable regions
1704 ;;     then the specific operation and lane are not of interest.
1706 ;; FP arithmetic operations.
;; Vector FP addition (fadd) for all HF/SF/DF vector modes.
1708 (define_insn "add<mode>3"
1709  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711                    (match_operand:VHSDF 2 "register_operand" "w")))]
1712  "TARGET_SIMD"
1713  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction (fsub).
1717 (define_insn "sub<mode>3"
1718  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720                     (match_operand:VHSDF 2 "register_operand" "w")))]
1721  "TARGET_SIMD"
1722  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication (fmul).
1726 (define_insn "mul<mode>3"
1727  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729                    (match_operand:VHSDF 2 "register_operand" "w")))]
1730  "TARGET_SIMD"
1731  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP division: first try a Newton-Raphson reciprocal
;; approximation sequence (aarch64_emit_approx_div); if that is not
;; profitable/enabled, fall through to the *div<mode>3 fdiv insn.
1735 (define_expand "div<mode>3"
1736  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738                   (match_operand:VHSDF 2 "register_operand" "w")))]
1739  "TARGET_SIMD"
1741   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1742     DONE;
1744   operands[1] = force_reg (<MODE>mode, operands[1]);
;; The plain hardware divide, used when no approximation was emitted.
1747 (define_insn "*div<mode>3"
1748  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750                  (match_operand:VHSDF 2 "register_operand" "w")))]
1751  "TARGET_SIMD"
1752  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation (fneg).
1756 (define_insn "neg<mode>2"
1757  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1759  "TARGET_SIMD"
1760  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (fabs).
1764 (define_insn "abs<mode>2"
1765  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767  "TARGET_SIMD"
1768  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: 0 = 1 * 2 + 3; the accumulator (operand 3) is
;; tied to the output register, matching fmla's destructive form.
1772 (define_insn "fma<mode>4"
1773   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775                   (match_operand:VHSDF 2 "register_operand" "w")
1776                   (match_operand:VHSDF 3 "register_operand" "0")))]
1777   "TARGET_SIMD"
1778  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMA with one input broadcast from a lane of a same-width vector
;; (fmla by element); the lane index is remapped for endianness.
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783   [(set (match_operand:VDQF 0 "register_operand" "=w")
1784     (fma:VDQF
1785       (vec_duplicate:VDQF
1786         (vec_select:<VEL>
1787           (match_operand:VDQF 1 "register_operand" "<h_con>")
1788           (parallel [(match_operand:SI 2 "immediate_operand")])))
1789       (match_operand:VDQF 3 "register_operand" "w")
1790       (match_operand:VDQF 4 "register_operand" "0")))]
1791   "TARGET_SIMD"
1792   {
1793     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1795   }
1796   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from the opposite-width vector mode
;; (e.g. a V2SF lane feeding a V4SF fmla).
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1801     (fma:VDQSF
1802       (vec_duplicate:VDQSF
1803         (vec_select:<VEL>
1804           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805           (parallel [(match_operand:SI 2 "immediate_operand")])))
1806       (match_operand:VDQSF 3 "register_operand" "w")
1807       (match_operand:VDQSF 4 "register_operand" "0")))]
1808   "TARGET_SIMD"
1809   {
1810     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1812   }
1813   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMA with one input duplicated from a scalar register; emitted as
;; an fmla by element using lane 0.
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817   [(set (match_operand:VMUL 0 "register_operand" "=w")
1818     (fma:VMUL
1819       (vec_duplicate:VMUL
1820           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821       (match_operand:VMUL 2 "register_operand" "w")
1822       (match_operand:VMUL 3 "register_operand" "0")))]
1823   "TARGET_SIMD"
1824   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma taking one multiplicand from a V2DF lane.
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829   [(set (match_operand:DF 0 "register_operand" "=w")
1830     (fma:DF
1831         (vec_select:DF
1832           (match_operand:V2DF 1 "register_operand" "w")
1833           (parallel [(match_operand:SI 2 "immediate_operand")]))
1834       (match_operand:DF 3 "register_operand" "w")
1835       (match_operand:DF 4 "register_operand" "0")))]
1836   "TARGET_SIMD"
1837   {
1838     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1840   }
1841   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: 0 = -(1) * 2 + 3, i.e. 3 - 1 * 2 (fmls).
1844 (define_insn "fnma<mode>4"
1845   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1846         (fma:VHSDF
1847           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848           (match_operand:VHSDF 2 "register_operand" "w")
1849           (match_operand:VHSDF 3 "register_operand" "0")))]
1850   "TARGET_SIMD"
1851   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; fmls by element: one multiplicand broadcast from a lane of a
;; same-width vector; lane index endian-remapped.
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856   [(set (match_operand:VDQF 0 "register_operand" "=w")
1857     (fma:VDQF
1858       (neg:VDQF
1859         (match_operand:VDQF 3 "register_operand" "w"))
1860       (vec_duplicate:VDQF
1861         (vec_select:<VEL>
1862           (match_operand:VDQF 1 "register_operand" "<h_con>")
1863           (parallel [(match_operand:SI 2 "immediate_operand")])))
1864       (match_operand:VDQF 4 "register_operand" "0")))]
1865   "TARGET_SIMD"
1866   {
1867     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1869   }
1870   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from the opposite-width vector mode.
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1875     (fma:VDQSF
1876       (neg:VDQSF
1877         (match_operand:VDQSF 3 "register_operand" "w"))
1878       (vec_duplicate:VDQSF
1879         (vec_select:<VEL>
1880           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881           (parallel [(match_operand:SI 2 "immediate_operand")])))
1882       (match_operand:VDQSF 4 "register_operand" "0")))]
1883   "TARGET_SIMD"
1884   {
1885     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1887   }
1888   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmls with one input duplicated from a scalar register (lane 0).
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892   [(set (match_operand:VMUL 0 "register_operand" "=w")
1893     (fma:VMUL
1894       (neg:VMUL
1895         (match_operand:VMUL 2 "register_operand" "w"))
1896       (vec_duplicate:VMUL
1897         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898       (match_operand:VMUL 3 "register_operand" "0")))]
1899   "TARGET_SIMD"
1900   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fmls taking one multiplicand from a V2DF lane (here the
;; neg wraps the other multiplicand; fma is commutative in 1 and 2).
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905   [(set (match_operand:DF 0 "register_operand" "=w")
1906     (fma:DF
1907       (vec_select:DF
1908         (match_operand:V2DF 1 "register_operand" "w")
1909         (parallel [(match_operand:SI 2 "immediate_operand")]))
1910       (neg:DF
1911         (match_operand:DF 3 "register_operand" "w"))
1912       (match_operand:DF 4 "register_operand" "0")))]
1913   "TARGET_SIMD"
1914   {
1915     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1917   }
1918   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1923 (define_insn "<frint_pattern><mode>2"
1924   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1926                        FRINT))]
1927   "TARGET_SIMD"
1928   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929   [(set_attr "type" "neon_fp_round_<stype><q>")]
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT's rounding mode) then convert to [un]signed integer.
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937                                [(match_operand:VHSDF 1 "register_operand" "w")]
1938                                FCVT)))]
1939   "TARGET_SIMD"
1940   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1944 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert; needs the F16 instruction set.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946   [(set (match_operand:HI 0 "register_operand" "=w")
1947         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1948                       FCVT)))]
1949   "TARGET_SIMD_F16INST"
1950   "fcvt<frint_suffix><su>\t%h0, %h1"
1951   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (fcvtz[su]).
1954 (define_insn "<optab>_trunchfhi2"
1955   [(set (match_operand:HI 0 "register_operand" "=w")
1956         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957   "TARGET_SIMD_F16INST"
1958   "fcvtz<su>\t%h0, %h1"
1959   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF [un]signed integer-to-float conversion.
1962 (define_insn "<optab>hihf2"
1963   [(set (match_operand:HF 0 "register_operand" "=w")
1964         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965   "TARGET_SIMD_F16INST"
1966   "<su_optab>cvtf\t%h0, %h1"
1967   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (x * 2^n) followed by truncating convert into a single
;; fixed-point fcvtz[su] with #fbits, when the multiplier is a
;; power of two within the element's bit width.
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1973                                [(mult:VDQF
1974          (match_operand:VDQF 1 "register_operand" "w")
1975          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1976                                UNSPEC_FRINTZ)))]
1977   "TARGET_SIMD
1978    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1980   {
1981     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1982     char buf[64];
1983     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984     output_asm_insn (buf, operands);
1985     return "";
1986   }
1987   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard fix/fixuns pattern: truncate toward zero then convert.
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993                                [(match_operand:VHSDF 1 "register_operand")]
1994                                 UNSPEC_FRINTZ)))]
1995   "TARGET_SIMD"
1996   {})
;; fix_trunc/fixuns_trunc spelling of the same conversion.
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001                                [(match_operand:VHSDF 1 "register_operand")]
2002                                 UNSPEC_FRINTZ)))]
2003   "TARGET_SIMD"
2004   {})
;; ftrunc: round toward zero, keeping the FP mode.
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007   [(set (match_operand:VHSDF 0 "register_operand")
2008         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2009                        UNSPEC_FRINTZ))]
2010   "TARGET_SIMD"
2011   {})
;; [un]signed integer vector -> FP vector conversion ([us]cvtf).
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2015         (FLOATUORS:VHSDF
2016           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2017   "TARGET_SIMD"
2018   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2026 ;; Float widening operations.
;; Widen the low half of a HF/SF vector to the next wider FP mode
;; (fcvtl); operand 2 is the low-half lane selector.
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030         (float_extend:<VWIDE> (vec_select:<VHALF>
2031                                (match_operand:VQ_HSF 1 "register_operand" "w")
2032                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2033                             )))]
2034   "TARGET_SIMD"
2035   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036   [(set_attr "type" "neon_fp_cvt_widen_s")]
2039 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits fractional bits (fcvtz[su] #imm).
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043         (unspec:<VHSDF:FCVT_TARGET>
2044           [(match_operand:VHSDF 1 "register_operand" "w")
2045            (match_operand:SI 2 "immediate_operand" "i")]
2046          FCVT_F2FIXED))]
2047   "TARGET_SIMD"
2048   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits fractional bits ([us]cvtf #imm).
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054         (unspec:<VDQ_HSDI:FCVT_TARGET>
2055           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056            (match_operand:SI 2 "immediate_operand" "i")]
2057          FCVT_FIXED2F))]
2058   "TARGET_SIMD"
2059   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns their behavior is as required.
;; Standard pattern: widen the low half.  Builds the lo-half lane
;; selector (high_p = false) and emits the fcvtl insn above.
2072 (define_expand "vec_unpacks_lo_<mode>"
2073   [(match_operand:<VWIDE> 0 "register_operand" "")
2074    (match_operand:VQ_HSF 1 "register_operand" "")]
2075   "TARGET_SIMD"
2076   {
2077     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2079                                                        operands[1], p));
2080     DONE;
2081   }
;; Widen the high half of a HF/SF vector (fcvtl2); operand 2 is the
;; hi-half lane selector.
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086         (float_extend:<VWIDE> (vec_select:<VHALF>
2087                                (match_operand:VQ_HSF 1 "register_operand" "w")
2088                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2089                             )))]
2090   "TARGET_SIMD"
2091   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard pattern: widen the high half.  Build the hi-half lane
;; selector (high_p = true) and emit the matching *_hi_ insn
;; (fcvtl2) — emitting the lo variant here would widen the wrong
;; half and reject the hi-half parallel.
2095 (define_expand "vec_unpacks_hi_<mode>"
2096   [(match_operand:<VWIDE> 0 "register_operand" "")
2097    (match_operand:VQ_HSF 1 "register_operand" "")]
2098   "TARGET_SIMD"
2099   {
2100     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2102                                                        operands[1], p));
2103     DONE;
2104   }
;; Widen a 64-bit FP vector to the full-width mode (fcvtl).
2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108         (float_extend:<VWIDE>
2109           (match_operand:VDF 1 "register_operand" "w")))]
2110   "TARGET_SIMD"
2111   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112   [(set_attr "type" "neon_fp_cvt_widen_s")]
2115 ;; Float narrowing operations.
;; Narrow a wide FP vector into a 64-bit result (fcvtn).
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118   [(set (match_operand:VDF 0 "register_operand" "=w")
2119       (float_truncate:VDF
2120         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2121   "TARGET_SIMD"
2122   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2, little-endian form: the narrowed half is concatenated
;; after the preserved low half (operand 1 tied to the output).
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2128     (vec_concat:<VDBL>
2129       (match_operand:VDF 1 "register_operand" "0")
2130       (float_truncate:VDF
2131         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2, big-endian form: vec_concat operand order is swapped.
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139     (vec_concat:<VDBL>
2140       (float_truncate:VDF
2141         (match_operand:<VWIDE> 2 "register_operand" "w"))
2142       (match_operand:VDF 1 "register_operand" "0")))]
2143   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le/_be insn according to target endianness.
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149   [(match_operand:<VDBL> 0 "register_operand" "=w")
2150    (match_operand:VDF 1 "register_operand" "0")
2151    (match_operand:<VWIDE> 2 "register_operand" "w")]
2152   "TARGET_SIMD"
2154   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157   emit_insn (gen (operands[0], operands[1], operands[2]));
2158   DONE;
;; Pack two V2DF inputs into one V4SF: narrow one into the low half
;; (fcvtn) and the other into the high half (fcvtn2); which input is
;; "lo" depends on endianness.
2162 (define_expand "vec_pack_trunc_v2df"
2163   [(set (match_operand:V4SF 0 "register_operand")
2164       (vec_concat:V4SF
2165         (float_truncate:V2SF
2166             (match_operand:V2DF 1 "register_operand"))
2167         (float_truncate:V2SF
2168             (match_operand:V2DF 2 "register_operand"))
2169           ))]
2170   "TARGET_SIMD"
2171   {
2172     rtx tmp = gen_reg_rtx (V2SFmode);
2173     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2176     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178                                                    tmp, operands[hi]));
2179     DONE;
2180   }
;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two
;; inputs via move_lo/hi_quad, then narrow it with fcvtn.
2183 (define_expand "vec_pack_trunc_df"
2184   [(set (match_operand:V2SF 0 "register_operand")
2185       (vec_concat:V2SF
2186         (float_truncate:SF
2187             (match_operand:DF 1 "register_operand"))
2188         (float_truncate:SF
2189             (match_operand:DF 2 "register_operand"))
2190           ))]
2191   "TARGET_SIMD"
2192   {
2193     rtx tmp = gen_reg_rtx (V2SFmode);
2194     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2197     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2200     DONE;
2201   }
2204 ;; FP Max/Min
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2206 ;; expression like:
2207 ;;      a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2216 ;; NaNs.
;; smax/smin standard patterns, mapped to the NaN-propagation-defined
;; fmaxnm/fminnm forms (see the comment block above).
2218 (define_insn "<su><maxmin><mode>3"
2219   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221                        (match_operand:VHSDF 2 "register_operand" "w")))]
2222   "TARGET_SIMD"
2223   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2229 ;; which implement the IEEE fmax ()/fmin () functions.
2230 (define_insn "<maxmin_uns><mode>3"
2231   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233                       (match_operand:VHSDF 2 "register_operand" "w")]
2234                       FMAXMIN_UNS))]
2235   "TARGET_SIMD"
2236   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2240 ;; 'across lanes' add.
;; Scalar result of an across-lanes integer add: reduce into a vector
;; scratch, then extract (endian-adjusted) lane 0.
2242 (define_expand "reduc_plus_scal_<mode>"
2243   [(match_operand:<VEL> 0 "register_operand" "=w")
2244    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2245                UNSPEC_ADDV)]
2246   "TARGET_SIMD"
2247   {
2248     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249     rtx scratch = gen_reg_rtx (<MODE>mode);
2250     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2252     DONE;
2253   }
;; Pairwise FP add of two vectors (faddp).
2256 (define_insn "aarch64_faddp<mode>"
2257  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259                       (match_operand:VHSDF 2 "register_operand" "w")]
2260         UNSPEC_FADDV))]
2261  "TARGET_SIMD"
2262  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (addv/addp) leaving the sum in a lane.
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267  [(set (match_operand:VDQV 0 "register_operand" "=w")
2268        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2269                     UNSPEC_ADDV))]
2270  "TARGET_SIMD"
2271  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272   [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no addv form; a single pairwise add reduces both lanes.
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276  [(set (match_operand:V2SI 0 "register_operand" "=w")
2277        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2278                     UNSPEC_ADDV))]
2279  "TARGET_SIMD"
2280  "addp\\t%0.2s, %1.2s, %1.2s"
2281   [(set_attr "type" "neon_reduc_add")]
;; Two-lane FP vectors reduce with a single scalar faddp.
2284 (define_insn "reduc_plus_scal_<mode>"
2285  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2287                    UNSPEC_FADDV))]
2288  "TARGET_SIMD"
2289  "faddp\\t%<Vetype>0, %1.<Vtype>"
2290   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two rounds of pairwise adds, then extract lane 0.
2293 (define_expand "reduc_plus_scal_v4sf"
2294  [(set (match_operand:SF 0 "register_operand")
2295        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2296                     UNSPEC_FADDV))]
2297  "TARGET_SIMD"
2299   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300   rtx scratch = gen_reg_rtx (V4SFmode);
2301   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2304   DONE;
;; Count leading sign bits per element (cls).
2307 (define_insn "clrsb<mode>2"
2308   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2310   "TARGET_SIMD"
2311   "cls\\t%0.<Vtype>, %1.<Vtype>"
2312   [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (clz).
2315 (define_insn "clz<mode>2"
2316  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318  "TARGET_SIMD"
2319  "clz\\t%0.<Vtype>, %1.<Vtype>"
2320   [(set_attr "type" "neon_cls<q>")]
;; Population count per byte element (cnt).
2323 (define_insn "popcount<mode>2"
2324   [(set (match_operand:VB 0 "register_operand" "=w")
2325         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2326   "TARGET_SIMD"
2327   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328   [(set_attr "type" "neon_cnt<q>")]
2331 ;; 'across lanes' max and min ops.
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336   [(match_operand:<VEL> 0 "register_operand")
2337    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2338                   FMAXMINV)]
2339   "TARGET_SIMD"
2340   {
2341     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342     rtx scratch = gen_reg_rtx (<MODE>mode);
2343     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2344                                                               operands[1]));
2345     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2346     DONE;
2347   }
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352   [(match_operand:<VEL> 0 "register_operand")
2353    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2354                     MAXMINV)]
2355   "TARGET_SIMD"
2356   {
2357     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358     rtx scratch = gen_reg_rtx (<MODE>mode);
2359     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360                                                               operands[1]));
2361     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362     DONE;
2363   }
;; Across-lanes integer max/min ([us]maxv/[us]minv).
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2369                     MAXMINV))]
2370  "TARGET_SIMD"
2371  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376  [(set (match_operand:V2SI 0 "register_operand" "=w")
2377        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2378                     MAXMINV))]
2379  "TARGET_SIMD"
2380  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381   [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes FP max/min (fmax[nm]v/fmin[nm]v or pairwise form).
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2387                       FMAXMINV))]
2388  "TARGET_SIMD"
2389  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2394 ;; allocation.
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2396 ;; to select.
2398 ;; Thus our BSL is of the form:
2399 ;;   op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2402 ;;   if (op0 = mask)
2403 ;;     bsl mask, op1, op2
2404 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2405 ;;     bit op0, op2, mask
2406 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2407 ;;     bif op0, op1, mask
2409 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; in *aarch64_simd_bsl<mode>_alt.
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2415         (xor:VDQ_I
2416            (and:VDQ_I
2417              (xor:VDQ_I
2418                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421           (match_dup:<V_INT_EQUIV> 3)
2422         ))]
2423   "TARGET_SIMD"
2424   "@
2425   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428   [(set_attr "type" "neon_bsl<q>")]
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first.  The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same bit-select computation as aarch64_simd_bsl<mode>_internal, but with
;; the operands of the inner/outer XOR commuted: here operand 2 is the value
;; that appears in both XORs.  Needed because recog does not try all operand
;; permutations of commutative operations (see comment above).
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2439         (xor:VDQ_I
2440            (and:VDQ_I
2441              (xor:VDQ_I
2442                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445           (match_dup:<V_INT_EQUIV> 2)))]
2446   "TARGET_SIMD"
2447   "@
2448   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451   [(set_attr "type" "neon_bsl<q>")]
2454 ;; DImode is special, we want to avoid computing operations which are
2455 ;; more naturally computed in general purpose registers in the vector
2456 ;; registers.  If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again.  However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
;; DImode bit-select.  A fourth, all-GP alternative ("&r" destination,
;; "r" inputs, length 12) lets the allocator keep everything on the integer
;; side; it is emitted as "#" and split back to eor/and/eor below.
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2466         (xor:DI
2467            (and:DI
2468              (xor:DI
2469                (match_operand:DI 3 "register_operand" "w,0,w,r")
2470                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2472           (match_dup:DI 3)
2473         ))]
2474   "TARGET_SIMD"
2475   "@
2476   bsl\\t%0.8b, %2.8b, %3.8b
2477   bit\\t%0.8b, %2.8b, %1.8b
2478   bif\\t%0.8b, %3.8b, %1.8b
2479   #"
2480   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
;; Placeholder split template: the C body below emits the real insns and
;; finishes with DONE, so this pattern list is never instantiated.
2481   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2483   /* Split back to individual operations.  If we're before reload, and
2484      able to create a temporary register, do so.  If we're after reload,
2485      we've got an early-clobber destination register, so use that.
2486      Otherwise, we can't create pseudos and we can't yet guarantee that
2487      operands[0] is safe to write, so FAIL to split.  */
2489   rtx scratch;
2490   if (reload_completed)
2491     scratch = operands[0];
2492   else if (can_create_pseudo_p ())
2493     scratch = gen_reg_rtx (DImode);
2494   else
2495     FAIL;
2497   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2500   DONE;
2502   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503    (set_attr "length" "4,4,4,12")]
;; Commuted-XOR form of aarch64_simd_bsldi_internal (operand 2 repeated in
;; the outer XOR instead of operand 3), with the same all-GP alternative
;; and eor/and/eor split.
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2508         (xor:DI
2509            (and:DI
2510              (xor:DI
2511                (match_operand:DI 3 "register_operand" "w,w,0,r")
2512                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2514           (match_dup:DI 2)
2515         ))]
2516   "TARGET_SIMD"
2517   "@
2518   bsl\\t%0.8b, %3.8b, %2.8b
2519   bit\\t%0.8b, %3.8b, %1.8b
2520   bif\\t%0.8b, %2.8b, %1.8b
2521   #"
2522   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
;; Placeholder split template: the C body emits the real insns and calls DONE.
2523   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2525   /* Split back to individual operations.  If we're before reload, and
2526      able to create a temporary register, do so.  If we're after reload,
2527      we've got an early-clobber destination register, so use that.
2528      Otherwise, we can't create pseudos and we can't yet guarantee that
2529      operands[0] is safe to write, so FAIL to split.  */
2531   rtx scratch;
2532   if (reload_completed)
2533     scratch = operands[0];
2534   else if (can_create_pseudo_p ())
2535     scratch = gen_reg_rtx (DImode);
2536   else
2537     FAIL;
2539   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2542   DONE;
2544   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545    (set_attr "length" "4,4,4,12")]
;; Public bit-select expander.  The internal pattern works on the integer
;; equivalent mode, so float-mode operands are viewed as integers via
;; gen_lowpart and the result moved back through a fresh integer-mode temp.
2548 (define_expand "aarch64_simd_bsl<mode>"
2549   [(match_operand:VALLDIF 0 "register_operand")
2550    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551    (match_operand:VALLDIF 2 "register_operand")
2552    (match_operand:VALLDIF 3 "register_operand")]
2553  "TARGET_SIMD"
2555   /* We can't alias operands together if they have different modes.  */
2556   rtx tmp = operands[0];
2557   if (FLOAT_MODE_P (<MODE>mode))
2558     {
2559       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2562     }
2563   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2565                                                          operands[1],
2566                                                          operands[2],
2567                                                          operands[3]));
2568   if (tmp != operands[0])
2569     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2571   DONE;
;; Select between operands 1 and 2 under the mask in operand 3.
;; The -1/0 and 0/-1 constant cases degenerate to a move or a NOT of the
;; mask; everything else is a bit-select via aarch64_simd_bsl.
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575   [(match_operand:VALLDI 0 "register_operand")
2576    (match_operand:VALLDI 1 "nonmemory_operand")
2577    (match_operand:VALLDI 2 "nonmemory_operand")
2578    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2579   "TARGET_SIMD"
2581   /* If we have (a = (P) ? -1 : 0);
2582      Then we can simply move the generated mask (result must be int).  */
2583   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584       && operands[2] == CONST0_RTX (<MODE>mode))
2585     emit_move_insn (operands[0], operands[3]);
2586   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2587   else if (operands[1] == CONST0_RTX (<MODE>mode)
2588            && operands[2] == CONSTM1_RTX (<MODE>mode))
2589     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2590   else
2591     {
2592       if (!REG_P (operands[1]))
2593         operands[1] = force_reg (<MODE>mode, operands[1]);
2594       if (!REG_P (operands[2]))
2595         operands[2] = force_reg (<MODE>mode, operands[2]);
2596       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597                                              operands[1], operands[2]));
2598     }
2600   DONE;
2603 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.
;; Signed compares against zero keep the zero operand (CM* has immediate-#0
;; forms); otherwise operand 3 is forced to a register.  LTU/LEU are handled
;; by swapping operands into CMGTU/CMGEU, and NE as the complement of EQ.
2605 (define_expand "vec_cmp<mode><mode>"
2606   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607           (match_operator 1 "comparison_operator"
2608             [(match_operand:VSDQ_I_DI 2 "register_operand")
2609              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2610   "TARGET_SIMD"
2612   rtx mask = operands[0];
2613   enum rtx_code code = GET_CODE (operands[1]);
2615   switch (code)
2616     {
2617     case NE:
2618     case LE:
2619     case LT:
2620     case GE:
2621     case GT:
2622     case EQ:
2623       if (operands[3] == CONST0_RTX (<MODE>mode))
2624         break;
2626       /* Fall through.  */
2627     default:
2628       if (!REG_P (operands[3]))
2629         operands[3] = force_reg (<MODE>mode, operands[3]);
2631       break;
2632     }
2634   switch (code)
2635     {
2636     case LT:
2637       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2638       break;
2640     case GE:
2641       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2642       break;
2644     case LE:
2645       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2646       break;
2648     case GT:
2649       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2650       break;
2652     case LTU:
2653       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2654       break;
2656     case GEU:
2657       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2658       break;
2660     case LEU:
2661       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2662       break;
2664     case GTU:
2665       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2666       break;
2668     case NE:
2669       /* Handle NE as !EQ.  */
2670       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2672       break;
2674     case EQ:
2675       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2676       break;
2678     default:
2679       gcc_unreachable ();
2680     }
2682   DONE;
;; Floating-point vector compare producing an integer mask.
;; First pass: remember whether the #0.0 form is usable and pick the
;; gen_aarch64_cm* generator (swapping operands where a LT/LE must become
;; GT/GE).  Second pass: emit the compare, plus the extra NaN handling for
;; the UN* codes and the composite sequences for LTGT/ORDERED/UNORDERED/UNEQ.
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687         (match_operator 1 "comparison_operator"
2688             [(match_operand:VDQF 2 "register_operand")
2689              (match_operand:VDQF 3 "nonmemory_operand")]))]
2690   "TARGET_SIMD"
2692   int use_zero_form = 0;
2693   enum rtx_code code = GET_CODE (operands[1]);
2694   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2696   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2698   switch (code)
2699     {
2700     case LE:
2701     case LT:
2702     case GE:
2703     case GT:
2704     case EQ:
2705       if (operands[3] == CONST0_RTX (<MODE>mode))
2706         {
2707           use_zero_form = 1;
2708           break;
2709         }
2710       /* Fall through.  */
2711     default:
2712       if (!REG_P (operands[3]))
2713         operands[3] = force_reg (<MODE>mode, operands[3]);
2715       break;
2716     }
2718   switch (code)
2719     {
2720     case LT:
2721       if (use_zero_form)
2722         {
2723           comparison = gen_aarch64_cmlt<mode>;
2724           break;
2725         }
2726       /* Fall through.  */
2727     case UNLT:
2728       std::swap (operands[2], operands[3]);
2729       /* Fall through.  */
2730     case UNGT:
2731     case GT:
2732       comparison = gen_aarch64_cmgt<mode>;
2733       break;
2734     case LE:
2735       if (use_zero_form)
2736         {
2737           comparison = gen_aarch64_cmle<mode>;
2738           break;
2739         }
2740       /* Fall through.  */
2741     case UNLE:
2742       std::swap (operands[2], operands[3]);
2743       /* Fall through.  */
2744     case UNGE:
2745     case GE:
2746       comparison = gen_aarch64_cmge<mode>;
2747       break;
2748     case NE:
2749     case EQ:
2750       comparison = gen_aarch64_cmeq<mode>;
2751       break;
2752     case UNEQ:
2753     case ORDERED:
2754     case UNORDERED:
2755     case LTGT:
2756       break;
2757     default:
2758       gcc_unreachable ();
2759     }
2761   switch (code)
2762     {
2763     case UNGE:
2764     case UNGT:
2765     case UNLE:
2766     case UNLT:
2767       {
2768         /* All of the above must not raise any FP exceptions.  Thus we first
2769            check each operand for NaNs and force any elements containing NaN to
2770            zero before using them in the compare.
2771            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772                                      (cm<cc> (isnan (a) ? 0.0 : a,
2773                                               isnan (b) ? 0.0 : b))
2774            We use the following transformations for doing the comparisons:
2775            a UNGE b -> a GE b
2776            a UNGT b -> a GT b
2777            a UNLE b -> b GE a
2778            a UNLT b -> b GT a.  */
2780         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787                                           lowpart_subreg (<V_INT_EQUIV>mode,
2788                                                           operands[2],
2789                                                           <MODE>mode)));
2790         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791                                           lowpart_subreg (<V_INT_EQUIV>mode,
2792                                                           operands[3],
2793                                                           <MODE>mode)));
2794         gcc_assert (comparison != NULL);
2795         emit_insn (comparison (operands[0],
2796                                lowpart_subreg (<MODE>mode,
2797                                                tmp0, <V_INT_EQUIV>mode),
2798                                lowpart_subreg (<MODE>mode,
2799                                                tmp1, <V_INT_EQUIV>mode)));
2800         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2801       }
2802       break;
2804     case LT:
2805     case LE:
2806     case GT:
2807     case GE:
2808     case EQ:
2809     case NE:
2810       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2811          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2812          a GE b -> a GE b
2813          a GT b -> a GT b
2814          a LE b -> b GE a
2815          a LT b -> b GT a
2816          a EQ b -> a EQ b
2817          a NE b -> ~(a EQ b)  */
2818       gcc_assert (comparison != NULL);
2819       emit_insn (comparison (operands[0], operands[2], operands[3]));
2820       if (code == NE)
2821         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2822       break;
2824     case LTGT:
2825       /* LTGT is not guaranteed to not generate a FP exception.  So let's
2826          go the faster way : ((a > b) || (b > a)).  */
2827       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828                                          operands[2], operands[3]));
2829       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831       break;
2833     case ORDERED:
2834     case UNORDERED:
2835     case UNEQ:
2836       /* cmeq (a, a) & cmeq (b, b).  */
2837       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838                                          operands[2], operands[2]));
2839       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2842       if (code == UNORDERED)
2843         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844       else if (code == UNEQ)
2845         {
2846           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2848         }
2849       break;
2851     default:
2852       gcc_unreachable ();
2853     }
2855   DONE;
;; Unsigned integer vector compare.  The signed expander already dispatches
;; on the rtx code (GTU/GEU/LTU/LEU included), so simply delegate to it.
2858 (define_expand "vec_cmpu<mode><mode>"
2859   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860           (match_operator 1 "comparison_operator"
2861             [(match_operand:VSDQ_I_DI 2 "register_operand")
2862              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2863   "TARGET_SIMD"
2865   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866                                       operands[2], operands[3]));
2867   DONE;
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2, lowered to vec_cmp followed by
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped so vec_cmp
;; does not need to emit the extra NOT it otherwise uses for NE.
2870 (define_expand "vcond<mode><mode>"
2871   [(set (match_operand:VALLDI 0 "register_operand")
2872         (if_then_else:VALLDI
2873           (match_operator 3 "comparison_operator"
2874             [(match_operand:VALLDI 4 "register_operand")
2875              (match_operand:VALLDI 5 "nonmemory_operand")])
2876           (match_operand:VALLDI 1 "nonmemory_operand")
2877           (match_operand:VALLDI 2 "nonmemory_operand")))]
2878   "TARGET_SIMD"
2880   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881   enum rtx_code code = GET_CODE (operands[3]);
2883   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2884      it as well as switch operands 1/2 in order to avoid the additional
2885      NOT instruction.  */
2886   if (code == NE)
2887     {
2888       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889                                     operands[4], operands[5]);
2890       std::swap (operands[1], operands[2]);
2891     }
2892   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893                                              operands[4], operands[5]));
2894   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895                                                  operands[2], mask));
2897   DONE;
;; Mixed-mode vcond: the comparison is done in VDQF_COND mode while the
;; selected data are in the matching <V_cmp_mixed> mode.  Same NE-to-EQ
;; swap trick as vcond<mode><mode> above.
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902         (if_then_else:<V_cmp_mixed>
2903           (match_operator 3 "comparison_operator"
2904             [(match_operand:VDQF_COND 4 "register_operand")
2905              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2908   "TARGET_SIMD"
2910   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911   enum rtx_code code = GET_CODE (operands[3]);
2913   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2914      it as well as switch operands 1/2 in order to avoid the additional
2915      NOT instruction.  */
2916   if (code == NE)
2917     {
2918       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919                                     operands[4], operands[5]);
2920       std::swap (operands[1], operands[2]);
2921     }
2922   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923                                              operands[4], operands[5]));
2924   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925                                                 operands[0], operands[1],
2926                                                 operands[2], mask));
2928   DONE;
;; Unsigned vcond on integer vectors: vec_cmp (which handles the unsigned
;; codes) followed by vcond_mask, with the same NE-to-EQ swap optimization.
2931 (define_expand "vcondu<mode><mode>"
2932   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933         (if_then_else:VSDQ_I_DI
2934           (match_operator 3 "comparison_operator"
2935             [(match_operand:VSDQ_I_DI 4 "register_operand")
2936              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2939   "TARGET_SIMD"
2941   rtx mask = gen_reg_rtx (<MODE>mode);
2942   enum rtx_code code = GET_CODE (operands[3]);
2944   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2945      it as well as switch operands 1/2 in order to avoid the additional
2946      NOT instruction.  */
2947   if (code == NE)
2948     {
2949       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950                                     operands[4], operands[5]);
2951       std::swap (operands[1], operands[2]);
2952     }
2953   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954                                       operands[4], operands[5]));
2955   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956                                                  operands[2], mask));
2957   DONE;
;; Unsigned comparison in <V_cmp_mixed> integer mode selecting between
;; float-mode (VDQF) values; mask feeds the float-mode vcond_mask.
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961   [(set (match_operand:VDQF 0 "register_operand")
2962         (if_then_else:VDQF
2963           (match_operator 3 "comparison_operator"
2964             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966           (match_operand:VDQF 1 "nonmemory_operand")
2967           (match_operand:VDQF 2 "nonmemory_operand")))]
2968   "TARGET_SIMD"
2970   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971   enum rtx_code code = GET_CODE (operands[3]);
2973   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974      it as well as switch operands 1/2 in order to avoid the additional
2975      NOT instruction.  */
2976   if (code == NE)
2977     {
2978       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979                                     operands[4], operands[5]);
2980       std::swap (operands[1], operands[2]);
2981     }
2982   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2983                                                   mask, operands[3],
2984                                                   operands[4], operands[5]));
2985   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986                                                  operands[2], mask));
2987   DONE;
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2992 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract one lane and sign-extend it into a general-purpose
;; register.  The lane index is flipped for big-endian by
;; aarch64_endian_lane_rtx before the template is printed.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994   [(set (match_operand:GPI 0 "register_operand" "=r")
2995         (sign_extend:GPI
2996           (vec_select:<VEL>
2997             (match_operand:VDQQH 1 "register_operand" "w")
2998             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2999   "TARGET_SIMD"
3000   {
3001     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3002     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3003   }
3004   [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: extract one lane and zero-extend it to SImode in a GP register,
;; with the same endian lane-number adjustment as the sign-extend pattern.
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008   [(set (match_operand:SI 0 "register_operand" "=r")
3009         (zero_extend:SI
3010           (vec_select:<VEL>
3011             (match_operand:VDQQH 1 "register_operand" "w")
3012             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3013   "TARGET_SIMD"
3014   {
3015     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016     return "umov\\t%w0, %1.<Vetype>[%2]";
3017   }
3018   [(set_attr "type" "neon_to_gp<q>")]
3021 ;; Lane extraction of a value, neither sign nor zero extension
3022 ;; is guaranteed so upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Generic lane extraction to a GP register (umov), another SIMD register
;; (dup) or memory (st1), chosen by which_alternative.
3024 (define_insn "aarch64_get_lane<mode>"
3025   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3026         (vec_select:<VEL>
3027           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3029   "TARGET_SIMD"
3030   {
3031     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032     switch (which_alternative)
3033       {
3034         case 0:
3035           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3036         case 1:
3037           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3038         case 2:
3039           return "st1\\t{%1.<Vetype>}[%2], %0";
3040         default:
3041           gcc_unreachable ();
3042       }
3043   }
3044   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Concatenation of two adjacent 64-bit loads done as a single Q-register
;; ldr.  The condition requires operand 2's address to be exactly operand
;; 1's address plus the mode size, so one 128-bit load covers both halves.
3047 (define_insn "load_pair_lanes<mode>"
3048   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3049         (vec_concat:<VDBL>
3050            (match_operand:VDC 1 "memory_operand" "Utq")
3051            (match_operand:VDC 2 "memory_operand" "m")))]
3052   "TARGET_SIMD && !STRICT_ALIGNMENT
3053    && rtx_equal_p (XEXP (operands[2], 0),
3054                    plus_constant (Pmode,
3055                                   XEXP (operands[1], 0),
3056                                   GET_MODE_SIZE (<MODE>mode)))"
3057   "ldr\\t%q0, %1"
3058   [(set_attr "type" "neon_load1_1reg_q")]
;; Store a 128-bit concatenation of two 64-bit values with a single stp,
;; from either SIMD (d) or general-purpose (x) registers.
3061 (define_insn "store_pair_lanes<mode>"
3062   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3063         (vec_concat:<VDBL>
3064            (match_operand:VDC 1 "register_operand" "w, r")
3065            (match_operand:VDC 2 "register_operand" "w, r")))]
3066   "TARGET_SIMD"
3067   "@
3068    stp\\t%d1, %d2, %y0
3069    stp\\t%x1, %x2, %y0"
3070   [(set_attr "type" "neon_stp, store_16")]
3073 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3074 ;; dest vector.
;; Little-endian combine with a zero high half: since the upper 64 bits of
;; the result are zero, a plain 64-bit mov/fmov/ldr into the low half is
;; sufficient (these implicitly clear the top of the Q register).
3076 (define_insn "*aarch64_combinez<mode>"
3077   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3078         (vec_concat:<VDBL>
3079           (match_operand:VDC 1 "general_operand" "w,?r,m")
3080           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3082   "@
3083    mov\\t%0.8b, %1.8b
3084    fmov\t%d0, %1
3085    ldr\\t%d0, %1"
3086   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087    (set_attr "simd" "yes,*,yes")
3088    (set_attr "fp" "*,yes,*")]
;; Big-endian counterpart of *aarch64_combinez: the zero constant appears
;; as the first vec_concat element, but the emitted code is identical.
3091 (define_insn "*aarch64_combinez_be<mode>"
3092   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3093         (vec_concat:<VDBL>
3094           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3097   "@
3098    mov\\t%0.8b, %1.8b
3099    fmov\t%d0, %1
3100    ldr\\t%d0, %1"
3101   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102    (set_attr "simd" "yes,*,yes")
3103    (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit vectors into one 128-bit vector; the endian-dependent
;; half placement is handled in aarch64_split_simd_combine.
3106 (define_expand "aarch64_combine<mode>"
3107   [(match_operand:<VDBL> 0 "register_operand")
3108    (match_operand:VDC 1 "register_operand")
3109    (match_operand:VDC 2 "register_operand")]
3110   "TARGET_SIMD"
3112   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3114   DONE;
;; Build the double-width result by writing the low and high quad halves
;; separately via the move_lo_quad/move_hi_quad patterns.
3118 (define_expand "aarch64_simd_combine<mode>"
3119   [(match_operand:<VDBL> 0 "register_operand")
3120    (match_operand:VDC 1 "register_operand")
3121    (match_operand:VDC 2 "register_operand")]
3122   "TARGET_SIMD"
3123   {
3124     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3126     DONE;
3127   }
3128 [(set_attr "type" "multiple")]
3131 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two Q registers
;; ({s,u}{add,sub}l2): both inputs are narrowed by the same hi-half
;; vec_select, extended, then added/subtracted into the wide mode.
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136                            (match_operand:VQW 1 "register_operand" "w")
3137                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139                            (match_operand:VQW 2 "register_operand" "w")
3140                            (match_dup 3)))))]
3141   "TARGET_SIMD"
3142   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the LOW halves of two Q registers
;; ({s,u}{add,sub}l), using the lo-half selector instead of the hi-half.
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149                            (match_operand:VQW 1 "register_operand" "w")
3150                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152                            (match_operand:VQW 2 "register_operand" "w")
3153                            (match_dup 3)))))]
3154   "TARGET_SIMD"
3155   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; saddl2 builtin expander: build the hi-half lane selector and delegate to
;; the generic _hi_internal pattern.
3160 (define_expand "aarch64_saddl2<mode>"
3161   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162    (match_operand:VQW 1 "register_operand" "w")
3163    (match_operand:VQW 2 "register_operand" "w")]
3164   "TARGET_SIMD"
3166   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3168                                                   operands[2], p));
3169   DONE;
;; uaddl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3172 (define_expand "aarch64_uaddl2<mode>"
3173   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174    (match_operand:VQW 1 "register_operand" "w")
3175    (match_operand:VQW 2 "register_operand" "w")]
3176   "TARGET_SIMD"
3178   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3180                                                   operands[2], p));
3181   DONE;
;; ssubl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3184 (define_expand "aarch64_ssubl2<mode>"
3185   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186    (match_operand:VQW 1 "register_operand" "w")
3187    (match_operand:VQW 2 "register_operand" "w")]
3188   "TARGET_SIMD"
3190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3192                                                 operands[2], p));
3193   DONE;
;; usubl2 builtin expander: hi-half selector + generic _hi_internal pattern.
3196 (define_expand "aarch64_usubl2<mode>"
3197   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198    (match_operand:VQW 1 "register_operand" "w")
3199    (match_operand:VQW 2 "register_operand" "w")]
3200   "TARGET_SIMD"
3202   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3204                                                 operands[2], p));
3205   DONE;
;; Widening add/sub of whole 64-bit vectors ({s,u}{add,sub}l): both inputs
;; are extended to the double-width mode before the operation.
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3212                        (ANY_EXTEND:<VWIDE>
3213                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3214   "TARGET_SIMD"
3215   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3219 ;; <su><addsub>w<q>.
;; Widening signed sum of a Q-register vector into a double-width
;; accumulator: saddw the low half (via the lo-half internal pattern),
;; then saddw2 the high half into the final result.
3221 (define_expand "widen_ssum<mode>3"
3222   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223         (plus:<VDBLW> (sign_extend:<VDBLW> 
3224                         (match_operand:VQW 1 "register_operand" ""))
3225                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3226   "TARGET_SIMD"
3227   {
3228     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3231     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3232                                                 operands[1], p));
3233     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3234     DONE;
3235   }
;; Widening signed sum of a 64-bit vector: a single saddw suffices.
3238 (define_expand "widen_ssum<mode>3"
3239   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240         (plus:<VWIDE> (sign_extend:<VWIDE>
3241                         (match_operand:VD_BHSI 1 "register_operand" ""))
3242                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3243   "TARGET_SIMD"
3245   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3246   DONE;
;; Widening unsigned sum of a Q-register vector: uaddw the low half, then
;; uaddw2 the high half (mirrors the signed widen_ssum expander above).
3249 (define_expand "widen_usum<mode>3"
3250   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251         (plus:<VDBLW> (zero_extend:<VDBLW> 
3252                         (match_operand:VQW 1 "register_operand" ""))
3253                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3254   "TARGET_SIMD"
3255   {
3256     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3259     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3260                                                  operands[1], p));
3261     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3262     DONE;
3263   }
3266 (define_expand "widen_usum<mode>3"
3267   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268         (plus:<VWIDE> (zero_extend:<VWIDE>
3269                         (match_operand:VD_BHSI 1 "register_operand" ""))
3270                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3271   "TARGET_SIMD"
3273   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3274   DONE;
;; <su><addsub>w: op0 = op1 (wide) +/- extend (op2), op2 a 64-bit vector.
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3280                         (ANY_EXTEND:<VWIDE>
3281                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3282   "TARGET_SIMD"
3283   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; As above, but op2 is the low half of a 128-bit register, selected by the
;; vect_par_cnst_lo_half parallel in operand 3; prints the half-width name
;; (<Vhalftype>) for op2.
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3290                         (ANY_EXTEND:<VWIDE>
3291                           (vec_select:<VHALF>
3292                            (match_operand:VQW 2 "register_operand" "w")
3293                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3294   "TARGET_SIMD"
3295   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant: operand 3 is a vect_par_cnst_hi_half selector and the
;; <su><addsub>w2 form of the instruction is emitted.
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3302                         (ANY_EXTEND:<VWIDE>
3303                           (vec_select:<VHALF>
3304                            (match_operand:VQW 2 "register_operand" "w")
3305                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3306   "TARGET_SIMD"
3307   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builtin expanders for the high-half widening forms: build the hi-half
;; lane selector with aarch64_simd_vect_par_cnst_half (true = high) and
;; forward to the matching *w2<mode>_internal pattern above.
3311 (define_expand "aarch64_saddw2<mode>"
3312   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313    (match_operand:<VWIDE> 1 "register_operand" "w")
3314    (match_operand:VQW 2 "register_operand" "w")]
3315   "TARGET_SIMD"
3317   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3319                                                 operands[2], p));
3320   DONE;
;; UADDW2 expander.
3323 (define_expand "aarch64_uaddw2<mode>"
3324   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325    (match_operand:<VWIDE> 1 "register_operand" "w")
3326    (match_operand:VQW 2 "register_operand" "w")]
3327   "TARGET_SIMD"
3329   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3331                                                 operands[2], p));
3332   DONE;
;; SSUBW2 expander.
3336 (define_expand "aarch64_ssubw2<mode>"
3337   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338    (match_operand:<VWIDE> 1 "register_operand" "w")
3339    (match_operand:VQW 2 "register_operand" "w")]
3340   "TARGET_SIMD"
3342   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3344                                                 operands[2], p));
3345   DONE;
;; USUBW2 expander.
3348 (define_expand "aarch64_usubw2<mode>"
3349   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350    (match_operand:<VWIDE> 1 "register_operand" "w")
3351    (match_operand:VQW 2 "register_operand" "w")]
3352   "TARGET_SIMD"
3354   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3356                                                 operands[2], p));
3357   DONE;
3360 ;; <su><r>h<addsub>.
;; Halving add/sub: the <sur> and <addsub> attributes of the HADDSUB
;; iterator select the [su][r]h{add,sub} mnemonic.  Modelled as an unspec.
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3366                      HADDSUB))]
3367   "TARGET_SIMD"
3368   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369   [(set_attr "type" "neon_<addsub>_halve<q>")]
3372 ;; <r><addsub>hn<q>.
;; Add/sub returning the narrowed high half ([r]{add,sub}hn); the result is
;; in the narrow mode <VNARROWQ>.
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377                             (match_operand:VQN 2 "register_operand" "w")]
3378                            ADDSUBHN))]
3379   "TARGET_SIMD"
3380   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; hn2: narrows into the upper half of op0; op1 supplies the existing low
;; half and is tied to op0 (constraint "0"), so it is not printed.
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387                              (match_operand:VQN 2 "register_operand" "w")
3388                              (match_operand:VQN 3 "register_operand" "w")]
3389                             ADDSUBHN2))]
3390   "TARGET_SIMD"
3391   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3395 ;; pmul.
;; Polynomial multiply, byte elements only (VB iterator).
3397 (define_insn "aarch64_pmul<mode>"
3398   [(set (match_operand:VB 0 "register_operand" "=w")
3399         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400                     (match_operand:VB 2 "register_operand" "w")]
3401                    UNSPEC_PMUL))]
3402  "TARGET_SIMD"
3403  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404   [(set_attr "type" "neon_mul_<Vetype><q>")]
3407 ;; fmulx.
;; FMULX over vector and scalar FP modes (VHSDF_HSDF covers both, so the
;; %<v>/<Vmtype> operand modifiers print either form).
3409 (define_insn "aarch64_fmulx<mode>"
3410   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3411         (unspec:VHSDF_HSDF
3412           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3414            UNSPEC_FMULX))]
3415  "TARGET_SIMD"
3416  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417  [(set_attr "type" "neon_fp_mul_<stype>")]
3420 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by element where the lane register has the opposite width from the
;; destination (<VSWAP_WIDTH>); operand 3 is the lane number, adjusted for
;; big-endian lane numbering via aarch64_endian_lane_rtx.
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3424         (unspec:VDQSF
3425          [(match_operand:VDQSF 1 "register_operand" "w")
3426           (vec_duplicate:VDQSF
3427            (vec_select:<VEL>
3428             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3430          UNSPEC_FMULX))]
3431   "TARGET_SIMD"
3432   {
3433     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3435   }
3436   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width variant: the lane register is in the destination mode VDQF.
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442   [(set (match_operand:VDQF 0 "register_operand" "=w")
3443         (unspec:VDQF
3444          [(match_operand:VDQF 1 "register_operand" "w")
3445           (vec_duplicate:VDQF
3446            (vec_select:<VEL>
3447             (match_operand:VDQF 2 "register_operand" "w")
3448             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3449          UNSPEC_FMULX))]
3450   "TARGET_SIMD"
3451   {
3452     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3454   }
3455   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3458 ;; vmulxq_lane
;; FMULX with a scalar operand broadcast to every lane; always prints
;; lane index [0] because op2 is a scalar register.
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3462         (unspec:VHSDF
3463          [(match_operand:VHSDF 1 "register_operand" "w")
3464           (vec_duplicate:VHSDF
3465             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3466          UNSPEC_FMULX))]
3467   "TARGET_SIMD"
3468   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX where the second operand is one element extracted from a
;; vector register; result and op1 are scalar (<VEL>).
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3478         (unspec:<VEL>
3479          [(match_operand:<VEL> 1 "register_operand" "w")
3480           (vec_select:<VEL>
3481            (match_operand:VDQF 2 "register_operand" "w")
3482             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3483          UNSPEC_FMULX))]
3484   "TARGET_SIMD"
3485   {
3486     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3488   }
3489   [(set_attr "type" "fmul<Vetype>")]
3491 ;; <su>q<addsub>
;; Saturating add/sub ([su]q{add,sub}) via the BINQOPS code iterator,
;; covering vector and scalar integer modes (VSDQ_I).
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3497   "TARGET_SIMD"
3498   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499   [(set_attr "type" "neon_<optab><q>")]
3502 ;; suqadd and usqadd
;; Accumulating forms: op1 is tied to op0 ("0") and is not printed; the
;; instruction adds op2 into op0 in place.
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3508                        USSUQADD))]
3509   "TARGET_SIMD"
3510   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511   [(set_attr "type" "neon_qadd<q>")]
3514 ;; sqmovun
;; SQXTUN: saturating extract-narrow to the unsigned narrow mode.
3516 (define_insn "aarch64_sqmovun<mode>"
3517   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3519                             UNSPEC_SQXTUN))]
3520    "TARGET_SIMD"
3521    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3525 ;; sqmovn and uqmovn
;; [su]qxtn: saturating extract-narrow, signedness chosen by SUQMOVN.
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3530                             SUQMOVN))]
3531   "TARGET_SIMD"
3532   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3536 ;; <su>q<absneg>
;; Saturating unary ops (sqabs/sqneg) via the UNQOPS code iterator.
3538 (define_insn "aarch64_s<optab><mode>"
3539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540         (UNQOPS:VSDQ_I
3541           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3542   "TARGET_SIMD"
3543   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544   [(set_attr "type" "neon_<optab><q>")]
3547 ;; sq<r>dmulh.
;; Saturating doubling multiply high half, plain and rounding (<r> from the
;; VQDMULH iterator), over vector and scalar modes.
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3551         (unspec:VSDQ_HSI
3552           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3554          VQDMULH))]
3555   "TARGET_SIMD"
3556   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3560 ;; sq<r>dmulh_lane
;; By-lane form for vector modes; operand 3 (the lane number) is remapped
;; for big-endian with aarch64_endian_lane_rtx before printing.
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3564         (unspec:VDQHS
3565           [(match_operand:VDQHS 1 "register_operand" "w")
3566            (vec_select:<VEL>
3567              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569          VQDMULH))]
3570   "TARGET_SIMD"
3571   "*
3572    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As the _lane pattern above, but the lane register is the full-width
;; <VCONQ> mode (laneq intrinsics).
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3579         (unspec:VDQHS
3580           [(match_operand:VDQHS 1 "register_operand" "w")
3581            (vec_select:<VEL>
3582              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584          VQDMULH))]
3585   "TARGET_SIMD"
3586   "*
3587    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) by-lane form; the lane register is in <VCOND> mode.
;; The lane suffix uses <Vetype> for consistency with the VDQHS patterns
;; above (for HI/SI the v and Vetype attributes print the same letter, so
;; the assembly output is unchanged).
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3594         (unspec:SD_HSI
3595           [(match_operand:SD_HSI 1 "register_operand" "w")
3596            (vec_select:<VEL>
3597              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599          VQDMULH))]
3600   "TARGET_SIMD"
3601   "*
3602    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3604   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) by-lane form with a full-width (<VCONQ>) lane register.
;; The lane suffix uses <Vetype> for consistency with the VDQHS patterns
;; above (for HI/SI the v and Vetype attributes print the same letter, so
;; the assembly output is unchanged).
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3609         (unspec:SD_HSI
3610           [(match_operand:SD_HSI 1 "register_operand" "w")
3611            (vec_select:<VEL>
3612              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614          VQDMULH))]
3615   "TARGET_SIMD"
3616   "*
3617    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3619   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3622 ;; sqrdml[as]h.
;; Saturating rounding doubling multiply accumulate/subtract high half;
;; only available with TARGET_SIMD_RDMA.  The accumulator (op1) is tied to
;; op0 and not printed.
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3626         (unspec:VSDQ_HSI
3627           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3630           SQRDMLH_AS))]
3631    "TARGET_SIMD_RDMA"
3632    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3636 ;; sqrdml[as]h_lane.
;; Vector by-lane form; lane register in <VCOND> mode, lane number (op 4)
;; remapped for big-endian before printing.
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640         (unspec:VDQHS
3641           [(match_operand:VDQHS 1 "register_operand" "0")
3642            (match_operand:VDQHS 2 "register_operand" "w")
3643            (vec_select:<VEL>
3644              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646           SQRDMLH_AS))]
3647    "TARGET_SIMD_RDMA"
3648    {
3649      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650      return
3651       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3652    }
3653    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqrdml[as]h by-lane form; accumulator op1 tied to op0.
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3658         (unspec:SD_HSI
3659           [(match_operand:SD_HSI 1 "register_operand" "0")
3660            (match_operand:SD_HSI 2 "register_operand" "w")
3661            (vec_select:<VEL>
3662              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3664           SQRDMLH_AS))]
3665    "TARGET_SIMD_RDMA"
3666    {
3667      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3668      return
3669       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3670    }
3671    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 ;; sqrdml[as]h_laneq.
;; Vector by-lane form with a full-width (<VCONQ>) lane register.
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3678         (unspec:VDQHS
3679           [(match_operand:VDQHS 1 "register_operand" "0")
3680            (match_operand:VDQHS 2 "register_operand" "w")
3681            (vec_select:<VEL>
3682              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684           SQRDMLH_AS))]
3685    "TARGET_SIMD_RDMA"
3686    {
3687      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688      return
3689       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3690    }
3691    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqrdml[as]h_laneq form with a <VCONQ> lane register.
;; The lane suffix uses <Vetype> for consistency with the _lane scalar
;; pattern above (for HI/SI the v and Vetype attributes print the same
;; letter, so the assembly output is unchanged).
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3696         (unspec:SD_HSI
3697           [(match_operand:SD_HSI 1 "register_operand" "0")
3698            (match_operand:SD_HSI 2 "register_operand" "w")
3699            (vec_select:<VEL>
3700              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3702           SQRDMLH_AS))]
3703    "TARGET_SIMD_RDMA"
3704    {
3705      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3706      return
3707       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3708    }
3709    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3712 ;; vqdml[sa]l
;; Saturating doubling multiply-add/sub long: op0 = op1 +/- saturate
;; ((extend (op2) * extend (op3)) << 1).  The accumulator op1 is tied to
;; op0 ("0") and is not printed.
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716         (SBINQOPS:<VWIDE>
3717           (match_operand:<VWIDE> 1 "register_operand" "0")
3718           (ss_ashift:<VWIDE>
3719               (mult:<VWIDE>
3720                 (sign_extend:<VWIDE>
3721                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3722                 (sign_extend:<VWIDE>
3723                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3724               (const_int 1))))]
3725   "TARGET_SIMD"
3726   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3730 ;; vqdml[sa]l_lane
;; By-lane form: the second multiplicand is one lane of a <VCOND> register,
;; broadcast across the vector; lane number in op4, endian-adjusted.
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3734         (SBINQOPS:<VWIDE>
3735           (match_operand:<VWIDE> 1 "register_operand" "0")
3736           (ss_ashift:<VWIDE>
3737             (mult:<VWIDE>
3738               (sign_extend:<VWIDE>
3739                 (match_operand:VD_HSI 2 "register_operand" "w"))
3740               (sign_extend:<VWIDE>
3741                 (vec_duplicate:VD_HSI
3742                   (vec_select:<VEL>
3743                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3745               ))
3746             (const_int 1))))]
3747   "TARGET_SIMD"
3748   {
3749     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750     return
3751       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3752   }
3753   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As the _lane pattern above, but the lane register is the full-width
;; <VCONQ> mode (laneq intrinsics).
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3758         (SBINQOPS:<VWIDE>
3759           (match_operand:<VWIDE> 1 "register_operand" "0")
3760           (ss_ashift:<VWIDE>
3761             (mult:<VWIDE>
3762               (sign_extend:<VWIDE>
3763                 (match_operand:VD_HSI 2 "register_operand" "w"))
3764               (sign_extend:<VWIDE>
3765                 (vec_duplicate:VD_HSI
3766                   (vec_select:<VEL>
3767                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3769               ))
3770             (const_int 1))))]
3771   "TARGET_SIMD"
3772   {
3773     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3774     return
3775       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3776   }
3777   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) sqdml[as]l by-lane: no vec_duplicate is needed because
;; the multiplicands are scalars.  Lane register in <VCOND> mode.
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782         (SBINQOPS:<VWIDE>
3783           (match_operand:<VWIDE> 1 "register_operand" "0")
3784           (ss_ashift:<VWIDE>
3785             (mult:<VWIDE>
3786               (sign_extend:<VWIDE>
3787                 (match_operand:SD_HSI 2 "register_operand" "w"))
3788               (sign_extend:<VWIDE>
3789                 (vec_select:<VEL>
3790                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3792               )
3793             (const_int 1))))]
3794   "TARGET_SIMD"
3795   {
3796     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3797     return
3798       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3799   }
3800   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq variant: lane register in the full-width <VCONQ> mode.
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805         (SBINQOPS:<VWIDE>
3806           (match_operand:<VWIDE> 1 "register_operand" "0")
3807           (ss_ashift:<VWIDE>
3808             (mult:<VWIDE>
3809               (sign_extend:<VWIDE>
3810                 (match_operand:SD_HSI 2 "register_operand" "w"))
3811               (sign_extend:<VWIDE>
3812                 (vec_select:<VEL>
3813                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3815               )
3816             (const_int 1))))]
3817   "TARGET_SIMD"
3818   {
3819     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820     return
3821       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822   }
3823   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3826 ;; vqdml[sa]l_n
;; By-scalar form: the second multiplicand is a scalar register broadcast
;; to every lane (vec_duplicate of op3), printed as lane [0].
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3830         (SBINQOPS:<VWIDE>
3831           (match_operand:<VWIDE> 1 "register_operand" "0")
3832           (ss_ashift:<VWIDE>
3833               (mult:<VWIDE>
3834                 (sign_extend:<VWIDE>
3835                       (match_operand:VD_HSI 2 "register_operand" "w"))
3836                 (sign_extend:<VWIDE>
3837                   (vec_duplicate:VD_HSI
3838                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3839               (const_int 1))))]
3840   "TARGET_SIMD"
3841   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3845 ;; sqdml[as]l2
;; High-half form: both multiplicands are the upper halves of 128-bit
;; registers, selected by the shared vect_par_cnst_hi_half parallel (op4).
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3849         (SBINQOPS:<VWIDE>
3850          (match_operand:<VWIDE> 1 "register_operand" "0")
3851          (ss_ashift:<VWIDE>
3852              (mult:<VWIDE>
3853                (sign_extend:<VWIDE>
3854                  (vec_select:<VHALF>
3855                      (match_operand:VQ_HSI 2 "register_operand" "w")
3856                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857                (sign_extend:<VWIDE>
3858                  (vec_select:<VHALF>
3859                      (match_operand:VQ_HSI 3 "register_operand" "w")
3860                      (match_dup 4))))
3861              (const_int 1))))]
3862   "TARGET_SIMD"
3863   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for sqdmlal2/sqdmlsl2: build the hi-half selector and
;; forward to the *_internal pattern above.
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869    (match_operand:<VWIDE> 1 "register_operand" "w")
3870    (match_operand:VQ_HSI 2 "register_operand" "w")
3871    (match_operand:VQ_HSI 3 "register_operand" "w")]
3872   "TARGET_SIMD"
3874   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876                                                   operands[2], operands[3], p));
3877   DONE;
;; Subtracting counterpart (SQDMLSL2).
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882    (match_operand:<VWIDE> 1 "register_operand" "w")
3883    (match_operand:VQ_HSI 2 "register_operand" "w")
3884    (match_operand:VQ_HSI 3 "register_operand" "w")]
3885   "TARGET_SIMD"
3887   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889                                                   operands[2], operands[3], p));
3890   DONE;
3893 ;; vqdml[sa]l2_lane
;; High-half by-lane form: op2's upper half (selected by the hi-half
;; parallel in op5) times one lane of op3 broadcast across the half vector;
;; lane number (op4) is endian-adjusted before printing.
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3897         (SBINQOPS:<VWIDE>
3898           (match_operand:<VWIDE> 1 "register_operand" "0")
3899           (ss_ashift:<VWIDE>
3900               (mult:<VWIDE>
3901                 (sign_extend:<VWIDE>
3902                   (vec_select:<VHALF>
3903                     (match_operand:VQ_HSI 2 "register_operand" "w")
3904                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905                 (sign_extend:<VWIDE>
3906                   (vec_duplicate:<VHALF>
3907                     (vec_select:<VEL>
3908                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3910                     ))))
3911               (const_int 1))))]
3912   "TARGET_SIMD"
3913   {
3914     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3915     return
3916      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3917   }
3918   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; laneq variant: the lane register is the full-width <VCONQ> mode.
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923         (SBINQOPS:<VWIDE>
3924           (match_operand:<VWIDE> 1 "register_operand" "0")
3925           (ss_ashift:<VWIDE>
3926               (mult:<VWIDE>
3927                 (sign_extend:<VWIDE>
3928                   (vec_select:<VHALF>
3929                     (match_operand:VQ_HSI 2 "register_operand" "w")
3930                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931                 (sign_extend:<VWIDE>
3932                   (vec_duplicate:<VHALF>
3933                     (vec_select:<VEL>
3934                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3936                     ))))
3937               (const_int 1))))]
3938   "TARGET_SIMD"
3939   {
3940     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3941     return
3942      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3943   }
3944   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for the four lane/laneq high-half forms: build the
;; hi-half selector and forward to the matching *_internal pattern above.
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949    (match_operand:<VWIDE> 1 "register_operand" "w")
3950    (match_operand:VQ_HSI 2 "register_operand" "w")
3951    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952    (match_operand:SI 4 "immediate_operand" "i")]
3953   "TARGET_SIMD"
3955   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957                                                        operands[2], operands[3],
3958                                                        operands[4], p));
3959   DONE;
;; SQDMLAL2 with a full-width (<VCONQ>) lane register.
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964    (match_operand:<VWIDE> 1 "register_operand" "w")
3965    (match_operand:VQ_HSI 2 "register_operand" "w")
3966    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967    (match_operand:SI 4 "immediate_operand" "i")]
3968   "TARGET_SIMD"
3970   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972                                                        operands[2], operands[3],
3973                                                        operands[4], p));
3974   DONE;
;; Subtracting (SQDMLSL2) by-lane expander.
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979    (match_operand:<VWIDE> 1 "register_operand" "w")
3980    (match_operand:VQ_HSI 2 "register_operand" "w")
3981    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982    (match_operand:SI 4 "immediate_operand" "i")]
3983   "TARGET_SIMD"
3985   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987                                                        operands[2], operands[3],
3988                                                        operands[4], p));
3989   DONE;
;; SQDMLSL2 with a full-width (<VCONQ>) lane register.
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994    (match_operand:<VWIDE> 1 "register_operand" "w")
3995    (match_operand:VQ_HSI 2 "register_operand" "w")
3996    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997    (match_operand:SI 4 "immediate_operand" "i")]
3998   "TARGET_SIMD"
4000   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002                                                        operands[2], operands[3],
4003                                                        operands[4], p));
4004   DONE;
;; High-half by-scalar form: op2's upper half times a scalar (op3)
;; broadcast across the half vector, printed as lane [0].
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009         (SBINQOPS:<VWIDE>
4010           (match_operand:<VWIDE> 1 "register_operand" "0")
4011           (ss_ashift:<VWIDE>
4012             (mult:<VWIDE>
4013               (sign_extend:<VWIDE>
4014                 (vec_select:<VHALF>
4015                   (match_operand:VQ_HSI 2 "register_operand" "w")
4016                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017               (sign_extend:<VWIDE>
4018                 (vec_duplicate:<VHALF>
4019                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4020             (const_int 1))))]
4021   "TARGET_SIMD"
4022   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Builtin expanders for the _n high-half forms: build the hi-half
;; selector and forward to the *_internal pattern above.
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028    (match_operand:<VWIDE> 1 "register_operand" "w")
4029    (match_operand:VQ_HSI 2 "register_operand" "w")
4030    (match_operand:<VEL> 3 "register_operand" "w")]
4031   "TARGET_SIMD"
4033   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035                                                     operands[2], operands[3],
4036                                                     p));
4037   DONE;
;; Subtracting counterpart (SQDMLSL2 by scalar).
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042    (match_operand:<VWIDE> 1 "register_operand" "w")
4043    (match_operand:VQ_HSI 2 "register_operand" "w")
4044    (match_operand:<VEL> 3 "register_operand" "w")]
4045   "TARGET_SIMD"
4047   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049                                                     operands[2], operands[3],
4050                                                     p));
4051   DONE;
4054 ;; vqdmull
;; Signed saturating doubling multiply long:
;;   result = ss_ashift (sign_extend (a) * sign_extend (b), 1)
;; i.e. widen both inputs, multiply, then saturating-double.
4056 (define_insn "aarch64_sqdmull<mode>"
4057   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058         (ss_ashift:<VWIDE>
4059              (mult:<VWIDE>
4060                (sign_extend:<VWIDE>
4061                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4062                (sign_extend:<VWIDE>
4063                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4064              (const_int 1)))]
4065   "TARGET_SIMD"
4066   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4070 ;; vqdmull_lane
;; Lane form for 64-bit vector inputs: the second multiplicand is one lane of
;; a 64-bit (<VCOND>) vector, broadcast across the half-width vector.
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074         (ss_ashift:<VWIDE>
4075              (mult:<VWIDE>
4076                (sign_extend:<VWIDE>
4077                  (match_operand:VD_HSI 1 "register_operand" "w"))
4078                (sign_extend:<VWIDE>
4079                  (vec_duplicate:VD_HSI
4080                    (vec_select:<VEL>
4081                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4083                ))
4084              (const_int 1)))]
4085   "TARGET_SIMD"
4086   {
;; RTL lane numbers use GCC (array) order; convert for assembly output.
4087     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4089   }
4090   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, but the lane comes from a 128-bit (<VCONQ>) vector ("laneq").
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4095         (ss_ashift:<VWIDE>
4096              (mult:<VWIDE>
4097                (sign_extend:<VWIDE>
4098                  (match_operand:VD_HSI 1 "register_operand" "w"))
4099                (sign_extend:<VWIDE>
4100                  (vec_duplicate:VD_HSI
4101                    (vec_select:<VEL>
4102                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4104                ))
4105              (const_int 1)))]
4106   "TARGET_SIMD"
4107   {
4108     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4110   }
4111   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane variant: no vec_duplicate, the selected lane is the
;; scalar multiplicand itself.
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116         (ss_ashift:<VWIDE>
4117              (mult:<VWIDE>
4118                (sign_extend:<VWIDE>
4119                  (match_operand:SD_HSI 1 "register_operand" "w"))
4120                (sign_extend:<VWIDE>
4121                  (vec_select:<VEL>
4122                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4124                ))
4125              (const_int 1)))]
4126   "TARGET_SIMD"
4127   {
4128     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4130   }
4131   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar laneq variant (lane from a 128-bit vector).
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136         (ss_ashift:<VWIDE>
4137              (mult:<VWIDE>
4138                (sign_extend:<VWIDE>
4139                  (match_operand:SD_HSI 1 "register_operand" "w"))
4140                (sign_extend:<VWIDE>
4141                  (vec_select:<VEL>
4142                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4144                ))
4145              (const_int 1)))]
4146   "TARGET_SIMD"
4147   {
4148     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4150   }
4151   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4154 ;; vqdmull_n
;; Scalar-broadcast form: multiplier is a scalar register, duplicated across
;; the vector; emitted as lane 0 of the scalar's register.
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158         (ss_ashift:<VWIDE>
4159              (mult:<VWIDE>
4160                (sign_extend:<VWIDE>
4161                  (match_operand:VD_HSI 1 "register_operand" "w"))
4162                (sign_extend:<VWIDE>
4163                  (vec_duplicate:VD_HSI
4164                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4165                )
4166              (const_int 1)))]
4167   "TARGET_SIMD"
4168   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4172 ;; vqdmull2
;; "2" (high-half) variants of sqdmull: both multiplicands are selected from
;; the high halves of 128-bit inputs via a vect_par_cnst_hi_half parallel.
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4178         (ss_ashift:<VWIDE>
4179              (mult:<VWIDE>
4180                (sign_extend:<VWIDE>
4181                  (vec_select:<VHALF>
4182                    (match_operand:VQ_HSI 1 "register_operand" "w")
4183                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184                (sign_extend:<VWIDE>
4185                  (vec_select:<VHALF>
4186                    (match_operand:VQ_HSI 2 "register_operand" "w")
4187                    (match_dup 3)))
4188                )
4189              (const_int 1)))]
4190   "TARGET_SIMD"
4191   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; User-visible expander: supplies the hi-half selector parallel.
4195 (define_expand "aarch64_sqdmull2<mode>"
4196   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197    (match_operand:VQ_HSI 1 "register_operand" "w")
4198    (match_operand:VQ_HSI 2 "register_operand" "w")]
4199   "TARGET_SIMD"
4201   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4203                                                   operands[2], p));
4204   DONE;
4207 ;; vqdmull2_lane
;; High-half multiply by a broadcast lane of a 64-bit (<VCOND>) vector.
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211         (ss_ashift:<VWIDE>
4212              (mult:<VWIDE>
4213                (sign_extend:<VWIDE>
4214                  (vec_select:<VHALF>
4215                    (match_operand:VQ_HSI 1 "register_operand" "w")
4216                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217                (sign_extend:<VWIDE>
4218                  (vec_duplicate:<VHALF>
4219                    (vec_select:<VEL>
4220                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4222                ))
4223              (const_int 1)))]
4224   "TARGET_SIMD"
4225   {
;; Convert the GCC-order lane number to architectural order for output.
4226     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4228   }
4229   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Same, but the lane comes from a 128-bit (<VCONQ>) vector ("laneq").
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (vec_select:<VHALF>
4238                    (match_operand:VQ_HSI 1 "register_operand" "w")
4239                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240                (sign_extend:<VWIDE>
4241                  (vec_duplicate:<VHALF>
4242                    (vec_select:<VEL>
4243                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4245                ))
4246              (const_int 1)))]
4247   "TARGET_SIMD"
4248   {
4249     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4251   }
4252   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the lane form.
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257    (match_operand:VQ_HSI 1 "register_operand" "w")
4258    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259    (match_operand:SI 3 "immediate_operand" "i")]
4260   "TARGET_SIMD"
4262   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264                                                        operands[2], operands[3],
4265                                                        p));
4266   DONE;
;; Expander for the laneq form.
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271    (match_operand:VQ_HSI 1 "register_operand" "w")
4272    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273    (match_operand:SI 3 "immediate_operand" "i")]
4274   "TARGET_SIMD"
4276   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278                                                        operands[2], operands[3],
4279                                                        p));
4280   DONE;
4283 ;; vqdmull2_n
;; High-half multiply by a duplicated scalar (emitted as lane 0).
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287         (ss_ashift:<VWIDE>
4288              (mult:<VWIDE>
4289                (sign_extend:<VWIDE>
4290                  (vec_select:<VHALF>
4291                    (match_operand:VQ_HSI 1 "register_operand" "w")
4292                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:<VHALF>
4295                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4296                )
4297              (const_int 1)))]
4298   "TARGET_SIMD"
4299   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the _n form.
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305    (match_operand:VQ_HSI 1 "register_operand" "w")
4306    (match_operand:<VEL> 2 "register_operand" "w")]
4307   "TARGET_SIMD"
4309   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4311                                                     operands[2], p));
4312   DONE;
4315 ;; vshl
;; Register-controlled vector shifts, modelled as an unspec (VSHL iterator
;; covers the signed/unsigned/rounding variants via <sur>).
4317 (define_insn "aarch64_<sur>shl<mode>"
4318   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4319         (unspec:VSDQ_I_DI
4320           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4322          VSHL))]
4323   "TARGET_SIMD"
;; NOTE(review): trailing ';' after the template string — in md syntax ';'
;; starts a comment, so it is harmless; presumably accidental.
4324   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325   [(set_attr "type" "neon_shift_reg<q>")]
4329 ;; vqshl
;; Saturating (optionally rounding) register-controlled shifts.
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4333         (unspec:VSDQ_I
4334           [(match_operand:VSDQ_I 1 "register_operand" "w")
4335            (match_operand:VSDQ_I 2 "register_operand" "w")]
4336          VQSHL))]
4337   "TARGET_SIMD"
;; NOTE(review): same harmless trailing ';' as above.
4338   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339   [(set_attr "type" "neon_sat_shift_reg<q>")]
4342 ;; vshll_n
;; Widening shift-left by immediate.  A shift equal to the element width has
;; no s/u distinction and is emitted as plain SHLL.
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4347                          (match_operand:SI 2
4348                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4349                          VSHLL))]
4350   "TARGET_SIMD"
4351   {
4352     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354     else
4355       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356   }
4357   [(set_attr "type" "neon_shift_imm_long")]
4360 ;; vshll_high_n
;; High-half widening shift-left by immediate; same SHLL special case.
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365                          (match_operand:SI 2 "immediate_operand" "i")]
4366                          VSHLL))]
4367   "TARGET_SIMD"
4368   {
4369     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4371     else
4372       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4373   }
4374   [(set_attr "type" "neon_shift_imm_long")]
4377 ;; vrshr_n
;; (Rounding) shift right by immediate; immediate range checked by predicate.
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4382                            (match_operand:SI 2
4383                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4384                           VRSHR_N))]
4385   "TARGET_SIMD"
4386   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387   [(set_attr "type" "neon_sat_shift_imm<q>")]
4390 ;; v(r)sra_n
;; Shift-right-and-accumulate: operand 1 is the accumulator, tied to the
;; output ("0"); only operand 2 is shifted.
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4396                        (match_operand:SI 3
4397                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4398                       VSRA))]
4399   "TARGET_SIMD"
4400   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401   [(set_attr "type" "neon_shift_acc<q>")]
4404 ;; vs<lr>i_n
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted field are
;; preserved, hence the tied input operand 1.
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4410                        (match_operand:SI 3
4411                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4412                       VSLRI))]
4413   "TARGET_SIMD"
4414   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415   [(set_attr "type" "neon_shift_imm<q>")]
4418 ;; vqshl(u)
;; Saturating shift-left by immediate (signed, unsigned, or signed-to-unsigned).
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4423                        (match_operand:SI 2
4424                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4425                       VQSHL_N))]
4426   "TARGET_SIMD"
4427   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428   [(set_attr "type" "neon_sat_shift_imm<q>")]
4432 ;; vq(r)shr(u)n_n
;; Saturating (rounding) narrowing shift right by immediate.
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4437                             (match_operand:SI 2
4438                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4439                            VQSHRN_N))]
4440   "TARGET_SIMD"
4441   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note, we have constraints for Dz and Z as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compares.  The comparison result is negated so that "true"
;; lanes become all-ones masks.  Alternative 2 compares against zero (#0).
4450 (define_insn "aarch64_cm<optab><mode>"
4451   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4452         (neg:<V_INT_EQUIV>
4453           (COMPARISONS:<V_INT_EQUIV>
4454             (match_operand:VDQ_I 1 "register_operand" "w,w")
4455             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4456           )))]
4457   "TARGET_SIMD"
4458   "@
4459   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI-mode compare: kept as "#" until after reload, then split.  If register
;; allocation put the operands in general registers, split into a CC compare
;; plus a negated cset; otherwise resolve to the *aarch64_cm<optab>di pattern
;; below, which has no CC clobber.
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4466         (neg:DI
4467           (COMPARISONS:DI
4468             (match_operand:DI 1 "register_operand" "w,w,r")
4469             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4470           )))
4471      (clobber (reg:CC CC_REGNUM))]
4472   "TARGET_SIMD"
4473   "#"
4474   "&& reload_completed"
4475   [(set (match_operand:DI 0 "register_operand")
4476         (neg:DI
4477           (COMPARISONS:DI
4478             (match_operand:DI 1 "register_operand")
4479             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4480           )))]
4481   {
4482     /* If we are in the general purpose register file,
4483        we split to a sequence of comparison and store.  */
4484     if (GP_REGNUM_P (REGNO (operands[0]))
4485         && GP_REGNUM_P (REGNO (operands[1])))
4486       {
4487         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4491         DONE;
4492       }
4493     /* Otherwise, we expand to a similar pattern which does not
4494        clobber CC_REGNUM.  */
4495   }
4496   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload FP-register form of the DI compare (no CC clobber).
4499 (define_insn "*aarch64_cm<optab>di"
4500   [(set (match_operand:DI 0 "register_operand" "=w,w")
4501         (neg:DI
4502           (COMPARISONS:DI
4503             (match_operand:DI 1 "register_operand" "w,w")
4504             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4505           )))]
4506   "TARGET_SIMD && reload_completed"
4507   "@
4508   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509   cm<optab>\t%d0, %d1, #0"
4510   [(set_attr "type" "neon_compare, neon_compare_zero")]
4513 ;; cm(hs|hi)
;; Unsigned vector compares (higher-or-same / higher); same negate-to-mask
;; scheme as the signed patterns above, but no compare-against-zero form.
4515 (define_insn "aarch64_cm<optab><mode>"
4516   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4517         (neg:<V_INT_EQUIV>
4518           (UCOMPARISONS:<V_INT_EQUIV>
4519             (match_operand:VDQ_I 1 "register_operand" "w")
4520             (match_operand:VDQ_I 2 "register_operand" "w")
4521           )))]
4522   "TARGET_SIMD"
4523   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524   [(set_attr "type" "neon_compare<q>")]
;; DI-mode unsigned compare; split after reload.  GP-register allocation
;; becomes a CC compare plus negated cset, otherwise the CC-free pattern
;; below is used.
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528   [(set (match_operand:DI 0 "register_operand" "=w,r")
4529         (neg:DI
4530           (UCOMPARISONS:DI
4531             (match_operand:DI 1 "register_operand" "w,r")
4532             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4533           )))
4534     (clobber (reg:CC CC_REGNUM))]
4535   "TARGET_SIMD"
4536   "#"
4537   "&& reload_completed"
4538   [(set (match_operand:DI 0 "register_operand")
4539         (neg:DI
4540           (UCOMPARISONS:DI
4541             (match_operand:DI 1 "register_operand")
4542             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4543           )))]
4544   {
4545     /* If we are in the general purpose register file,
4546        we split to a sequence of comparison and store.  */
4547     if (GP_REGNUM_P (REGNO (operands[0]))
4548         && GP_REGNUM_P (REGNO (operands[1])))
4549       {
;; Unsigned compares use plain CCmode (cf. SELECT_CC_MODE in the signed split).
4550         machine_mode mode = CCmode;
4551         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4554         DONE;
4555       }
4556     /* Otherwise, we expand to a similar pattern which does not
4557        clobber CC_REGNUM.  */
4558   }
4559   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload FP-register form (no CC clobber).
4562 (define_insn "*aarch64_cm<optab>di"
4563   [(set (match_operand:DI 0 "register_operand" "=w")
4564         (neg:DI
4565           (UCOMPARISONS:DI
4566             (match_operand:DI 1 "register_operand" "w")
4567             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4568           )))]
4569   "TARGET_SIMD && reload_completed"
4570   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571   [(set_attr "type" "neon_compare")]
4574 ;; cmtst
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
;; Vector bit-test: matches the canonicalised plus/eq form described above.
4582 (define_insn "aarch64_cmtst<mode>"
4583   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4584         (plus:<V_INT_EQUIV>
4585           (eq:<V_INT_EQUIV>
4586             (and:VDQ_I
4587               (match_operand:VDQ_I 1 "register_operand" "w")
4588               (match_operand:VDQ_I 2 "register_operand" "w"))
4589             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4591   ]
4592   "TARGET_SIMD"
4593   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594   [(set_attr "type" "neon_tst<q>")]
;; DI-mode bit-test; split after reload like the cm<optab>di patterns:
;; GP registers -> TST-style CC compare + negated cset, FP registers -> the
;; CC-free *aarch64_cmtstdi pattern below.
4597 (define_insn_and_split "aarch64_cmtstdi"
4598   [(set (match_operand:DI 0 "register_operand" "=w,r")
4599         (neg:DI
4600           (ne:DI
4601             (and:DI
4602               (match_operand:DI 1 "register_operand" "w,r")
4603               (match_operand:DI 2 "register_operand" "w,r"))
4604             (const_int 0))))
4605     (clobber (reg:CC CC_REGNUM))]
4606   "TARGET_SIMD"
4607   "#"
4608   "&& reload_completed"
4609   [(set (match_operand:DI 0 "register_operand")
4610         (neg:DI
4611           (ne:DI
4612             (and:DI
4613               (match_operand:DI 1 "register_operand")
4614               (match_operand:DI 2 "register_operand"))
4615             (const_int 0))))]
4616   {
4617     /* If we are in the general purpose register file,
4618        we split to a sequence of comparison and store.  */
4619     if (GP_REGNUM_P (REGNO (operands[0]))
4620         && GP_REGNUM_P (REGNO (operands[1])))
4621       {
4622         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4627         DONE;
4628       }
4629     /* Otherwise, we expand to a similar pattern which does not
4630        clobber CC_REGNUM.  */
4631   }
4632   [(set_attr "type" "neon_tst,multiple")]
;; FP-register DI bit-test (no CC clobber).
4635 (define_insn "*aarch64_cmtstdi"
4636   [(set (match_operand:DI 0 "register_operand" "=w")
4637         (neg:DI
4638           (ne:DI
4639             (and:DI
4640               (match_operand:DI 1 "register_operand" "w")
4641               (match_operand:DI 2 "register_operand" "w"))
4642             (const_int 0))))]
4643   "TARGET_SIMD"
4644   "cmtst\t%d0, %d1, %d2"
4645   [(set_attr "type" "neon_tst")]
4648 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector/scalar compares, negated to all-ones masks.
;; Alternative 2 ("YDz") compares against floating-point zero.
4650 (define_insn "aarch64_cm<optab><mode>"
4651   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4652         (neg:<V_INT_EQUIV>
4653           (COMPARISONS:<V_INT_EQUIV>
4654             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4656           )))]
4657   "TARGET_SIMD"
4658   "@
4659   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4664 ;; fac(ge|gt)
4665 ;; Note we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt).
;; Absolute-value FP compares: |op1| cmp |op2|, negated to a mask.
4668 (define_insn "aarch64_fac<optab><mode>"
4669   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4670         (neg:<V_INT_EQUIV>
4671           (FAC_COMPARISONS:<V_INT_EQUIV>
4672             (abs:VHSDF_HSDF
4673               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4674             (abs:VHSDF_HSDF
4675               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4676   )))]
4677   "TARGET_SIMD"
4678   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4682 ;; addp
;; Pairwise integer add on 64-bit vectors (unspec-modelled).
4684 (define_insn "aarch64_addp<mode>"
4685   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4686         (unspec:VD_BHSI
4687           [(match_operand:VD_BHSI 1 "register_operand" "w")
4688            (match_operand:VD_BHSI 2 "register_operand" "w")]
4689           UNSPEC_ADDP))]
4690   "TARGET_SIMD"
4691   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692   [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar pairwise add: sums the two elements of a V2DI into a DI result.
4695 (define_insn "aarch64_addpdi"
4696   [(set (match_operand:DI 0 "register_operand" "=w")
4697         (unspec:DI
4698           [(match_operand:V2DI 1 "register_operand" "w")]
4699           UNSPEC_ADDP))]
4700   "TARGET_SIMD"
4701   "addp\t%d0, %1.2d"
4702   [(set_attr "type" "neon_reduc_add")]
4705 ;; sqrt
;; Expander: may emit a Newton-style approximation sequence via
;; aarch64_emit_approx_sqrt; if that declines, falls through to the
;; *sqrt<mode>2 insn below.  NOTE(review): closing "})" not visible in this
;; scraped view.
4707 (define_expand "sqrt<mode>2"
4708   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4710   "TARGET_SIMD"
4712   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4713     DONE;
;; Hardware FSQRT instruction.
4716 (define_insn "*sqrt<mode>2"
4717   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4719   "TARGET_SIMD"
4720   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4724 ;; Patterns for vector struct loads and stores.
;; LD2: load a 2-register structure (OImode = 2 x 128-bit) from memory.
4726 (define_insn "aarch64_simd_ld2<mode>"
4727   [(set (match_operand:OI 0 "register_operand" "=w")
4728         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730                    UNSPEC_LD2))]
4731   "TARGET_SIMD"
4732   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element structure and replicate to all lanes.
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737   [(set (match_operand:OI 0 "register_operand" "=w")
4738        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4740                   UNSPEC_LD2_DUP))]
4741   "TARGET_SIMD"
4742   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 single-lane: load one structure into lane 3 of the tied register pair.
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747   [(set (match_operand:OI 0 "register_operand" "=w")
4748         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749                     (match_operand:OI 2 "register_operand" "0")
4750                     (match_operand:SI 3 "immediate_operand" "i")
4751                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4752                    UNSPEC_LD2_LANE))]
4753   "TARGET_SIMD"
4754   {
;; Convert GCC lane numbering to architectural numbering for assembly.
4755     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4757   }
4758   [(set_attr "type" "neon_load2_one_lane")]
;; Generic vec_load_lanes expander: on big-endian, load then permute the
;; register list with a reverse mask so RTL lane order matches GCC's model.
4761 (define_expand "vec_load_lanesoi<mode>"
4762   [(set (match_operand:OI 0 "register_operand" "=w")
4763         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765                    UNSPEC_LD2))]
4766   "TARGET_SIMD"
4768   if (BYTES_BIG_ENDIAN)
4769     {
4770       rtx tmp = gen_reg_rtx (OImode);
4771       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4774     }
4775   else
4776     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4777   DONE;
;; ST2: store a 2-register structure to memory.
4780 (define_insn "aarch64_simd_st2<mode>"
4781   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4784                    UNSPEC_ST2))]
4785   "TARGET_SIMD"
4786   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787   [(set_attr "type" "neon_store2_2reg<q>")]
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795                     (match_operand:SI 2 "immediate_operand" "i")]
4796                    UNSPEC_ST2_LANE))]
4797   "TARGET_SIMD"
4798   {
4799     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4801   }
4802   [(set_attr "type" "neon_store2_one_lane<q>")]
;; Generic vec_store_lanes expander: big-endian permutes before storing
;; (mirror of the load expander above).
4805 (define_expand "vec_store_lanesoi<mode>"
4806   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4809                    UNSPEC_ST2))]
4810   "TARGET_SIMD"
4812   if (BYTES_BIG_ENDIAN)
4813     {
4814       rtx tmp = gen_reg_rtx (OImode);
4815       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4818     }
4819   else
4820     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4821   DONE;
4824 (define_insn "aarch64_simd_ld3<mode>"
4825   [(set (match_operand:CI 0 "register_operand" "=w")
4826         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4828                    UNSPEC_LD3))]
4829   "TARGET_SIMD"
4830   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure and replicate it to all lanes of the
;; three destination registers.  The memory operand is BLK-mode; its size
;; is set by the aarch64_ld<nregs>r expander.
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835   [(set (match_operand:CI 0 "register_operand" "=w")
4836        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4838                   UNSPEC_LD3_DUP))]
4839   "TARGET_SIMD"
4840   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Load one lane of a 3-register (CI) tuple with LD3; operand 2 ("0") ties
;; the untouched lanes to the destination.  The lane index (operand 3) is
;; endian-adjusted before printing.
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845   [(set (match_operand:CI 0 "register_operand" "=w")
4846         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847                     (match_operand:CI 2 "register_operand" "0")
4848                     (match_operand:SI 3 "immediate_operand" "i")
4849                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850                    UNSPEC_LD3_LANE))]
4851   "TARGET_SIMD"
4853     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4856   [(set_attr "type" "neon_load3_one_lane")]
;; Standard vec_load_lanes expander (3 x VQ).  On big-endian the loaded
;; register list is reversed afterwards through a TBL mask.
4859 (define_expand "vec_load_lanesci<mode>"
4860   [(set (match_operand:CI 0 "register_operand" "=w")
4861         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863                    UNSPEC_LD3))]
4864   "TARGET_SIMD"
4866   if (BYTES_BIG_ENDIAN)
4867     {
4868       rtx tmp = gen_reg_rtx (CImode);
4869       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4872     }
4873   else
4874     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4875   DONE;
;; Interleaved store of a 3-register (CI) tuple to memory: ST3.
4878 (define_insn "aarch64_simd_st3<mode>"
4879   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882                    UNSPEC_ST3))]
4883   "TARGET_SIMD"
4884   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885   [(set_attr "type" "neon_store3_3reg<q>")]
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store one lane from a three-register (CI) tuple with ST3; the lane
;; index in operand 2 is endian-adjusted before printing.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893                      (match_operand:SI 2 "immediate_operand" "i")]
4894                     UNSPEC_ST3_LANE))]
4895   "TARGET_SIMD"
4896   {
4897     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4899   }
4900   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard vec_store_lanes expander (3 x VQ); mirrors vec_store_lanesoi,
;; reversing the register list first on big-endian targets.
4903 (define_expand "vec_store_lanesci<mode>"
4904   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907                    UNSPEC_ST3))]
4908   "TARGET_SIMD"
4910   if (BYTES_BIG_ENDIAN)
4911     {
4912       rtx tmp = gen_reg_rtx (CImode);
4913       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4916     }
4917   else
4918     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4919   DONE;
;; Interleaved load of a 4-register (XI) tuple from memory: LD4.
4922 (define_insn "aarch64_simd_ld4<mode>"
4923   [(set (match_operand:XI 0 "register_operand" "=w")
4924         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926                    UNSPEC_LD4))]
4927   "TARGET_SIMD"
4928   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929   [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate it to all lanes of the
;; four destination registers.
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933   [(set (match_operand:XI 0 "register_operand" "=w")
4934        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4936                   UNSPEC_LD4_DUP))]
4937   "TARGET_SIMD"
4938   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load one lane of a 4-register (XI) tuple with LD4; operand 2 ("0") ties
;; the untouched lanes to the destination, and operand 3 (the lane index)
;; is endian-adjusted before printing.
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943   [(set (match_operand:XI 0 "register_operand" "=w")
4944         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945                     (match_operand:XI 2 "register_operand" "0")
4946                     (match_operand:SI 3 "immediate_operand" "i")
4947                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948                    UNSPEC_LD4_LANE))]
4949   "TARGET_SIMD"
4951     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4954   [(set_attr "type" "neon_load4_one_lane")]
;; Standard vec_load_lanes expander (4 x VQ); reverses the loaded register
;; list afterwards on big-endian targets.
4957 (define_expand "vec_load_lanesxi<mode>"
4958   [(set (match_operand:XI 0 "register_operand" "=w")
4959         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961                    UNSPEC_LD4))]
4962   "TARGET_SIMD"
4964   if (BYTES_BIG_ENDIAN)
4965     {
4966       rtx tmp = gen_reg_rtx (XImode);
4967       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4970     }
4971   else
4972     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4973   DONE;
;; Interleaved store of a 4-register (XI) tuple to memory: ST4.
4976 (define_insn "aarch64_simd_st4<mode>"
4977   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980                    UNSPEC_ST4))]
4981   "TARGET_SIMD"
4982   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983   [(set_attr "type" "neon_store4_4reg<q>")]
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store one lane from a four-register (XI) tuple with ST4; the lane index
;; in operand 2 is endian-adjusted before printing.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991                      (match_operand:SI 2 "immediate_operand" "i")]
4992                     UNSPEC_ST4_LANE))]
4993   "TARGET_SIMD"
4994   {
4995     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4997   }
4998   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard vec_store_lanes expander (4 x VQ); reverses the register list
;; first on big-endian targets, then emits the st4.
5001 (define_expand "vec_store_lanesxi<mode>"
5002   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005                    UNSPEC_ST4))]
5006   "TARGET_SIMD"
5008   if (BYTES_BIG_ENDIAN)
5009     {
5010       rtx tmp = gen_reg_rtx (XImode);
5011       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5014     }
5015   else
5016     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5017   DONE;
;; Reverse the lanes of every vector in a structure register list by
;; applying TBL with a byte-permute mask (operand 2) to each 128-bit
;; register in turn.  Kept as a single insn until after reload, then split
;; into one aarch64_tbl1v16qi per constituent register.  The destination
;; is earlyclobber ("=&w") since it is written before all inputs are read.
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5022         (unspec:VSTRUCT
5023                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5024                     (match_operand:V16QI 2 "register_operand" "w")]
5025                    UNSPEC_REV_REGLIST))]
5026   "TARGET_SIMD"
5027   "#"
5028   "&& reload_completed"
5029   [(const_int 0)]
5031   int i;
5032   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033   for (i = 0; i < nregs; i++)
5034     {
5035       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5038     }
5039   DONE;
5041   [(set_attr "type" "neon_tbl1_q")
5042    (set_attr "length" "<insn_count>")]
5045 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for structure (VSTRUCT) modes: before reload,
;; force the source into a register unless the destination is one, so at
;; most one operand is a memory reference.
5047 (define_expand "mov<mode>"
5048   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049         (match_operand:VSTRUCT 1 "general_operand" ""))]
5050   "TARGET_SIMD"
5052   if (can_create_pseudo_p ())
5053     {
5054       if (GET_CODE (operands[0]) != REG)
5055         operands[1] = force_reg (<MODE>mode, operands[1]);
5056     }
;; Little-endian structure-mode moves: reg-reg moves are split later
;; (alternative 0, "#"); memory transfers use a single ST1/LD1 over the
;; whole register list.
5059 (define_insn "*aarch64_mov<mode>"
5060   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5061         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5062   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5063    && (register_operand (operands[0], <MODE>mode)
5064        || register_operand (operands[1], <MODE>mode))"
5065   "@
5066    #
5067    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5068    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5069   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5070                      neon_load<nregs>_<nregs>reg_q")
5071    (set_attr "length" "<insn_count>,4,4")]
;; Element-ordered LD1 used on big-endian targets, where a plain vector
;; load would give byte-swapped lane numbering.
5074 (define_insn "aarch64_be_ld1<mode>"
5075   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5076         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5077                              "aarch64_simd_struct_operand" "Utv")]
5078         UNSPEC_LD1))]
5079   "TARGET_SIMD"
5080   "ld1\\t{%0<Vmtype>}, %1"
5081   [(set_attr "type" "neon_load1_1reg<q>")]
;; Element-ordered ST1, the store counterpart of aarch64_be_ld1.
5084 (define_insn "aarch64_be_st1<mode>"
5085   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5086         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5087         UNSPEC_ST1))]
5088   "TARGET_SIMD"
5089   "st1\\t{%1<Vmtype>}, %0"
5090   [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2 x 128-bit) moves: reg-reg is split ("#"); memory
;; transfers use STP/LDP of Q registers, preserving register-lane layout.
5093 (define_insn "*aarch64_be_movoi"
5094   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5095         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5096   "TARGET_SIMD && BYTES_BIG_ENDIAN
5097    && (register_operand (operands[0], OImode)
5098        || register_operand (operands[1], OImode))"
5099   "@
5100    #
5101    stp\\t%q1, %R1, %0
5102    ldp\\t%q0, %R0, %1"
5103   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5104    (set_attr "length" "8,4,4")]
;; Big-endian CI (3 x 128-bit) moves: always emitted as "#" and handled by
;; the CI define_split below; "o" requires an offsettable memory operand so
;; the split can address the pieces.
5107 (define_insn "*aarch64_be_movci"
5108   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5109         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5110   "TARGET_SIMD && BYTES_BIG_ENDIAN
5111    && (register_operand (operands[0], CImode)
5112        || register_operand (operands[1], CImode))"
5113   "#"
5114   [(set_attr "type" "multiple")
5115    (set_attr "length" "12,4,4")]
;; Big-endian XI (4 x 128-bit) moves: always split; see the XI define_split
;; below.
5118 (define_insn "*aarch64_be_movxi"
5119   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5120         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5121   "TARGET_SIMD && BYTES_BIG_ENDIAN
5122    && (register_operand (operands[0], XImode)
5123        || register_operand (operands[1], XImode))"
5124   "#"
5125   [(set_attr "type" "multiple")
5126    (set_attr "length" "16,4,4")]
;; After reload, decompose an OI reg-reg move into two TImode register
;; moves (aarch64_simd_emit_reg_reg_move orders them to handle overlap).
5129 (define_split
5130   [(set (match_operand:OI 0 "register_operand")
5131         (match_operand:OI 1 "register_operand"))]
5132   "TARGET_SIMD && reload_completed"
5133   [(const_int 0)]
5135   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5136   DONE;
;; After reload, decompose a CI move.  Reg-reg becomes three TImode moves;
;; on big-endian, reg<->mem becomes an OImode move of the first 32 bytes
;; plus a V16QI move of the final 16 bytes (via a TImode subreg, since
;; taking a vector-mode subreg directly would renumber lanes).  Otherwise
;; FAIL and let the generic movci handling apply.
5139 (define_split
5140   [(set (match_operand:CI 0 "nonimmediate_operand")
5141         (match_operand:CI 1 "general_operand"))]
5142   "TARGET_SIMD && reload_completed"
5143   [(const_int 0)]
5145   if (register_operand (operands[0], CImode)
5146       && register_operand (operands[1], CImode))
5147     {
5148       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5149       DONE;
5150     }
5151   else if (BYTES_BIG_ENDIAN)
5152     {
5153       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5154                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5155       emit_move_insn (gen_lowpart (V16QImode,
5156                                    simplify_gen_subreg (TImode, operands[0],
5157                                                         CImode, 32)),
5158                       gen_lowpart (V16QImode,
5159                                    simplify_gen_subreg (TImode, operands[1],
5160                                                         CImode, 32)));
5161       DONE;
5162     }
5163   else
5164     FAIL;
;; After reload, decompose an XI move.  Reg-reg becomes four TImode moves;
;; on big-endian, reg<->mem becomes two OImode moves (bytes 0-31 and
;; 32-63).  Otherwise FAIL and let generic handling apply.
5167 (define_split
5168   [(set (match_operand:XI 0 "nonimmediate_operand")
5169         (match_operand:XI 1 "general_operand"))]
5170   "TARGET_SIMD && reload_completed"
5171   [(const_int 0)]
5173   if (register_operand (operands[0], XImode)
5174       && register_operand (operands[1], XImode))
5175     {
5176       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5177       DONE;
5178     }
5179   else if (BYTES_BIG_ENDIAN)
5180     {
5181       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5182                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5183       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5184                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5185       DONE;
5186     }
5187   else
5188     FAIL;
;; Builtin expander for vldN_dup: wrap the pointer in a BLK MEM sized to
;; nregs elements of the element mode, then emit the matching ldNr insn.
5191 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5192   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5193    (match_operand:DI 1 "register_operand" "w")
5194    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5195   "TARGET_SIMD"
5197   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5198   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5199                      * <VSTRUCT:nregs>);
5201   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5202                                                                 mem));
5203   DONE;
;; LD2 into D registers for 64-bit vector modes (VD).
5206 (define_insn "aarch64_ld2<mode>_dreg"
5207   [(set (match_operand:OI 0 "register_operand" "=w")
5208         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5209                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5210                    UNSPEC_LD2_DREG))]
5211   "TARGET_SIMD"
5212   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5213   [(set_attr "type" "neon_load2_2reg<q>")]
5216 (define_insn "aarch64_ld2<mode>_dreg"
5217   [(set (match_operand:OI 0 "register_operand" "=w")
5218         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
;; Single-element (DX: DI/DF) variant: no interleaving possible, so LD1 of
;; two 1d registers is used instead of LD2.
5219                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5220                    UNSPEC_LD2_DREG))]
5221   "TARGET_SIMD"
5222   "ld1\\t{%S0.1d - %T0.1d}, %1"
5223   [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into D registers for 64-bit vector modes (VD).
5226 (define_insn "aarch64_ld3<mode>_dreg"
5227   [(set (match_operand:CI 0 "register_operand" "=w")
5228         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5229                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5230                    UNSPEC_LD3_DREG))]
5231   "TARGET_SIMD"
5232   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5233   [(set_attr "type" "neon_load3_3reg<q>")]
;; Single-element (DX) variant uses LD1 of three 1d registers.
5236 (define_insn "aarch64_ld3<mode>_dreg"
5237   [(set (match_operand:CI 0 "register_operand" "=w")
5238         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5239                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240                    UNSPEC_LD3_DREG))]
5241   "TARGET_SIMD"
5242   "ld1\\t{%S0.1d - %U0.1d}, %1"
5243   [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into D registers for 64-bit vector modes (VD).
5246 (define_insn "aarch64_ld4<mode>_dreg"
5247   [(set (match_operand:XI 0 "register_operand" "=w")
5248         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5249                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250                    UNSPEC_LD4_DREG))]
5251   "TARGET_SIMD"
5252   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5253   [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-element (DX) variant uses LD1 of four 1d registers.
5256 (define_insn "aarch64_ld4<mode>_dreg"
5257   [(set (match_operand:XI 0 "register_operand" "=w")
5258         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5259                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260                    UNSPEC_LD4_DREG))]
5261   "TARGET_SIMD"
5262   "ld1\\t{%S0.1d - %V0.1d}, %1"
5263   [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for vldN on 64-bit vectors: BLK MEM of nregs * 8 bytes,
;; delegated to the corresponding _dreg insn.
5266 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5267  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5268   (match_operand:DI 1 "register_operand" "r")
5269   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270   "TARGET_SIMD"
5272   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5273   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5275   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5276   DONE;
;; Builtin expander for vld1: a plain vector move on little-endian, but the
;; element-ordered aarch64_be_ld1 on big-endian.
5279 (define_expand "aarch64_ld1<VALL_F16:mode>"
5280  [(match_operand:VALL_F16 0 "register_operand")
5281   (match_operand:DI 1 "register_operand")]
5282   "TARGET_SIMD"
5284   machine_mode mode = <VALL_F16:MODE>mode;
5285   rtx mem = gen_rtx_MEM (mode, operands[1]);
5287   if (BYTES_BIG_ENDIAN)
5288     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5289   else
5290     emit_move_insn (operands[0], mem);
5291   DONE;
;; Builtin expander for vldN on 128-bit vectors: MEM in the structure mode,
;; delegated to aarch64_simd_ldN.
5294 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5295  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5296   (match_operand:DI 1 "register_operand" "r")
5297   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5298   "TARGET_SIMD"
5300   machine_mode mode = <VSTRUCT:MODE>mode;
5301   rtx mem = gen_rtx_MEM (mode, operands[1]);
5303   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5304   DONE;
;; Builtin expander for vld1_x2 on 128-bit vectors (two consecutive
;; registers loaded with one LD1).
5307 (define_expand "aarch64_ld1x2<VQ:mode>"
5308  [(match_operand:OI 0 "register_operand" "=w")
5309   (match_operand:DI 1 "register_operand" "r")
5310   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311   "TARGET_SIMD"
5313   machine_mode mode = OImode;
5314   rtx mem = gen_rtx_MEM (mode, operands[1]);
5316   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5317   DONE;
;; Builtin expander for vld1_x2 on 64-bit vectors.
5320 (define_expand "aarch64_ld1x2<VDC:mode>"
5321  [(match_operand:OI 0 "register_operand" "=w")
5322   (match_operand:DI 1 "register_operand" "r")
5323   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324   "TARGET_SIMD"
5326   machine_mode mode = OImode;
5327   rtx mem = gen_rtx_MEM (mode, operands[1]);
5329   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5330   DONE;
;; Builtin expander for vldN_lane: checks the lane index against the
;; element count, builds a BLK MEM of nregs elements, and emits the
;; matching vec_load_lanes*_lane insn.
5334 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5335   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5336         (match_operand:DI 1 "register_operand" "w")
5337         (match_operand:VSTRUCT 2 "register_operand" "0")
5338         (match_operand:SI 3 "immediate_operand" "i")
5339         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5340   "TARGET_SIMD"
5342   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5343   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5344                      * <VSTRUCT:nregs>);
5346   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5347   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5348         operands[0], mem, operands[2], operands[3]));
5349   DONE;
5352 ;; Expanders for builtins to extract vector registers from large
5353 ;; opaque integer modes.
5355 ;; D-register list.
;; Extract D-register number `part' from a structure value: take the
;; 128-bit (VDBL) slice at byte offset part * 16 and narrow it to the
;; 64-bit destination mode.
5357 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5358  [(match_operand:VDC 0 "register_operand" "=w")
5359   (match_operand:VSTRUCT 1 "register_operand" "w")
5360   (match_operand:SI 2 "immediate_operand" "i")]
5361   "TARGET_SIMD"
5363   int part = INTVAL (operands[2]);
5364   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5365   int offset = part * 16;
5367   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5368   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5369   DONE;
5372 ;; Q-register list.
;; Extract Q-register number `part' from a structure value as the subreg at
;; byte offset part * 16.
5374 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5375  [(match_operand:VQ 0 "register_operand" "=w")
5376   (match_operand:VSTRUCT 1 "register_operand" "w")
5377   (match_operand:SI 2 "immediate_operand" "i")]
5378   "TARGET_SIMD"
5380   int part = INTVAL (operands[2]);
5381   int offset = part * 16;
5383   emit_move_insn (operands[0],
5384                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5385   DONE;
5388 ;; Permuted-store expanders for neon intrinsics.
5390 ;; Permute instructions
5392 ;; vec_perm support
;; Standard vec_perm pattern (byte vectors only): delegated entirely to
;; aarch64_expand_vec_perm.
5394 (define_expand "vec_perm<mode>"
5395   [(match_operand:VB 0 "register_operand")
5396    (match_operand:VB 1 "register_operand")
5397    (match_operand:VB 2 "register_operand")
5398    (match_operand:VB 3 "register_operand")]
5399   "TARGET_SIMD"
5401   aarch64_expand_vec_perm (operands[0], operands[1],
5402                            operands[2], operands[3], <nunits>);
5403   DONE;
;; Single-table TBL: index vector 2 selects bytes from the one 16-byte
;; table in operand 1.
5406 (define_insn "aarch64_tbl1<mode>"
5407   [(set (match_operand:VB 0 "register_operand" "=w")
5408         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5409                     (match_operand:VB 2 "register_operand" "w")]
5410                    UNSPEC_TBL))]
5411   "TARGET_SIMD"
5412   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5413   [(set_attr "type" "neon_tbl1<q>")]
5416 ;; Two source registers.
;; TBL over a two-register table held in an OI tuple.
5418 (define_insn "aarch64_tbl2v16qi"
5419   [(set (match_operand:V16QI 0 "register_operand" "=w")
5420         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5421                        (match_operand:V16QI 2 "register_operand" "w")]
5422                       UNSPEC_TBL))]
5423   "TARGET_SIMD"
5424   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5425   [(set_attr "type" "neon_tbl2_q")]
;; TBL over a two-register (OI) table with a VB-mode index/result.
5428 (define_insn "aarch64_tbl3<mode>"
5429   [(set (match_operand:VB 0 "register_operand" "=w")
5430         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5431                       (match_operand:VB 2 "register_operand" "w")]
5432                       UNSPEC_TBL))]
5433   "TARGET_SIMD"
5434   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5435   [(set_attr "type" "neon_tbl3")]
;; TBX over a two-register (OI) table: out-of-range indices leave the
;; corresponding bytes of the tied destination (operand 1, "0") unchanged.
5438 (define_insn "aarch64_tbx4<mode>"
5439   [(set (match_operand:VB 0 "register_operand" "=w")
5440         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5441                       (match_operand:OI 2 "register_operand" "w")
5442                       (match_operand:VB 3 "register_operand" "w")]
5443                       UNSPEC_TBX))]
5444   "TARGET_SIMD"
5445   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5446   [(set_attr "type" "neon_tbl4")]
5449 ;; Three source registers.
;; TBL over a three-register (CI) table.
5451 (define_insn "aarch64_qtbl3<mode>"
5452   [(set (match_operand:VB 0 "register_operand" "=w")
5453         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5454                       (match_operand:VB 2 "register_operand" "w")]
5455                       UNSPEC_TBL))]
5456   "TARGET_SIMD"
5457   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5458   [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register (CI) table; destination tied to operand 1.
5461 (define_insn "aarch64_qtbx3<mode>"
5462   [(set (match_operand:VB 0 "register_operand" "=w")
5463         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5464                       (match_operand:CI 2 "register_operand" "w")
5465                       (match_operand:VB 3 "register_operand" "w")]
5466                       UNSPEC_TBX))]
5467   "TARGET_SIMD"
5468   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5469   [(set_attr "type" "neon_tbl3")]
5472 ;; Four source registers.
;; TBL over a four-register (XI) table.
5474 (define_insn "aarch64_qtbl4<mode>"
5475   [(set (match_operand:VB 0 "register_operand" "=w")
5476         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5477                       (match_operand:VB 2 "register_operand" "w")]
5478                       UNSPEC_TBL))]
5479   "TARGET_SIMD"
5480   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5481   [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register (XI) table; destination tied to operand 1.
5484 (define_insn "aarch64_qtbx4<mode>"
5485   [(set (match_operand:VB 0 "register_operand" "=w")
5486         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5487                       (match_operand:XI 2 "register_operand" "w")
5488                       (match_operand:VB 3 "register_operand" "w")]
5489                       UNSPEC_TBX))]
5490   "TARGET_SIMD"
5491   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5492   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into an OI pair; kept as one insn until
;; after reload, then split by aarch64_split_combinev16qi into plain moves.
5495 (define_insn_and_split "aarch64_combinev16qi"
5496   [(set (match_operand:OI 0 "register_operand" "=w")
5497         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5498                     (match_operand:V16QI 2 "register_operand" "w")]
5499                    UNSPEC_CONCAT))]
5500   "TARGET_SIMD"
5501   "#"
5502   "&& reload_completed"
5503   [(const_int 0)]
5505   aarch64_split_combinev16qi (operands);
5506   DONE;
5508 [(set_attr "type" "multiple")]
5511 ;; This instruction's pattern is generated directly by
5512 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5513 ;; need corresponding changes there.
;; Two-operand permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 selected by the
;; PERMUTE iterator.
5514 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5515   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5516         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5517                           (match_operand:VALL_F16 2 "register_operand" "w")]
5518          PERMUTE))]
5519   "TARGET_SIMD"
5520   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5521   [(set_attr "type" "neon_permute<q>")]
5524 ;; This instruction's pattern is generated directly by
5525 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5526 ;; need corresponding changes there.  Note that the immediate (third)
5527 ;; operand is a lane index not a byte index.
;; EXT: extract a vector spanning operands 1 and 2 starting at the given
;; lane; the lane index is scaled to a byte offset at output time.
5528 (define_insn "aarch64_ext<mode>"
5529   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5530         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5531                           (match_operand:VALL_F16 2 "register_operand" "w")
5532                           (match_operand:SI 3 "immediate_operand" "i")]
5533          UNSPEC_EXT))]
5534   "TARGET_SIMD"
5536   operands[3] = GEN_INT (INTVAL (operands[3])
5537       * GET_MODE_UNIT_SIZE (<MODE>mode));
5538   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5540   [(set_attr "type" "neon_ext<q>")]
5543 ;; This instruction's pattern is generated directly by
5544 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5545 ;; need corresponding changes there.
;; REV16/REV32/REV64 element reversal, selected by the REVERSE iterator.
5546 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5547   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5548         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5549                     REVERSE))]
5550   "TARGET_SIMD"
5551   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5552   [(set_attr "type" "neon_rev<q>")]
;; ST2 from D registers for 64-bit vector modes (VD).
5555 (define_insn "aarch64_st2<mode>_dreg"
5556   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5557         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5558                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5559                    UNSPEC_ST2))]
5560   "TARGET_SIMD"
5561   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5562   [(set_attr "type" "neon_store2_2reg")]
;; Single-element (DX) variant uses ST1 of two 1d registers.
5565 (define_insn "aarch64_st2<mode>_dreg"
5566   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5567         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5568                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569                    UNSPEC_ST2))]
5570   "TARGET_SIMD"
5571   "st1\\t{%S1.1d - %T1.1d}, %0"
5572   [(set_attr "type" "neon_store1_2reg")]
;; ST3 from D registers for 64-bit vector modes (VD).
5575 (define_insn "aarch64_st3<mode>_dreg"
5576   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5577         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5578                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579                    UNSPEC_ST3))]
5580   "TARGET_SIMD"
5581   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5582   [(set_attr "type" "neon_store3_3reg")]
;; Single-element (DX) variant uses ST1 of three 1d registers.
5585 (define_insn "aarch64_st3<mode>_dreg"
5586   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5587         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5588                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5589                    UNSPEC_ST3))]
5590   "TARGET_SIMD"
5591   "st1\\t{%S1.1d - %U1.1d}, %0"
5592   [(set_attr "type" "neon_store1_3reg")]
;; ST4 from D registers for 64-bit vector modes (VD).
5595 (define_insn "aarch64_st4<mode>_dreg"
5596   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5597         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5598                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599                    UNSPEC_ST4))]
5600   "TARGET_SIMD"
5601   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5602   [(set_attr "type" "neon_store4_4reg")]
;; Single-element (DX) variant uses ST1 of four 1d registers.
5605 (define_insn "aarch64_st4<mode>_dreg"
5606   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5607         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5608                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5609                    UNSPEC_ST4))]
5610   "TARGET_SIMD"
5611   "st1\\t{%S1.1d - %V1.1d}, %0"
5612   [(set_attr "type" "neon_store1_4reg")]
;; Builtin expander for vstN on 64-bit vectors: BLK MEM of nregs * 8 bytes,
;; delegated to the corresponding _dreg store insn.
5615 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5616  [(match_operand:DI 0 "register_operand" "r")
5617   (match_operand:VSTRUCT 1 "register_operand" "w")
5618   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619   "TARGET_SIMD"
5621   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5622   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5624   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5625   DONE;
;; Builtin expander for vstN on 128-bit vectors, delegated to
;; aarch64_simd_stN.
5628 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5629  [(match_operand:DI 0 "register_operand" "r")
5630   (match_operand:VSTRUCT 1 "register_operand" "w")
5631   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5632   "TARGET_SIMD"
5634   machine_mode mode = <VSTRUCT:MODE>mode;
5635   rtx mem = gen_rtx_MEM (mode, operands[0]);
5637   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5638   DONE;
;; Builtin expander for vstN_lane: BLK MEM of nregs elements, delegated to
;; the matching vec_store_lanes*_lane insn (which endian-adjusts the lane).
5641 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5642  [(match_operand:DI 0 "register_operand" "r")
5643   (match_operand:VSTRUCT 1 "register_operand" "w")
5644   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5645   (match_operand:SI 2 "immediate_operand")]
5646   "TARGET_SIMD"
5648   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5649   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5650                      * <VSTRUCT:nregs>);
5652   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5653                 mem, operands[1], operands[2]));
5654   DONE;
;; Builtin expander for vst1: a plain vector move on little-endian, but the
;; element-ordered aarch64_be_st1 on big-endian.
5657 (define_expand "aarch64_st1<VALL_F16:mode>"
5658  [(match_operand:DI 0 "register_operand")
5659   (match_operand:VALL_F16 1 "register_operand")]
5660   "TARGET_SIMD"
5662   machine_mode mode = <VALL_F16:MODE>mode;
5663   rtx mem = gen_rtx_MEM (mode, operands[0]);
5665   if (BYTES_BIG_ENDIAN)
5666     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5667   else
5668     emit_move_insn (mem, operands[1]);
5669   DONE;
5672 ;; Expander for builtins to insert vector registers into large
5673 ;; opaque integer modes.
5675 ;; Q-register list.  We don't need a D-reg inserter as we zero
5676 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole structure (operand 1) into operand 0, then overwrite the
;; 128-bit slice at byte offset part * 16 with operand 2.
5678 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5679  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5680   (match_operand:VSTRUCT 1 "register_operand" "0")
5681   (match_operand:VQ 2 "register_operand" "w")
5682   (match_operand:SI 3 "immediate_operand" "i")]
5683   "TARGET_SIMD"
5685   int part = INTVAL (operands[3]);
5686   int offset = part * 16;
5688   emit_move_insn (operands[0], operands[1]);
5689   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5690                   operands[2]);
5691   DONE;
5694 ;; Standard pattern name vec_init<mode><Vel>.
5696 (define_expand "vec_init<mode><Vel>"
5697   [(match_operand:VALL_F16 0 "register_operand" "")
5698    (match_operand 1 "" "")]
5699   "TARGET_SIMD"
5701   aarch64_expand_vector_init (operands[0], operands[1]);
5702   DONE;
;; Load a single element from memory and replicate it into every lane
;; of the destination (LD1R).
5705 (define_insn "*aarch64_simd_ld1r<mode>"
5706   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5707         (vec_duplicate:VALL_F16
5708           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5709   "TARGET_SIMD"
5710   "ld1r\\t{%0.<Vtype>}, %1"
5711   [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 of two consecutive vector registers into an OImode value.
;; The dummy unspec supplies the element mode (Q-register variant);
;; %S0/%T0 print the first and second register of the pair.
5714 (define_insn "aarch64_simd_ld1<mode>_x2"
5715   [(set (match_operand:OI 0 "register_operand" "=w")
5716         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5717                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5718                    UNSPEC_LD1))]
5719   "TARGET_SIMD"
5720   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5721   [(set_attr "type" "neon_load1_2reg<q>")]
;; As above, but iterating over the D-register modes (VDC); the
;; generated pattern names are distinct because <mode> differs.
5724 (define_insn "aarch64_simd_ld1<mode>_x2"
5725   [(set (match_operand:OI 0 "register_operand" "=w")
5726         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5727                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728                    UNSPEC_LD1))]
5729   "TARGET_SIMD"
5730   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5731   [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal estimate, vector forms (FRECPE).
5735 (define_insn "aarch64_frecpe<mode>"
5736   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5737         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5738          UNSPEC_FRECPE))]
5739   "TARGET_SIMD"
5740   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5741   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECPE/FRECPX, selected by the FRECP iterator's suffix.
5744 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5745   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5746         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5747          FRECP))]
5748   "TARGET_SIMD"
5749   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5750   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS), vector and scalar forms.
5753 (define_insn "aarch64_frecps<mode>"
5754   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5755         (unspec:VHSDF_HSDF
5756           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5757           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5758           UNSPEC_FRECPS))]
5759   "TARGET_SIMD"
5760   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5761   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on 32-bit lanes.
5764 (define_insn "aarch64_urecpe<mode>"
5765   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5766         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5767                 UNSPEC_URECPE))]
5768  "TARGET_SIMD"
5769  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5770   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5772 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract the element at immediate index operand 2 from vector
;; operand 1; simply delegates to the aarch64_get_lane insn.
5774 (define_expand "vec_extract<mode><Vel>"
5775   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5776    (match_operand:VALL_F16 1 "register_operand" "")
5777    (match_operand:SI 2 "immediate_operand" "")]
5778   "TARGET_SIMD"
5780     emit_insn
5781       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5782     DONE;
5785 ;; aes
;; One AES round: AESE (encrypt) or AESD (decrypt), selected by the
;; CRYPTO_AES iterator.  The state register (operand 1) is tied to the
;; destination; operand 2 holds the round key.
5787 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5788   [(set (match_operand:V16QI 0 "register_operand" "=w")
5789         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5790                        (match_operand:V16QI 2 "register_operand" "w")]
5791          CRYPTO_AES))]
5792   "TARGET_SIMD && TARGET_AES"
5793   "aes<aes_op>\\t%0.16b, %2.16b"
5794   [(set_attr "type" "crypto_aese")]
5797 ;; When AES/AESMC fusion is enabled we want the register allocation to
5798 ;; look like:
5799 ;;    AESE Vn, _
5800 ;;    AESMC Vn, Vn
5801 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AESMC/AESIMC (mix-columns) insn.  The first alternative ties input
;; to output and is enabled only when AESE/AESMC fusion is active, so
;; the allocator prefers the fusible register pattern; the second,
;; untied alternative is always available.
5803 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5804   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5805         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5806          CRYPTO_AESMC))]
5807   "TARGET_SIMD && TARGET_AES"
5808   "aes<aesmc_op>\\t%0.16b, %1.16b"
5809   [(set_attr "type" "crypto_aesmc")
5810    (set_attr_alternative "enabled"
5811      [(if_then_else (match_test
5812                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5813                      (const_string "yes" )
5814                      (const_string "no"))
5815       (const_string "yes")])]
5818 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5819 ;; and enforce the register dependency without scheduling or register
5820 ;; allocation messing up the order or introducing moves in between.
5821 ;;  Mash the two together during combine.
;; Combined AESE+AESMC pattern emitted by combine.  The earlyclobber
;; ("=&w") keeps the round-key register (operand 2) distinct from the
;; output so the back-to-back pair stays fusible.
5823 (define_insn "*aarch64_crypto_aese_fused"
5824   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5825         (unspec:V16QI
5826           [(unspec:V16QI
5827             [(match_operand:V16QI 1 "register_operand" "0")
5828              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5829           ] UNSPEC_AESMC))]
5830   "TARGET_SIMD && TARGET_AES
5831    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5832   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5833   [(set_attr "type" "crypto_aese")
5834    (set_attr "length" "8")]
5837 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5838 ;; and enforce the register dependency without scheduling or register
5839 ;; allocation messing up the order or introducing moves in between.
5840 ;;  Mash the two together during combine.
;; Decrypt analogue of *aarch64_crypto_aese_fused: AESD+AESIMC as a
;; single two-instruction pattern, with the same earlyclobber trick.
5842 (define_insn "*aarch64_crypto_aesd_fused"
5843   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5844         (unspec:V16QI
5845           [(unspec:V16QI
5846             [(match_operand:V16QI 1 "register_operand" "0")
5847              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5848           ] UNSPEC_AESIMC))]
5849   "TARGET_SIMD && TARGET_AES
5850    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5851   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5852   [(set_attr "type" "crypto_aese")
5853    (set_attr "length" "8")]
5856 ;; sha1
;; SHA1H on a scalar SI value held in a vector register.
5858 (define_insn "aarch64_crypto_sha1hsi"
5859   [(set (match_operand:SI 0 "register_operand" "=w")
5860         (unspec:SI [(match_operand:SI 1
5861                        "register_operand" "w")]
5862          UNSPEC_SHA1H))]
5863   "TARGET_SIMD && TARGET_SHA2"
5864   "sha1h\\t%s0, %s1"
5865   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H taking its input from lane 0 of a V4SI register
;; (little-endian lane numbering, hence !BYTES_BIG_ENDIAN).
5868 (define_insn "aarch64_crypto_sha1hv4si"
5869   [(set (match_operand:SI 0 "register_operand" "=w")
5870         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5871                      (parallel [(const_int 0)]))]
5872          UNSPEC_SHA1H))]
5873   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5874   "sha1h\\t%s0, %s1"
5875   [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian version: the architectural lane 0 is RTL lane 3.
5878 (define_insn "aarch64_be_crypto_sha1hv4si"
5879   [(set (match_operand:SI 0 "register_operand" "=w")
5880         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5881                      (parallel [(const_int 3)]))]
5882          UNSPEC_SHA1H))]
5883   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5884   "sha1h\\t%s0, %s1"
5885   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 schedule update; operand 1 is tied to the destination.
5888 (define_insn "aarch64_crypto_sha1su1v4si"
5889   [(set (match_operand:V4SI 0 "register_operand" "=w")
5890         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5891                       (match_operand:V4SI 2 "register_operand" "w")]
5892          UNSPEC_SHA1SU1))]
5893   "TARGET_SIMD && TARGET_SHA2"
5894   "sha1su1\\t%0.4s, %2.4s"
5895   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update, selected by CRYPTO_SHA1.
5898 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5899   [(set (match_operand:V4SI 0 "register_operand" "=w")
5900         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5901                       (match_operand:SI 2 "register_operand" "w")
5902                       (match_operand:V4SI 3 "register_operand" "w")]
5903          CRYPTO_SHA1))]
5904   "TARGET_SIMD && TARGET_SHA2"
5905   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5906   [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 schedule update.
5909 (define_insn "aarch64_crypto_sha1su0v4si"
5910   [(set (match_operand:V4SI 0 "register_operand" "=w")
5911         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5912                       (match_operand:V4SI 2 "register_operand" "w")
5913                       (match_operand:V4SI 3 "register_operand" "w")]
5914          UNSPEC_SHA1SU0))]
5915   "TARGET_SIMD && TARGET_SHA2"
5916   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5917   [(set_attr "type" "crypto_sha1_xor")]
5920 ;; sha256
;; SHA256H/SHA256H2 hash update, selected by CRYPTO_SHA256; the
;; running hash (operand 1) is tied to the destination.
5922 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5923   [(set (match_operand:V4SI 0 "register_operand" "=w")
5924         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5925                       (match_operand:V4SI 2 "register_operand" "w")
5926                       (match_operand:V4SI 3 "register_operand" "w")]
5927          CRYPTO_SHA256))]
5928   "TARGET_SIMD && TARGET_SHA2"
5929   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5930   [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 message-schedule update.
5933 (define_insn "aarch64_crypto_sha256su0v4si"
5934   [(set (match_operand:V4SI 0 "register_operand" "=w")
5935         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5936                       (match_operand:V4SI 2 "register_operand" "w")]
5937          UNSPEC_SHA256SU0))]
5938   "TARGET_SIMD && TARGET_SHA2"
5939   "sha256su0\\t%0.4s, %2.4s"
5940   [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 message-schedule update.
5943 (define_insn "aarch64_crypto_sha256su1v4si"
5944   [(set (match_operand:V4SI 0 "register_operand" "=w")
5945         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5946                       (match_operand:V4SI 2 "register_operand" "w")
5947                       (match_operand:V4SI 3 "register_operand" "w")]
5948          UNSPEC_SHA256SU1))]
5949   "TARGET_SIMD && TARGET_SHA2"
5950   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5951   [(set_attr "type" "crypto_sha256_slow")]
5954 ;; sha512
;; SHA512H/SHA512H2 hash update (part of the SHA3 extension, hence
;; TARGET_SHA3); the running hash (operand 1) is tied to the output.
5956 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
5957   [(set (match_operand:V2DI 0 "register_operand" "=w")
5958         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5959                       (match_operand:V2DI 2 "register_operand" "w")
5960                       (match_operand:V2DI 3 "register_operand" "w")]
5961          CRYPTO_SHA512))]
5962   "TARGET_SIMD && TARGET_SHA3"
5963   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
5964   [(set_attr "type" "crypto_sha512")]
;; SHA512SU0 message-schedule update.
5967 (define_insn "aarch64_crypto_sha512su0qv2di"
5968   [(set (match_operand:V2DI 0 "register_operand" "=w")
5969         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5970                       (match_operand:V2DI 2 "register_operand" "w")]
5971          UNSPEC_SHA512SU0))]
5972   "TARGET_SIMD && TARGET_SHA3"
5973   "sha512su0\\t%0.2d, %2.2d"
5974   [(set_attr "type" "crypto_sha512")]
;; SHA512SU1 message-schedule update.
5977 (define_insn "aarch64_crypto_sha512su1qv2di"
5978   [(set (match_operand:V2DI 0 "register_operand" "=w")
5979         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5980                       (match_operand:V2DI 2 "register_operand" "w")
5981                       (match_operand:V2DI 3 "register_operand" "w")]
5982          UNSPEC_SHA512SU1))]
5983   "TARGET_SIMD && TARGET_SHA3"
5984   "sha512su1\\t%0.2d, %2.2d, %3.2d"
5985   [(set_attr "type" "crypto_sha512")]
5988 ;; sha3
;; EOR3: three-way exclusive OR, expressed with generic XOR rtl so
;; combine can form it from plain xor pairs.
5990 (define_insn "eor3q<mode>4"
5991   [(set (match_operand:VQ_I 0 "register_operand" "=w")
5992         (xor:VQ_I
5993          (xor:VQ_I
5994           (match_operand:VQ_I 2 "register_operand" "w")
5995           (match_operand:VQ_I 3 "register_operand" "w"))
5996          (match_operand:VQ_I 1 "register_operand" "w")))]
5997   "TARGET_SIMD && TARGET_SHA3"
5998   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
5999   [(set_attr "type" "crypto_sha3")]
;; RAX1: rotate each 64-bit lane of operand 2 left by one, then XOR
;; with operand 1.
6002 (define_insn "aarch64_rax1qv2di"
6003   [(set (match_operand:V2DI 0 "register_operand" "=w")
6004         (xor:V2DI
6005          (rotate:V2DI
6006           (match_operand:V2DI 2 "register_operand" "w")
6007           (const_int 1))
6008          (match_operand:V2DI 1 "register_operand" "w")))]
6009   "TARGET_SIMD && TARGET_SHA3"
6010   "rax1\\t%0.2d, %1.2d, %2.2d"
6011   [(set_attr "type" "crypto_sha3")]
;; XAR: XOR the two inputs (commutative, "%w"), then rotate each
;; 64-bit lane right by the immediate in operand 3.
6014 (define_insn "aarch64_xarqv2di"
6015   [(set (match_operand:V2DI 0 "register_operand" "=w")
6016         (rotatert:V2DI
6017          (xor:V2DI
6018           (match_operand:V2DI 1 "register_operand" "%w")
6019           (match_operand:V2DI 2 "register_operand" "w"))
6020          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6021   "TARGET_SIMD && TARGET_SHA3"
6022   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6023   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit clear and XOR -- operand 1 XOR (operand 2 AND NOT
;; operand 3), expressed with generic rtl for combine.
6026 (define_insn "bcaxq<mode>4"
6027   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6028         (xor:VQ_I
6029          (and:VQ_I
6030           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6031           (match_operand:VQ_I 2 "register_operand" "w"))
6032          (match_operand:VQ_I 1 "register_operand" "w")))]
6033   "TARGET_SIMD && TARGET_SHA3"
6034   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6035   [(set_attr "type" "crypto_sha3")]
6038 ;; SM3
;; SM3SS1: rotating XOR of three inputs (SM3 hash extension).
6040 (define_insn "aarch64_sm3ss1qv4si"
6041   [(set (match_operand:V4SI 0 "register_operand" "=w")
6042         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6043                       (match_operand:V4SI 2 "register_operand" "w")
6044                       (match_operand:V4SI 3 "register_operand" "w")]
6045          UNSPEC_SM3SS1))]
6046   "TARGET_SIMD && TARGET_SM4"
6047   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6048   [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/TT1B/TT2A/TT2B, selected by CRYPTO_SM3TT; operand 4 is the
;; 2-bit lane index and operand 1 is tied to the destination.
6052 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6053   [(set (match_operand:V4SI 0 "register_operand" "=w")
6054         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055                       (match_operand:V4SI 2 "register_operand" "w")
6056                       (match_operand:V4SI 3 "register_operand" "w")
6057                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6058          CRYPTO_SM3TT))]
6059   "TARGET_SIMD && TARGET_SM4"
6060   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6061   [(set_attr "type" "crypto_sm3")]
;; SM3PARTW1/SM3PARTW2 message-schedule updates.
6064 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6065   [(set (match_operand:V4SI 0 "register_operand" "=w")
6066         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6067                       (match_operand:V4SI 2 "register_operand" "w")
6068                       (match_operand:V4SI 3 "register_operand" "w")]
6069          CRYPTO_SM3PART))]
6070   "TARGET_SIMD && TARGET_SM4"
6071   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6072   [(set_attr "type" "crypto_sm3")]
6075 ;; SM4
;; SM4E: one round of SM4 encryption; state (operand 1) is tied to the
;; destination, operand 2 holds the round keys.
6077 (define_insn "aarch64_sm4eqv4si"
6078   [(set (match_operand:V4SI 0 "register_operand" "=w")
6079         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6080                       (match_operand:V4SI 2 "register_operand" "w")]
6081          UNSPEC_SM4E))]
6082   "TARGET_SIMD && TARGET_SM4"
6083   "sm4e\\t%0.4s, %2.4s"
6084   [(set_attr "type" "crypto_sm4")]
;; SM4EKEY: key-schedule derivation (no tied operand).
6087 (define_insn "aarch64_sm4ekeyqv4si"
6088   [(set (match_operand:V4SI 0 "register_operand" "=w")
6089         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6090                       (match_operand:V4SI 2 "register_operand" "w")]
6091          UNSPEC_SM4EKEY))]
6092   "TARGET_SIMD && TARGET_SM4"
6093   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6094   [(set_attr "type" "crypto_sm4")]
6097 ;; fp16fml
;; FP16 fused multiply-accumulate long (FMLAL/FMLSL), low half: build
;; parallel rtxes selecting the low half of each half-precision input
;; and hand off to the aarch64_simd_fml*_low insn.
6099 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6100   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6101         (unspec:VDQSF
6102          [(match_operand:VDQSF 1 "register_operand" "0")
6103           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6104           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6105          VFMLA16_LOW))]
6106   "TARGET_F16FML"
6108   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6109                                             <nunits> * 2, false);
6110   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6111                                             <nunits> * 2, false);
6113   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6114                                                                 operands[1],
6115                                                                 operands[2],
6116                                                                 operands[3],
6117                                                                 p1, p2));
6118   DONE;
;; As above but selecting the high halves (FMLAL2/FMLSL2).
6122 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6123   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6124         (unspec:VDQSF
6125          [(match_operand:VDQSF 1 "register_operand" "0")
6126           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6127           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6128          VFMLA16_HIGH))]
6129   "TARGET_F16FML"
6131   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6132   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6134   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6135                                                                  operands[1],
6136                                                                  operands[2],
6137                                                                  operands[3],
6138                                                                  p1, p2));
6139   DONE;
;; FMLAL (low): widen the low half-precision halves of operands 2 and
;; 3 to single precision, multiply, and accumulate into operand 1
;; (tied to the destination).  Expressed as a genuine fma of
;; float_extends so the rtl matches the instruction semantics.
6142 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6143   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6144         (fma:VDQSF
6145          (float_extend:VDQSF
6146           (vec_select:<VFMLA_SEL_W>
6147            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6148            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6149          (float_extend:VDQSF
6150           (vec_select:<VFMLA_SEL_W>
6151            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6152            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6153          (match_operand:VDQSF 1 "register_operand" "0")))]
6154   "TARGET_F16FML"
6155   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6156   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low): as FMLAL low but the first multiplicand is negated
;; before widening, giving a multiply-subtract.
6159 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6160   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6161         (fma:VDQSF
6162          (float_extend:VDQSF
6163           (neg:<VFMLA_SEL_W>
6164            (vec_select:<VFMLA_SEL_W>
6165             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6166             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6167          (float_extend:VDQSF
6168           (vec_select:<VFMLA_SEL_W>
6169            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6170            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6171          (match_operand:VDQSF 1 "register_operand" "0")))]
6172   "TARGET_F16FML"
6173   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6174   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high): same as the low form but selecting the high halves.
6177 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6178   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6179         (fma:VDQSF
6180          (float_extend:VDQSF
6181           (vec_select:<VFMLA_SEL_W>
6182            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6183            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6184          (float_extend:VDQSF
6185           (vec_select:<VFMLA_SEL_W>
6186            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6187            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6188          (match_operand:VDQSF 1 "register_operand" "0")))]
6189   "TARGET_F16FML"
6190   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6191   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high): high-half multiply-subtract.
6194 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6195   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6196         (fma:VDQSF
6197          (float_extend:VDQSF
6198           (neg:<VFMLA_SEL_W>
6199            (vec_select:<VFMLA_SEL_W>
6200             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6201             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6202          (float_extend:VDQSF
6203           (vec_select:<VFMLA_SEL_W>
6204            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6205            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6206          (match_operand:VDQSF 1 "register_operand" "0")))]
6207   "TARGET_F16FML"
6208   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6209   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL, 64-bit form, low half: operand 4 selects the
;; lane of operand 3; the index is endian-adjusted before being passed
;; to the underlying insn.
6212 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6213   [(set (match_operand:V2SF 0 "register_operand" "")
6214         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6215                            (match_operand:V4HF 2 "register_operand" "")
6216                            (match_operand:V4HF 3 "register_operand" "")
6217                            (match_operand:SI 4 "aarch64_imm2" "")]
6218          VFMLA16_LOW))]
6219   "TARGET_F16FML"
6221     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6222     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6224     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6225                                                             operands[1],
6226                                                             operands[2],
6227                                                             operands[3],
6228                                                             p1, lane));
6229     DONE;
;; As above, high half (FMLAL2/FMLSL2 by lane).
6233 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6234   [(set (match_operand:V2SF 0 "register_operand" "")
6235         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6236                            (match_operand:V4HF 2 "register_operand" "")
6237                            (match_operand:V4HF 3 "register_operand" "")
6238                            (match_operand:SI 4 "aarch64_imm2" "")]
6239          VFMLA16_HIGH))]
6240   "TARGET_F16FML"
6242     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6243     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6245     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6246                                                              operands[1],
6247                                                              operands[2],
6248                                                              operands[3],
6249                                                              p1, lane));
6250     DONE;
;; FMLAL by lane, 64-bit form, low half: half of operand 2 times a
;; single lane of operand 3 broadcast to all lanes, accumulated into
;; operand 1.  The "x" constraint on the indexed operand restricts it
;; to the subset of vector registers the lane-indexed encoding allows.
6253 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6254   [(set (match_operand:V2SF 0 "register_operand" "=w")
6255         (fma:V2SF
6256          (float_extend:V2SF
6257            (vec_select:V2HF
6258             (match_operand:V4HF 2 "register_operand" "w")
6259             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6260          (float_extend:V2SF
6261            (vec_duplicate:V2HF
6262             (vec_select:HF
6263              (match_operand:V4HF 3 "register_operand" "x")
6264              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6265          (match_operand:V2SF 1 "register_operand" "0")))]
6266   "TARGET_F16FML"
6267   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6268   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by lane, low half: first multiplicand negated before widening.
6271 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6272   [(set (match_operand:V2SF 0 "register_operand" "=w")
6273         (fma:V2SF
6274          (float_extend:V2SF
6275           (neg:V2HF
6276            (vec_select:V2HF
6277             (match_operand:V4HF 2 "register_operand" "w")
6278             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6279          (float_extend:V2SF
6280           (vec_duplicate:V2HF
6281            (vec_select:HF
6282             (match_operand:V4HF 3 "register_operand" "x")
6283             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6284          (match_operand:V2SF 1 "register_operand" "0")))]
6285   "TARGET_F16FML"
6286   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6287   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by lane: high half of operand 2.
6290 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6291   [(set (match_operand:V2SF 0 "register_operand" "=w")
6292         (fma:V2SF
6293          (float_extend:V2SF
6294            (vec_select:V2HF
6295             (match_operand:V4HF 2 "register_operand" "w")
6296             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6297          (float_extend:V2SF
6298            (vec_duplicate:V2HF
6299             (vec_select:HF
6300              (match_operand:V4HF 3 "register_operand" "x")
6301              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6302          (match_operand:V2SF 1 "register_operand" "0")))]
6303   "TARGET_F16FML"
6304   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6305   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by lane: high half, negated first multiplicand.
6308 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6309   [(set (match_operand:V2SF 0 "register_operand" "=w")
6310         (fma:V2SF
6311          (float_extend:V2SF
6312            (neg:V2HF
6313             (vec_select:V2HF
6314              (match_operand:V4HF 2 "register_operand" "w")
6315              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6316          (float_extend:V2SF
6317            (vec_duplicate:V2HF
6318             (vec_select:HF
6319              (match_operand:V4HF 3 "register_operand" "x")
6320              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6321          (match_operand:V2SF 1 "register_operand" "0")))]
6322   "TARGET_F16FML"
6323   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6324   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL, 128-bit form indexing a 128-bit register
;; (laneq): operand 4 is a 3-bit lane index, endian-adjusted here.
6327 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6328   [(set (match_operand:V4SF 0 "register_operand" "")
6329         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6330                            (match_operand:V8HF 2 "register_operand" "")
6331                            (match_operand:V8HF 3 "register_operand" "")
6332                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6333          VFMLA16_LOW))]
6334   "TARGET_F16FML"
6336     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6337     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6339     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6340                                                               operands[1],
6341                                                               operands[2],
6342                                                               operands[3],
6343                                                               p1, lane));
6344     DONE;
;; As above, high half.
6347 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6348   [(set (match_operand:V4SF 0 "register_operand" "")
6349         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6350                            (match_operand:V8HF 2 "register_operand" "")
6351                            (match_operand:V8HF 3 "register_operand" "")
6352                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6353          VFMLA16_HIGH))]
6354   "TARGET_F16FML"
6356     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6357     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6359     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6360                                                                operands[1],
6361                                                                operands[2],
6362                                                                operands[3],
6363                                                                p1, lane));
6364     DONE;
;; FMLAL by laneq, 128-bit form, low half: low half of operand 2 times
;; one lane of 128-bit operand 3 broadcast to all lanes, accumulated
;; into operand 1.  The "x" constraint restricts the indexed register
;; as required by the lane-indexed encoding.
6367 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6368   [(set (match_operand:V4SF 0 "register_operand" "=w")
6369         (fma:V4SF
6370          (float_extend:V4SF
6371           (vec_select:V4HF
6372             (match_operand:V8HF 2 "register_operand" "w")
6373             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6374          (float_extend:V4SF
6375           (vec_duplicate:V4HF
6376            (vec_select:HF
6377             (match_operand:V8HF 3 "register_operand" "x")
6378             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6379          (match_operand:V4SF 1 "register_operand" "0")))]
6380   "TARGET_F16FML"
6381   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6382   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by laneq, low half: negated first multiplicand.
6385 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6386   [(set (match_operand:V4SF 0 "register_operand" "=w")
6387         (fma:V4SF
6388           (float_extend:V4SF
6389            (neg:V4HF
6390             (vec_select:V4HF
6391              (match_operand:V8HF 2 "register_operand" "w")
6392              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6393          (float_extend:V4SF
6394           (vec_duplicate:V4HF
6395            (vec_select:HF
6396             (match_operand:V8HF 3 "register_operand" "x")
6397             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6398          (match_operand:V4SF 1 "register_operand" "0")))]
6399   "TARGET_F16FML"
6400   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6401   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by laneq: high half of operand 2.
6404 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6405   [(set (match_operand:V4SF 0 "register_operand" "=w")
6406         (fma:V4SF
6407          (float_extend:V4SF
6408           (vec_select:V4HF
6409             (match_operand:V8HF 2 "register_operand" "w")
6410             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6411          (float_extend:V4SF
6412           (vec_duplicate:V4HF
6413            (vec_select:HF
6414             (match_operand:V8HF 3 "register_operand" "x")
6415             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6416          (match_operand:V4SF 1 "register_operand" "0")))]
6417   "TARGET_F16FML"
6418   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6419   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by laneq: high half, negated first multiplicand.
6422 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6423   [(set (match_operand:V4SF 0 "register_operand" "=w")
6424         (fma:V4SF
6425          (float_extend:V4SF
6426           (neg:V4HF
6427            (vec_select:V4HF
6428             (match_operand:V8HF 2 "register_operand" "w")
6429             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6430          (float_extend:V4SF
6431           (vec_duplicate:V4HF
6432            (vec_select:HF
6433             (match_operand:V8HF 3 "register_operand" "x")
6434             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6435          (match_operand:V4SF 1 "register_operand" "0")))]
6436   "TARGET_F16FML"
6437   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6438   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half fmlal/fmlsl laneq forms (f16mac1 selects
;; which via the VFMLA16_LOW iterator).  Builds the lo-half selector
;; parallel and the endianness-corrected lane index, then emits the
;; matching concrete insn pattern.
6441 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6442   [(set (match_operand:V2SF 0 "register_operand" "")
6443         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6444                       (match_operand:V4HF 2 "register_operand" "")
6445                       (match_operand:V8HF 3 "register_operand" "")
6446                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6447          VFMLA16_LOW))]
6448   "TARGET_F16FML"
6450     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6451     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6453     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6454                                                              operands[1],
6455                                                              operands[2],
6456                                                              operands[3],
6457                                                              p1, lane));
6458     DONE;
;; Expander for the high-half fmlal2/fmlsl2 laneq forms: identical to
;; the low-half expander except the selector parallel picks the high
;; half (third argument true).
6462 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6463   [(set (match_operand:V2SF 0 "register_operand" "")
6464         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6465                       (match_operand:V4HF 2 "register_operand" "")
6466                       (match_operand:V8HF 3 "register_operand" "")
6467                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6468          VFMLA16_HIGH))]
6469   "TARGET_F16FML"
6471     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6472     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6474     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6475                                                               operands[1],
6476                                                               operands[2],
6477                                                               operands[3],
6478                                                               p1, lane));
6479     DONE;
;; FMLAL (by element, 2S result): widen the low half of the V4HF
;; multiplicand (operand 2) to V2SF, multiply by lane 5 of the V8HF
;; operand 3, accumulate into the V2SF accumulator (operand 1).
6483 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6484   [(set (match_operand:V2SF 0 "register_operand" "=w")
6485         (fma:V2SF
6486          (float_extend:V2SF
6487            (vec_select:V2HF
6488             (match_operand:V4HF 2 "register_operand" "w")
6489             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6490          (float_extend:V2SF
6491           (vec_duplicate:V2HF
6492            (vec_select:HF
6493             (match_operand:V8HF 3 "register_operand" "x")
6494             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6495          (match_operand:V2SF 1 "register_operand" "0")))]
6496   "TARGET_F16FML"
6497   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6498   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, 2S result): as fmlal above but the selected low
;; half of operand 2 is negated before widening (multiply-subtract).
6501 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6502   [(set (match_operand:V2SF 0 "register_operand" "=w")
6503         (fma:V2SF
6504          (float_extend:V2SF
6505           (neg:V2HF
6506            (vec_select:V2HF
6507             (match_operand:V4HF 2 "register_operand" "w")
6508             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6509          (float_extend:V2SF
6510           (vec_duplicate:V2HF
6511            (vec_select:HF
6512             (match_operand:V8HF 3 "register_operand" "x")
6513             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6514          (match_operand:V2SF 1 "register_operand" "0")))]
6515   "TARGET_F16FML"
6516   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6517   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, 2S result): same as the low form but selecting
;; the high half of operand 2 (vect_par_cnst_hi_half), emitting fmlal2.
6520 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6521   [(set (match_operand:V2SF 0 "register_operand" "=w")
6522         (fma:V2SF
6523          (float_extend:V2SF
6524            (vec_select:V2HF
6525             (match_operand:V4HF 2 "register_operand" "w")
6526             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6527          (float_extend:V2SF
6528           (vec_duplicate:V2HF
6529            (vec_select:HF
6530             (match_operand:V8HF 3 "register_operand" "x")
6531             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6532          (match_operand:V2SF 1 "register_operand" "0")))]
6533   "TARGET_F16FML"
6534   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6535   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, 2S result): high-half, negated multiplicand
;; variant — widening multiply-subtract into the V2SF accumulator.
6538 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6539   [(set (match_operand:V2SF 0 "register_operand" "=w")
6540         (fma:V2SF
6541          (float_extend:V2SF
6542           (neg:V2HF
6543            (vec_select:V2HF
6544             (match_operand:V4HF 2 "register_operand" "w")
6545             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6546          (float_extend:V2SF
6547           (vec_duplicate:V2HF
6548            (vec_select:HF
6549             (match_operand:V8HF 3 "register_operand" "x")
6550             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6551          (match_operand:V2SF 1 "register_operand" "0")))]
6552   "TARGET_F16FML"
6553   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6554   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad, low-half lane forms: V8HF multiplicand,
;; V4HF lane operand (index limited to 0-3 by aarch64_imm2).  Builds
;; the lo-half selector and endian-adjusted lane, then emits the
;; concrete insn.
6557 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6558   [(set (match_operand:V4SF 0 "register_operand" "")
6559         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6560                       (match_operand:V8HF 2 "register_operand" "")
6561                       (match_operand:V4HF 3 "register_operand" "")
6562                       (match_operand:SI 4 "aarch64_imm2" "")]
6563          VFMLA16_LOW))]
6564   "TARGET_F16FML"
6566     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6567     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6569     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6570                                                              operands[1],
6571                                                              operands[2],
6572                                                              operands[3],
6573                                                              p1, lane));
6574     DONE;
;; Expander for the quad, high-half lane forms (fmlal2/fmlsl2):
;; identical to the low-half expander above except the selector
;; parallel picks the high half.
6577 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6578   [(set (match_operand:V4SF 0 "register_operand" "")
6579         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6580                       (match_operand:V8HF 2 "register_operand" "")
6581                       (match_operand:V4HF 3 "register_operand" "")
6582                       (match_operand:SI 4 "aarch64_imm2" "")]
6583          VFMLA16_HIGH))]
6584   "TARGET_F16FML"
6586     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6587     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6589     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6590                                                               operands[1],
6591                                                               operands[2],
6592                                                               operands[3],
6593                                                               p1, lane));
6594     DONE;
;; FMLAL (by element, 4S result, D-register lane operand): widen the
;; low half of the V8HF multiplicand, multiply by lane 5 (0-3) of the
;; V4HF operand 3, accumulate into the V4SF accumulator.
6597 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6598   [(set (match_operand:V4SF 0 "register_operand" "=w")
6599         (fma:V4SF
6600          (float_extend:V4SF
6601           (vec_select:V4HF
6602            (match_operand:V8HF 2 "register_operand" "w")
6603            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6604          (float_extend:V4SF
6605           (vec_duplicate:V4HF
6606            (vec_select:HF
6607             (match_operand:V4HF 3 "register_operand" "x")
6608             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6609          (match_operand:V4SF 1 "register_operand" "0")))]
6610   "TARGET_F16FML"
6611   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6612   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, 4S result, D-register lane operand): as the
;; fmlal form above but with the selected half negated before widening.
6615 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6616   [(set (match_operand:V4SF 0 "register_operand" "=w")
6617         (fma:V4SF
6618          (float_extend:V4SF
6619           (neg:V4HF
6620            (vec_select:V4HF
6621             (match_operand:V8HF 2 "register_operand" "w")
6622             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6623          (float_extend:V4SF
6624           (vec_duplicate:V4HF
6625            (vec_select:HF
6626             (match_operand:V4HF 3 "register_operand" "x")
6627             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6628          (match_operand:V4SF 1 "register_operand" "0")))]
6629   "TARGET_F16FML"
6630   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6631   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, 4S result, D-register lane operand): selects the
;; high half of the V8HF multiplicand.
6634 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6635   [(set (match_operand:V4SF 0 "register_operand" "=w")
6636         (fma:V4SF
6637          (float_extend:V4SF
6638           (vec_select:V4HF
6639            (match_operand:V8HF 2 "register_operand" "w")
6640            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6641          (float_extend:V4SF
6642           (vec_duplicate:V4HF
6643            (vec_select:HF
6644             (match_operand:V4HF 3 "register_operand" "x")
6645             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6646          (match_operand:V4SF 1 "register_operand" "0")))]
6647   "TARGET_F16FML"
6648   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6649   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, 4S result, D-register lane operand): high-half,
;; negated-multiplicand variant (widening multiply-subtract).
6652 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6653   [(set (match_operand:V4SF 0 "register_operand" "=w")
6654         (fma:V4SF
6655          (float_extend:V4SF
6656           (neg:V4HF
6657            (vec_select:V4HF
6658             (match_operand:V8HF 2 "register_operand" "w")
6659             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6660          (float_extend:V4SF
6661           (vec_duplicate:V4HF
6662            (vec_select:HF
6663             (match_operand:V4HF 3 "register_operand" "x")
6664             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6665          (match_operand:V4SF 1 "register_operand" "0")))]
6666   "TARGET_F16FML"
6667   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6668   [(set_attr "type" "neon_fp_mul_s")]
6671 ;; pmull
;; Polynomial multiply long: 64x64 -> 128-bit carry-less multiply of
;; two DI sources into a TI result (UNSPEC_PMULL).  Requires both SIMD
;; and the AES crypto extension.
6673 (define_insn "aarch64_crypto_pmulldi"
6674   [(set (match_operand:TI 0 "register_operand" "=w")
6675         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6676                      (match_operand:DI 2 "register_operand" "w")]
6677                     UNSPEC_PMULL))]
6678  "TARGET_SIMD && TARGET_AES"
6679  "pmull\\t%0.1q, %1.1d, %2.1d"
6680   [(set_attr "type" "crypto_pmull")]
;; Polynomial multiply long (second part): pmull2 operates on the
;; upper 64-bit elements of the V2DI sources, producing a TI result
;; (UNSPEC_PMULL2).  Requires both SIMD and the AES crypto extension.
6683 (define_insn "aarch64_crypto_pmullv2di"
6684  [(set (match_operand:TI 0 "register_operand" "=w")
6685        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6686                    (match_operand:V2DI 2 "register_operand" "w")]
6687                   UNSPEC_PMULL2))]
6688   "TARGET_SIMD && TARGET_AES"
6689   "pmull2\\t%0.1q, %1.2d, %2.2d"
6690   [(set_attr "type" "crypto_pmull")]