;; Fix version check for ATTRIBUTE_GCC_DUMP_PRINTF
;; [official-gcc.git] / gcc / config / aarch64 / aarch64-simd.md
;; blob c4be3101fdec930707918106cd7c53cf7584553e
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Generic vector move expander (all AdvSIMD modes, including FP16 vectors).
;; Legitimizes a memory destination so a zero source can be stored from xzr.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23         (match_operand:VALL_F16 1 "general_operand" ""))]
24   "TARGET_SIMD"
25   "
26   /* Force the operand into a register if it is not an
27      immediate whose use can be replaced with xzr.
28      If the mode is 16 bytes wide, then we will be doing
29      a stp in DI mode, so we check the validity of that.
30      If the mode is 8 bytes wide, then we will be doing a
31      normal str, so the check need not apply.  */
32   if (GET_CODE (operands[0]) == MEM
33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35                 && aarch64_mem_pair_operand (operands[0], DImode))
36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37       operands[1] = force_reg (<MODE>mode, operands[1]);
38   "
;; Misaligned vector move expander; must never FAIL, so a mem := mem/const
;; form is repaired by forcing the source into a register.
41 (define_expand "movmisalign<mode>"
42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43         (match_operand:VALL 1 "general_operand" ""))]
44   "TARGET_SIMD"
46   /* This pattern is not permitted to fail during expansion: if both arguments
47      are non-registers (e.g. memory := constant, which can be created by the
48      auto-vectorizer), force operand 1 into a register.  */
49   if (!register_operand (operands[0], <MODE>mode)
50       && !register_operand (operands[1], <MODE>mode))
51     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector: DUP from either
;; a SIMD register (lane 0) or a general-purpose register.
54 (define_insn "aarch64_simd_dup<mode>"
55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56         (vec_duplicate:VDQ_I
57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58   "TARGET_SIMD"
59   "@
60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
61    dup\\t%0.<Vtype>, %<vw>1"
62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Duplicate a floating-point scalar (in a SIMD register) into every lane.
65 (define_insn "aarch64_simd_dup<mode>"
66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67         (vec_duplicate:VDQF_F16
68           (match_operand:<VEL> 1 "register_operand" "w")))]
69   "TARGET_SIMD"
70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71   [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of a vector to all lanes of the result.
;; The lane index is remapped for big-endian via aarch64_endian_lane_rtx.
74 (define_insn "aarch64_dup_lane<mode>"
75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76         (vec_duplicate:VALL_F16
77           (vec_select:<VEL>
78             (match_operand:VALL_F16 1 "register_operand" "w")
79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80           )))]
81   "TARGET_SIMD"
82   {
83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85   }
86   [(set_attr "type" "neon_dup<q>")]
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit vs 128-bit) from the destination mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91         (vec_duplicate:VALL_F16_NO_V2Q
92           (vec_select:<VEL>
93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95           )))]
96   "TARGET_SIMD"
97   {
98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100   }
101   [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move: load/store, zero store via xzr, register moves between
;; SIMD and general registers, and vector immediates (Dn).
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105   [(set (match_operand:VD 0 "nonimmediate_operand"
106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
107         (match_operand:VD 1 "general_operand"
108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
109   "TARGET_SIMD
110    && (register_operand (operands[0], <MODE>mode)
111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113    switch (which_alternative)
114      {
115      case 0: return "ldr\t%d0, %1";
116      case 1: return "str\txzr, %0";
117      case 2: return "str\t%d1, %0";
118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119      case 4: return "umov\t%0, %1.d[0]";
120      case 5: return "fmov\t%d0, %1";
121      case 6: return "mov\t%0, %1";
122      case 7:
123         return aarch64_output_simd_mov_immediate (operands[1], 64);
124      default: gcc_unreachable ();
125      }
127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
129                      mov_reg, neon_move<q>")]
;; 128-bit vector move.  Zero store uses STP of two xzr; GP<->SIMD transfers
;; emit "#" and are split later (see the define_splits below), hence length 8.
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133   [(set (match_operand:VQ 0 "nonimmediate_operand"
134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
135         (match_operand:VQ 1 "general_operand"
136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
137   "TARGET_SIMD
138    && (register_operand (operands[0], <MODE>mode)
139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141   switch (which_alternative)
142     {
143     case 0:
144         return "ldr\t%q0, %1";
145     case 1:
146         return "stp\txzr, xzr, %0";
147     case 2:
148         return "str\t%q1, %0";
149     case 3:
150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151     case 4:
152     case 5:
153     case 6:
154         return "#";
155     case 7:
156         return aarch64_output_simd_mov_immediate (operands[1], 128);
157     default:
158         gcc_unreachable ();
159     }
161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162                      neon_logic<q>, multiple, multiple,\
163                      multiple, neon_move<q>")
164    (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
;; Only matches when the selected lane is architectural lane 0 after
;; endian correction (ENDIAN_LANE_N).
170 (define_insn "aarch64_store_lane0<mode>"
171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
174   "TARGET_SIMD
175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176   "str\\t%<Vetype>1, %0"
177   [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers; the second address must be exactly
;; the first plus the size of the first mode.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181   [(set (match_operand:DREG 0 "register_operand" "=w")
182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183    (set (match_operand:DREG2 2 "register_operand" "=w")
184         (match_operand:DREG2 3 "memory_operand" "m"))]
185   "TARGET_SIMD
186    && rtx_equal_p (XEXP (operands[3], 0),
187                    plus_constant (Pmode,
188                                   XEXP (operands[1], 0),
189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
190   "ldp\\t%d0, %d2, %1"
191   [(set_attr "type" "neon_ldp")]
;; STP of two 64-bit vector registers; mirror of load_pair above.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196         (match_operand:DREG 1 "register_operand" "w"))
197    (set (match_operand:DREG2 2 "memory_operand" "=m")
198         (match_operand:DREG2 3 "register_operand" "w"))]
199   "TARGET_SIMD
200    && rtx_equal_p (XEXP (operands[2], 0),
201                    plus_constant (Pmode,
202                                   XEXP (operands[0], 0),
203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
204   "stp\\t%d1, %d3, %0"
205   [(set_attr "type" "neon_stp")]
;; LDP of two 128-bit (Q) vector registers at consecutive addresses.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209   [(set (match_operand:VQ 0 "register_operand" "=w")
210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211    (set (match_operand:VQ2 2 "register_operand" "=w")
212         (match_operand:VQ2 3 "memory_operand" "m"))]
213   "TARGET_SIMD
214     && rtx_equal_p (XEXP (operands[3], 0),
215                     plus_constant (Pmode,
216                                XEXP (operands[1], 0),
217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
218   "ldp\\t%q0, %q2, %1"
219   [(set_attr "type" "neon_ldp_q")]
;; STP of two 128-bit (Q) vector registers at consecutive addresses.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224         (match_operand:VQ 1 "register_operand" "w"))
225    (set (match_operand:VQ2 2 "memory_operand" "=m")
226         (match_operand:VQ2 3 "register_operand" "w"))]
227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228                 plus_constant (Pmode,
229                                XEXP (operands[0], 0),
230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
231   "stp\\t%q1, %q3, %0"
232   [(set_attr "type" "neon_stp_q")]
;; After reload: a Q-mode move held entirely in general registers becomes
;; two DImode register-to-register moves.
236 (define_split
237   [(set (match_operand:VQ 0 "register_operand" "")
238       (match_operand:VQ 1 "register_operand" ""))]
239   "TARGET_SIMD && reload_completed
240    && GP_REGNUM_P (REGNO (operands[0]))
241    && GP_REGNUM_P (REGNO (operands[1]))"
242   [(const_int 0)]
244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
245   DONE;
;; After reload: a Q-mode move crossing the FP/GP register-file boundary is
;; split by aarch64_split_simd_move (the "#" alternatives above).
248 (define_split
249   [(set (match_operand:VQ 0 "register_operand" "")
250         (match_operand:VQ 1 "register_operand" ""))]
251   "TARGET_SIMD && reload_completed
252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
254   [(const_int 0)]
256   aarch64_split_simd_move (operands[0], operands[1]);
257   DONE;
;; Expand a Q-register <-> GP-register-pair move as two half-width moves.
;; GP source: write the low then high quad of the destination.
;; SIMD source: extract the low and high halves into the GP destination parts.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261   [(set (match_operand:VQ 0)
262         (match_operand:VQ 1))]
263   "TARGET_SIMD"
264   {
265     rtx dst = operands[0];
266     rtx src = operands[1];
268     if (GP_REGNUM_P (REGNO (src)))
269       {
270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
273         emit_insn
274           (gen_move_lo_quad_<mode> (dst, src_low_part));
275         emit_insn
276           (gen_move_hi_quad_<mode> (dst, src_high_part));
277       }
279     else
280       {
281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
286         emit_insn
287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
288         emit_insn
289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
290       }
291     DONE;
292   }
;; Move the low 64-bit half of a Q register to a general register (UMOV d[0]).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
297         (vec_select:<VHALF>
298           (match_operand:VQ 1 "register_operand" "w")
299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300   "TARGET_SIMD && reload_completed"
301   "umov\t%0, %1.d[0]"
302   [(set_attr "type" "neon_to_gp<q>")
303    (set_attr "length" "4")
304   ])
;; Move the high 64-bit half of a Q register to a general register (UMOV d[1]).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
308         (vec_select:<VHALF>
309           (match_operand:VQ 1 "register_operand" "w")
310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311   "TARGET_SIMD && reload_completed"
312   "umov\t%0, %1.d[1]"
313   [(set_attr "type" "neon_to_gp<q>")
314    (set_attr "length" "4")
315   ])
;; OR-NOT: note the operand swap in the template — ORN negates its second
;; source, which corresponds to operand 1 in the RTL (not:... operand 1).
317 (define_insn "orn<mode>3"
318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
321  "TARGET_SIMD"
322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323   [(set_attr "type" "neon_logic<q>")]
;; AND-NOT: as with ORN, BIC clears with its second source, which is the
;; negated operand 1 of the RTL pattern.
326 (define_insn "bic<mode>3"
327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
330  "TARGET_SIMD"
331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332   [(set_attr "type" "neon_logic<q>")]
;; Integer vector addition.
335 (define_insn "add<mode>3"
336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
339   "TARGET_SIMD"
340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341   [(set_attr "type" "neon_add<q>")]
;; Integer vector subtraction.
344 (define_insn "sub<mode>3"
345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
348   "TARGET_SIMD"
349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350   [(set_attr "type" "neon_sub<q>")]
;; Integer vector multiply (byte/half/word element sizes only — no 64-bit MUL).
353 (define_insn "mul<mode>3"
354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
357   "TARGET_SIMD"
358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the element-width-appropriate REV variant.
362 (define_insn "bswap<mode>2"
363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
365   "TARGET_SIMD"
366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte element (RBIT).
370 (define_insn "aarch64_rbit<mode>"
371   [(set (match_operand:VB 0 "register_operand" "=w")
372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
373                    UNSPEC_RBIT))]
374   "TARGET_SIMD"
375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as bswap + rbit (full bit reversal)
;; followed by clz on the reversed value.
379 (define_expand "ctz<mode>2"
380   [(set (match_operand:VS 0 "register_operand")
381         (ctz:VS (match_operand:VS 1 "register_operand")))]
382   "TARGET_SIMD"
383   {
384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
386                                              <MODE>mode, 0);
387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
389      DONE;
390   }
;; xorsign(x, y) = x with its sign bit XORed by the sign of y.
;; Implemented entirely in the integer domain: mask y's sign bits with a
;; sign-bit-only constant vector, then XOR them into x.
393 (define_expand "xorsign<mode>3"
394   [(match_operand:VHSDF 0 "register_operand")
395    (match_operand:VHSDF 1 "register_operand")
396    (match_operand:VHSDF 2 "register_operand")]
397   "TARGET_SIMD"
400   machine_mode imode = <V_INT_EQUIV>mode;
401   rtx v_bitmask = gen_reg_rtx (imode);
402   rtx op1x = gen_reg_rtx (imode);
403   rtx op2x = gen_reg_rtx (imode);
405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
  /* -1 << (bits-per-element - 1) leaves only the sign bit set per lane.  */
408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410   emit_move_insn (v_bitmask,
411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412                                                      HOST_WIDE_INT_M1U << bits));
414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416   emit_move_insn (operands[0],
417                   lowpart_subreg (<MODE>mode, op1x, imode));
418   DONE;
422 ;; These instructions map to the __builtins for the Dot Product operations.
;; Accumulating signed/unsigned dot product (SDOT/UDOT); operand 1 is the
;; accumulator and is tied to the output.
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424   [(set (match_operand:VS 0 "register_operand" "=w")
425         (plus:VS (match_operand:VS 1 "register_operand" "0")
426                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
428                 DOTPROD)))]
429   "TARGET_DOTPROD"
430   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431   [(set_attr "type" "neon_dot")]
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
439 ;; for (i=0; i<len; i++) {
440 ;;     c = a[i] * b[i];
441 ;;     r += c;
442 ;; }
443 ;; return result;
445 ;; This can be auto-vectorized to
446 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
448 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
451 ;; ...
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expansion: accumulate into operand 3 with the insn above, then copy the
;; accumulator into operand 0.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455   [(set (match_operand:VS 0 "register_operand")
456         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457                             (match_operand:<VSI2QI> 2 "register_operand")]
458                  DOTPROD)
459                 (match_operand:VS 3 "register_operand")))]
460   "TARGET_DOTPROD"
462   emit_insn (
463     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
464                                     operands[2]));
465   emit_insn (gen_rtx_SET (operands[0], operands[3]));
466   DONE;
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; Lane form with a 64-bit (V8QI) index vector; lane is endian-corrected.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472   [(set (match_operand:VS 0 "register_operand" "=w")
473         (plus:VS (match_operand:VS 1 "register_operand" "0")
474                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475                             (match_operand:V8QI 3 "register_operand" "<h_con>")
476                             (match_operand:SI 4 "immediate_operand" "i")]
477                 DOTPROD)))]
478   "TARGET_DOTPROD"
479   {
480     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
482   }
483   [(set_attr "type" "neon_dot")]
;; Lane form with a 128-bit (V16QI) index vector.
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487   [(set (match_operand:VS 0 "register_operand" "=w")
488         (plus:VS (match_operand:VS 1 "register_operand" "0")
489                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490                             (match_operand:V16QI 3 "register_operand" "<h_con>")
491                             (match_operand:SI 4 "immediate_operand" "i")]
492                 DOTPROD)))]
493   "TARGET_DOTPROD"
494   {
495     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
497   }
498   [(set_attr "type" "neon_dot")]
;; copysign(x, y): select the sign bit from y and the rest from x using BSL
;; with a sign-bit-only mask.
501 (define_expand "copysign<mode>3"
502   [(match_operand:VHSDF 0 "register_operand")
503    (match_operand:VHSDF 1 "register_operand")
504    (match_operand:VHSDF 2 "register_operand")]
505   "TARGET_FLOAT && TARGET_SIMD"
507   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
510   emit_move_insn (v_bitmask,
511                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512                                                      HOST_WIDE_INT_M1U << bits));
513   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514                                          operands[2], operands[1]));
515   DONE;
;; Multiply a vector by a broadcast lane of another vector (MUL/FMUL by
;; element); combiner pattern, lane index endian-corrected at output time.
519 (define_insn "*aarch64_mul3_elt<mode>"
520  [(set (match_operand:VMUL 0 "register_operand" "=w")
521     (mult:VMUL
522       (vec_duplicate:VMUL
523           (vec_select:<VEL>
524             (match_operand:VMUL 1 "register_operand" "<h_con>")
525             (parallel [(match_operand:SI 2 "immediate_operand")])))
526       (match_operand:VMUL 3 "register_operand" "w")))]
527   "TARGET_SIMD"
528   {
529     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
531   }
532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane source has the opposite vector width.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537      (mult:VMUL_CHANGE_NLANES
538        (vec_duplicate:VMUL_CHANGE_NLANES
539           (vec_select:<VEL>
540             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541             (parallel [(match_operand:SI 2 "immediate_operand")])))
542       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
543   "TARGET_SIMD"
544   {
545     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
547   }
548   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar broadcast directly from a register (lane 0 form).
;; NOTE(review): the trailing ';' after the template string looks stray but is
;; tolerated by the md reader — confirm against upstream before removing.
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552  [(set (match_operand:VMUL 0 "register_operand" "=w")
553     (mult:VMUL
554       (vec_duplicate:VMUL
555             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556       (match_operand:VMUL 2 "register_operand" "w")))]
557   "TARGET_SIMD"
558   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
562 (define_insn "@aarch64_rsqrte<mode>"
563   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
565                      UNSPEC_RSQRTE))]
566   "TARGET_SIMD"
567   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
570 (define_insn "@aarch64_rsqrts<mode>"
571   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
574          UNSPEC_RSQRTS))]
575   "TARGET_SIMD"
576   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand 1/sqrt(x) via the iterative approximation sequence.
579 (define_expand "rsqrt<mode>2"
580   [(set (match_operand:VALLF 0 "register_operand" "=w")
581         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
582                      UNSPEC_RSQRT))]
583   "TARGET_SIMD"
585   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
586   DONE;
;; DF multiply where one operand is a selected lane of a V2DF vector.
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590   [(set (match_operand:DF 0 "register_operand" "=w")
591      (mult:DF
592        (vec_select:DF
593          (match_operand:V2DF 1 "register_operand" "w")
594          (parallel [(match_operand:SI 2 "immediate_operand")]))
595        (match_operand:DF 3 "register_operand" "w")))]
596   "TARGET_SIMD"
597   {
598     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
600   }
601   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Integer vector negation.
604 (define_insn "neg<mode>2"
605   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
607   "TARGET_SIMD"
608   "neg\t%0.<Vtype>, %1.<Vtype>"
609   [(set_attr "type" "neon_neg<q>")]
;; Integer vector absolute value (combinable RTL abs form).
612 (define_insn "abs<mode>2"
613   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
615   "TARGET_SIMD"
616   "abs\t%0.<Vtype>, %1.<Vtype>"
617   [(set_attr "type" "neon_abs<q>")]
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
622 ;; as SABD.
;; Hence the opaque UNSPEC_ABS instead of RTL abs.
623 (define_insn "aarch64_abs<mode>"
624   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
625           (unspec:VSDQ_I_DI
626             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
627            UNSPEC_ABS))]
628   "TARGET_SIMD"
629   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630   [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: abs (a - b) maps to SABD.
633 (define_insn "abd<mode>_3"
634   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635         (abs:VDQ_BHSI (minus:VDQ_BHSI
636                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
637                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
638   "TARGET_SIMD"
639   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640   [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves (SABDL2/UABDL2).
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646                           (match_operand:VDQV_S 2 "register_operand" "w")]
647         ABDL2))]
648   "TARGET_SIMD"
649   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650   [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference and accumulate (SABAL/UABAL); operand 3 is
;; the accumulator tied to the output.
653 (define_insn "aarch64_<sur>abal<mode>_4"
654   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656                           (match_operand:VDQV_S 2 "register_operand" "w")
657                          (match_operand:<VDBLW> 3 "register_operand" "0")]
658         ABAL))]
659   "TARGET_SIMD"
660   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661   [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise widening add and accumulate (SADALP/UADALP); operand 2 is the
;; accumulator tied to the output.
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667                           (match_operand:<VDBLW> 2 "register_operand" "0")]
668         ADALP))]
669   "TARGET_SIMD"
670   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671   [(set_attr "type" "neon_reduc_add<q>")]
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2       tmp.8h, op1.16b, op2.16b
680 ;; UABAL        tmp.8h, op1.16b, op2.16b
681 ;; UADALP       op3.4s, tmp.8h
682 ;; MOV          op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
685 (define_expand "<sur>sadv16qi"
686   [(use (match_operand:V4SI 0 "register_operand"))
687    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689    (use (match_operand:V4SI 3 "register_operand"))]
690   "TARGET_SIMD"
691   {
692     rtx reduc = gen_reg_rtx (V8HImode);
693     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
694                                                operands[2]));
695     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696                                               operands[2], reduc));
697     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
698                                               operands[3]));
699     emit_move_insn (operands[0], operands[3]);
700     DONE;
701   }
;; Signed absolute difference and accumulate: abs (a - b) + acc maps to SABA.
704 (define_insn "aba<mode>_3"
705   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
708                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
710   "TARGET_SIMD"
711   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712   [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar modes.
715 (define_insn "fabd<mode>3"
716   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
717         (abs:VHSDF_HSDF
718           (minus:VHSDF_HSDF
719             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
721   "TARGET_SIMD"
722   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723   [(set_attr "type" "neon_fp_abd_<stype><q>")]
726 ;; For AND (vector, register) and BIC (vector, immediate)
;; The Db immediate alternative emits a BIC of the complemented constant.
727 (define_insn "and<mode>3"
728   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
731   "TARGET_SIMD"
732   {
733     switch (which_alternative)
734       {
735       case 0:
736         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
737       case 1:
738         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
739                                                   AARCH64_CHECK_BIC);
740       default:
741         gcc_unreachable ();
742       }
743   }
744   [(set_attr "type" "neon_logic<q>")]
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
752   "TARGET_SIMD"
753   {
754     switch (which_alternative)
755       {
756       case 0:
757         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
758       case 1:
759         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
760                                                   AARCH64_CHECK_ORR);
761       default:
762         gcc_unreachable ();
763       }
764   }
765   [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-or (EOR).
768 (define_insn "xor<mode>3"
769   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771                  (match_operand:VDQ_I 2 "register_operand" "w")))]
772   "TARGET_SIMD"
773   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774   [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT.
777 (define_insn "one_cmpl<mode>2"
778   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
780   "TARGET_SIMD"
781   "not\t%0.<Vbtype>, %1.<Vbtype>"
782   [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector.  Operand 2 arrives as a
;; one-hot vec_merge mask; it is converted to a lane number (endian-corrected)
;; and back into a one-hot constant for the %p2 print directive.
785 (define_insn "aarch64_simd_vec_set<mode>"
786   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
787         (vec_merge:VALL_F16
788             (vec_duplicate:VALL_F16
789                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
792   "TARGET_SIMD"
793   {
794    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796    switch (which_alternative)
797      {
798      case 0:
799         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
800      case 1:
801         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
802      case 2:
803         return "ld1\\t{%0.<Vetype>}[%p2], %1";
804      default:
805         gcc_unreachable ();
806      }
807   }
808   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of a vector into one lane of another (INS element form).
;; Operand 2 is the one-hot destination-lane mask; operand 4 the source lane.
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
813         (vec_merge:VALL_F16
814             (vec_duplicate:VALL_F16
815               (vec_select:<VEL>
816                 (match_operand:VALL_F16 3 "register_operand" "w")
817                 (parallel
818                   [(match_operand:SI 4 "immediate_operand" "i")])))
819             (match_operand:VALL_F16 1 "register_operand" "0")
820             (match_operand:SI 2 "immediate_operand" "i")))]
821   "TARGET_SIMD"
822   {
823     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
827     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
828   }
829   [(set_attr "type" "neon_ins<q>")]
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834         (vec_merge:VALL_F16_NO_V2Q
835             (vec_duplicate:VALL_F16_NO_V2Q
836               (vec_select:<VEL>
837                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
838                 (parallel
839                   [(match_operand:SI 4 "immediate_operand" "i")])))
840             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841             (match_operand:SI 2 "immediate_operand" "i")))]
842   "TARGET_SIMD"
843   {
844     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847                                            INTVAL (operands[4]));
849     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
850   }
851   [(set_attr "type" "neon_ins<q>")]
854 (define_insn "aarch64_simd_lshr<mode>"
855  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
858  "TARGET_SIMD"
859  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860   [(set_attr "type" "neon_shift_imm<q>")]
863 (define_insn "aarch64_simd_ashr<mode>"
864  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
867  "TARGET_SIMD"
868  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869   [(set_attr "type" "neon_shift_imm<q>")]
872 (define_insn "aarch64_simd_imm_shl<mode>"
873  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
876  "TARGET_SIMD"
877   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878   [(set_attr "type" "neon_shift_imm<q>")]
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884                    (match_operand:VDQ_I 2 "register_operand" "w")))]
885  "TARGET_SIMD"
886  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887   [(set_attr "type" "neon_shift_reg<q>")]
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893                     (match_operand:VDQ_I 2 "register_operand" "w")]
894                    UNSPEC_ASHIFT_UNSIGNED))]
895  "TARGET_SIMD"
896  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897   [(set_attr "type" "neon_shift_reg<q>")]
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903                     (match_operand:VDQ_I 2 "register_operand" "w")]
904                    UNSPEC_ASHIFT_SIGNED))]
905  "TARGET_SIMD"
906  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907   [(set_attr "type" "neon_shift_reg<q>")]
910 (define_expand "ashl<mode>3"
911   [(match_operand:VDQ_I 0 "register_operand" "")
912    (match_operand:VDQ_I 1 "register_operand" "")
913    (match_operand:SI  2 "general_operand" "")]
914  "TARGET_SIMD"
916   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
917   int shift_amount;
919   if (CONST_INT_P (operands[2]))
920     {
921       shift_amount = INTVAL (operands[2]);
922       if (shift_amount >= 0 && shift_amount < bit_width)
923         {
924           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
925                                                        shift_amount);
926           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
927                                                      operands[1],
928                                                      tmp));
929           DONE;
930         }
931       else
932         {
933           operands[2] = force_reg (SImode, operands[2]);
934         }
935     }
936   else if (MEM_P (operands[2]))
937     {
938       operands[2] = force_reg (SImode, operands[2]);
939     }
941   if (REG_P (operands[2]))
942     {
943       rtx tmp = gen_reg_rtx (<MODE>mode);
944       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945                                              convert_to_mode (<VEL>mode,
946                                                               operands[2],
947                                                               0)));
948       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
949                                                   tmp));
950       DONE;
951     }
952   else
953     FAIL;
957 (define_expand "lshr<mode>3"
958   [(match_operand:VDQ_I 0 "register_operand" "")
959    (match_operand:VDQ_I 1 "register_operand" "")
960    (match_operand:SI  2 "general_operand" "")]
961  "TARGET_SIMD"
963   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
964   int shift_amount;
966   if (CONST_INT_P (operands[2]))
967     {
968       shift_amount = INTVAL (operands[2]);
969       if (shift_amount > 0 && shift_amount <= bit_width)
970         {
971           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
972                                                        shift_amount);
973           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
974                                                   operands[1],
975                                                   tmp));
976           DONE;
977         }
978       else
979         operands[2] = force_reg (SImode, operands[2]);
980     }
981   else if (MEM_P (operands[2]))
982     {
983       operands[2] = force_reg (SImode, operands[2]);
984     }
986   if (REG_P (operands[2]))
987     {
988       rtx tmp = gen_reg_rtx (SImode);
989       rtx tmp1 = gen_reg_rtx (<MODE>mode);
990       emit_insn (gen_negsi2 (tmp, operands[2]));
991       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992                                              convert_to_mode (<VEL>mode,
993                                                               tmp, 0)));
994       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
995                                                           operands[1],
996                                                           tmp1));
997       DONE;
998     }
999   else
1000     FAIL;
1004 (define_expand "ashr<mode>3"
1005   [(match_operand:VDQ_I 0 "register_operand" "")
1006    (match_operand:VDQ_I 1 "register_operand" "")
1007    (match_operand:SI  2 "general_operand" "")]
1008  "TARGET_SIMD"
1010   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011   int shift_amount;
1013   if (CONST_INT_P (operands[2]))
1014     {
1015       shift_amount = INTVAL (operands[2]);
1016       if (shift_amount > 0 && shift_amount <= bit_width)
1017         {
1018           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019                                                        shift_amount);
1020           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021                                                   operands[1],
1022                                                   tmp));
1023           DONE;
1024         }
1025       else
1026         operands[2] = force_reg (SImode, operands[2]);
1027     }
1028   else if (MEM_P (operands[2]))
1029     {
1030       operands[2] = force_reg (SImode, operands[2]);
1031     }
1033   if (REG_P (operands[2]))
1034     {
1035       rtx tmp = gen_reg_rtx (SImode);
1036       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037       emit_insn (gen_negsi2 (tmp, operands[2]));
1038       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039                                              convert_to_mode (<VEL>mode,
1040                                                               tmp, 0)));
1041       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042                                                         operands[1],
1043                                                         tmp1));
1044       DONE;
1045     }
1046   else
1047     FAIL;
1051 (define_expand "vashl<mode>3"
1052  [(match_operand:VDQ_I 0 "register_operand" "")
1053   (match_operand:VDQ_I 1 "register_operand" "")
1054   (match_operand:VDQ_I 2 "register_operand" "")]
1055  "TARGET_SIMD"
1057   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058                                               operands[2]));
1059   DONE;
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067   (match_operand:VDQ_BHSI 1 "register_operand" "")
1068   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069  "TARGET_SIMD"
1071   rtx neg = gen_reg_rtx (<MODE>mode);
1072   emit (gen_neg<mode>2 (neg, operands[2]));
1073   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074                                                     neg));
1075   DONE;
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080   [(match_operand:DI 0 "register_operand" "=w")
1081    (match_operand:DI 1 "register_operand" "w")
1082    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083   "TARGET_SIMD"
1084   {
1085     /* An arithmetic shift right by 64 fills the result with copies of the sign
1086        bit, just like asr by 63 - however the standard pattern does not handle
1087        a shift by 64.  */
1088     if (INTVAL (operands[2]) == 64)
1089       operands[2] = GEN_INT (63);
1090     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091     DONE;
1092   }
1095 (define_expand "vlshr<mode>3"
1096  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097   (match_operand:VDQ_BHSI 1 "register_operand" "")
1098   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099  "TARGET_SIMD"
1101   rtx neg = gen_reg_rtx (<MODE>mode);
1102   emit (gen_neg<mode>2 (neg, operands[2]));
1103   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104                                                       neg));
1105   DONE;
1108 (define_expand "aarch64_lshr_simddi"
1109   [(match_operand:DI 0 "register_operand" "=w")
1110    (match_operand:DI 1 "register_operand" "w")
1111    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112   "TARGET_SIMD"
1113   {
1114     if (INTVAL (operands[2]) == 64)
1115       emit_move_insn (operands[0], const0_rtx);
1116     else
1117       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118     DONE;
1119   }
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1123 (define_insn "vec_shr_<mode>"
1124   [(set (match_operand:VD 0 "register_operand" "=w")
1125         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126                     (match_operand:SI 2 "immediate_operand" "i")]
1127                    UNSPEC_VEC_SHR))]
1128   "TARGET_SIMD"
1129   {
1130     if (BYTES_BIG_ENDIAN)
1131       return "shl %d0, %d1, %2";
1132     else
1133       return "ushr %d0, %d1, %2";
1134   }
1135   [(set_attr "type" "neon_shift_imm")]
1138 (define_expand "vec_set<mode>"
1139   [(match_operand:VALL_F16 0 "register_operand" "+w")
1140    (match_operand:<VEL> 1 "register_operand" "w")
1141    (match_operand:SI 2 "immediate_operand" "")]
1142   "TARGET_SIMD"
1143   {
1144     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146                                           GEN_INT (elem), operands[0]));
1147     DONE;
1148   }
1152 (define_insn "aarch64_mla<mode>"
1153  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154        (plus:VDQ_BHSI (mult:VDQ_BHSI
1155                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158  "TARGET_SIMD"
1159  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160   [(set_attr "type" "neon_mla_<Vetype><q>")]
1163 (define_insn "*aarch64_mla_elt<mode>"
1164  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165        (plus:VDQHS
1166          (mult:VDQHS
1167            (vec_duplicate:VDQHS
1168               (vec_select:<VEL>
1169                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1171            (match_operand:VDQHS 3 "register_operand" "w"))
1172          (match_operand:VDQHS 4 "register_operand" "0")))]
1173  "TARGET_SIMD"
1174   {
1175     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177   }
1178   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183        (plus:VDQHS
1184          (mult:VDQHS
1185            (vec_duplicate:VDQHS
1186               (vec_select:<VEL>
1187                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1189            (match_operand:VDQHS 3 "register_operand" "w"))
1190          (match_operand:VDQHS 4 "register_operand" "0")))]
1191  "TARGET_SIMD"
1192   {
1193     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195   }
1196   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201         (plus:VDQHS
1202           (mult:VDQHS (vec_duplicate:VDQHS
1203                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204                 (match_operand:VDQHS 2 "register_operand" "w"))
1205           (match_operand:VDQHS 3 "register_operand" "0")))]
1206  "TARGET_SIMD"
1207  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1211 (define_insn "aarch64_mls<mode>"
1212  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216  "TARGET_SIMD"
1217  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218   [(set_attr "type" "neon_mla_<Vetype><q>")]
1221 (define_insn "*aarch64_mls_elt<mode>"
1222  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223        (minus:VDQHS
1224          (match_operand:VDQHS 4 "register_operand" "0")
1225          (mult:VDQHS
1226            (vec_duplicate:VDQHS
1227               (vec_select:<VEL>
1228                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1230            (match_operand:VDQHS 3 "register_operand" "w"))))]
1231  "TARGET_SIMD"
1232   {
1233     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235   }
1236   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241        (minus:VDQHS
1242          (match_operand:VDQHS 4 "register_operand" "0")
1243          (mult:VDQHS
1244            (vec_duplicate:VDQHS
1245               (vec_select:<VEL>
1246                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1248            (match_operand:VDQHS 3 "register_operand" "w"))))]
1249  "TARGET_SIMD"
1250   {
1251     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253   }
1254   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259         (minus:VDQHS
1260           (match_operand:VDQHS 1 "register_operand" "0")
1261           (mult:VDQHS (vec_duplicate:VDQHS
1262                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264   "TARGET_SIMD"
1265   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1269 ;; Max/Min operations.
1270 (define_insn "<su><maxmin><mode>3"
1271  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274  "TARGET_SIMD"
1275  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276   [(set_attr "type" "neon_minmax<q>")]
1279 (define_expand "<su><maxmin>v2di3"
1280  [(set (match_operand:V2DI 0 "register_operand" "")
1281        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282                     (match_operand:V2DI 2 "register_operand" "")))]
1283  "TARGET_SIMD"
1285   enum rtx_code cmp_operator;
1286   rtx cmp_fmt;
1288   switch (<CODE>)
1289     {
1290     case UMIN:
1291       cmp_operator = LTU;
1292       break;
1293     case SMIN:
1294       cmp_operator = LT;
1295       break;
1296     case UMAX:
1297       cmp_operator = GTU;
1298       break;
1299     case SMAX:
1300       cmp_operator = GT;
1301       break;
1302     default:
1303       gcc_unreachable ();
1304     }
1306   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308               operands[2], cmp_fmt, operands[1], operands[2]));
1309   DONE;
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317                         MAXMINV))]
1318  "TARGET_SIMD"
1319  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320   [(set_attr "type" "neon_minmax<q>")]
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327                       (match_operand:VHSDF 2 "register_operand" "w")]
1328                       FMAXMINV))]
1329  "TARGET_SIMD"
1330  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331   [(set_attr "type" "neon_minmax<q>")]
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1344 (define_insn "move_lo_quad_internal_<mode>"
1345   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346         (vec_concat:VQ_NO2E
1347           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348           (vec_duplicate:<VHALF> (const_int 0))))]
1349   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350   "@
1351    dup\\t%d0, %1.d[0]
1352    fmov\\t%d0, %1
1353    dup\\t%d0, %1"
1354   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355    (set_attr "length" "4")
1356    (set_attr "arch" "simd,fp,simd")]
1359 (define_insn "move_lo_quad_internal_<mode>"
1360   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1361         (vec_concat:VQ_2E
1362           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1363           (const_int 0)))]
1364   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1365   "@
1366    dup\\t%d0, %1.d[0]
1367    fmov\\t%d0, %1
1368    dup\\t%d0, %1"
1369   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1370    (set_attr "length" "4")
1371    (set_attr "arch" "simd,fp,simd")]
1374 (define_insn "move_lo_quad_internal_be_<mode>"
1375   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1376         (vec_concat:VQ_NO2E
1377           (vec_duplicate:<VHALF> (const_int 0))
1378           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1379   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1380   "@
1381    dup\\t%d0, %1.d[0]
1382    fmov\\t%d0, %1
1383    dup\\t%d0, %1"
1384   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1385    (set_attr "length" "4")
1386    (set_attr "arch" "simd,fp,simd")]
1389 (define_insn "move_lo_quad_internal_be_<mode>"
1390   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1391         (vec_concat:VQ_2E
1392           (const_int 0)
1393           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1394   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1395   "@
1396    dup\\t%d0, %1.d[0]
1397    fmov\\t%d0, %1
1398    dup\\t%d0, %1"
1399   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1400    (set_attr "length" "4")
1401    (set_attr "arch" "simd,fp,simd")]
1404 (define_expand "move_lo_quad_<mode>"
1405   [(match_operand:VQ 0 "register_operand")
1406    (match_operand:VQ 1 "register_operand")]
1407   "TARGET_SIMD"
1409   if (BYTES_BIG_ENDIAN)
1410     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1411   else
1412     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1413   DONE;
1417 ;; Move operand1 to the high architectural bits of the register, keeping
1418 ;; the low architectural bits of operand2.
1419 ;; For little-endian this is { operand2, operand1 }
1420 ;; For big-endian this is { operand1, operand2 }
1422 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1423   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1424         (vec_concat:VQ
1425           (vec_select:<VHALF>
1426                 (match_dup 0)
1427                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1428           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1429   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1430   "@
1431    ins\\t%0.d[1], %1.d[0]
1432    ins\\t%0.d[1], %1"
1433   [(set_attr "type" "neon_ins")]
1436 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1437   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1438         (vec_concat:VQ
1439           (match_operand:<VHALF> 1 "register_operand" "w,r")
1440           (vec_select:<VHALF>
1441                 (match_dup 0)
1442                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1443   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1444   "@
1445    ins\\t%0.d[1], %1.d[0]
1446    ins\\t%0.d[1], %1"
1447   [(set_attr "type" "neon_ins")]
1450 (define_expand "move_hi_quad_<mode>"
1451  [(match_operand:VQ 0 "register_operand" "")
1452   (match_operand:<VHALF> 1 "register_operand" "")]
1453  "TARGET_SIMD"
1455   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1456   if (BYTES_BIG_ENDIAN)
1457     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1458                     operands[1], p));
1459   else
1460     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1461                     operands[1], p));
1462   DONE;
1465 ;; Narrowing operations.
1467 ;; For doubles.
1468 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1469  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1470        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1471  "TARGET_SIMD"
1472  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1473   [(set_attr "type" "neon_shift_imm_narrow_q")]
1476 (define_expand "vec_pack_trunc_<mode>"
1477  [(match_operand:<VNARROWD> 0 "register_operand" "")
1478   (match_operand:VDN 1 "register_operand" "")
1479   (match_operand:VDN 2 "register_operand" "")]
1480  "TARGET_SIMD"
1482   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1483   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1484   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1486   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1487   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1488   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1489   DONE;
1492 ;; For quads.
1494 (define_insn "vec_pack_trunc_<mode>"
1495  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1496        (vec_concat:<VNARROWQ2>
1497          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1498          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1499  "TARGET_SIMD"
1501    if (BYTES_BIG_ENDIAN)
1502      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1503    else
1504      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1506   [(set_attr "type" "multiple")
1507    (set_attr "length" "8")]
1510 ;; Widening operations.
1512 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1513   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515                                (match_operand:VQW 1 "register_operand" "w")
1516                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1517                             )))]
1518   "TARGET_SIMD"
1519   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1520   [(set_attr "type" "neon_shift_imm_long")]
1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1524   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1525         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1526                                (match_operand:VQW 1 "register_operand" "w")
1527                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1528                             )))]
1529   "TARGET_SIMD"
1530   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1531   [(set_attr "type" "neon_shift_imm_long")]
1534 (define_expand "vec_unpack<su>_hi_<mode>"
1535   [(match_operand:<VWIDE> 0 "register_operand" "")
1536    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1537   "TARGET_SIMD"
1538   {
1539     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1540     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1541                                                           operands[1], p));
1542     DONE;
1543   }
1546 (define_expand "vec_unpack<su>_lo_<mode>"
1547   [(match_operand:<VWIDE> 0 "register_operand" "")
1548    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1549   "TARGET_SIMD"
1550   {
1551     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1552     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1553                                                           operands[1], p));
1554     DONE;
1555   }
1558 ;; Widening arithmetic.
1560 (define_insn "*aarch64_<su>mlal_lo<mode>"
1561   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1562         (plus:<VWIDE>
1563           (mult:<VWIDE>
1564               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1565                  (match_operand:VQW 2 "register_operand" "w")
1566                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1567               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1568                  (match_operand:VQW 4 "register_operand" "w")
1569                  (match_dup 3))))
1570           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571   "TARGET_SIMD"
1572   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1573   [(set_attr "type" "neon_mla_<Vetype>_long")]
1576 (define_insn "*aarch64_<su>mlal_hi<mode>"
1577   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578         (plus:<VWIDE>
1579           (mult:<VWIDE>
1580               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581                  (match_operand:VQW 2 "register_operand" "w")
1582                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1583               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584                  (match_operand:VQW 4 "register_operand" "w")
1585                  (match_dup 3))))
1586           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1587   "TARGET_SIMD"
1588   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1589   [(set_attr "type" "neon_mla_<Vetype>_long")]
1592 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1593   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594         (minus:<VWIDE>
1595           (match_operand:<VWIDE> 1 "register_operand" "0")
1596           (mult:<VWIDE>
1597               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1598                  (match_operand:VQW 2 "register_operand" "w")
1599                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1600               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1601                  (match_operand:VQW 4 "register_operand" "w")
1602                  (match_dup 3))))))]
1603   "TARGET_SIMD"
1604   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1605   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract using the high halves of the Q-reg inputs:
;; op0 = op1 - extend (hi (op2)) * extend (hi (op4)).  ANY_EXTEND gives
;; both the signed (smlsl2) and unsigned (umlsl2) forms.
1608 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1609   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1610         (minus:<VWIDE>
1611           (match_operand:<VWIDE> 1 "register_operand" "0")
1612           (mult:<VWIDE>
1613               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1614                  (match_operand:VQW 2 "register_operand" "w")
1615                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1616               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1617                  (match_operand:VQW 4 "register_operand" "w")
1618                  (match_dup 3))))))]
1619   "TARGET_SIMD"
1620   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1621   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate on 64-bit vectors:
;; op0 = extend (op1) * extend (op2) + op3  (smlal/umlal).
1624 (define_insn "*aarch64_<su>mlal<mode>"
1625   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1626         (plus:<VWIDE>
1627           (mult:<VWIDE>
1628             (ANY_EXTEND:<VWIDE>
1629               (match_operand:VD_BHSI 1 "register_operand" "w"))
1630             (ANY_EXTEND:<VWIDE>
1631               (match_operand:VD_BHSI 2 "register_operand" "w")))
1632           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1633   "TARGET_SIMD"
1634   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1635   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on 64-bit vectors:
;; op0 = op1 - extend (op2) * extend (op3)  (smlsl/umlsl).
1638 (define_insn "*aarch64_<su>mlsl<mode>"
1639   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1640         (minus:<VWIDE>
1641           (match_operand:<VWIDE> 1 "register_operand" "0")
1642           (mult:<VWIDE>
1643             (ANY_EXTEND:<VWIDE>
1644               (match_operand:VD_BHSI 2 "register_operand" "w"))
1645             (ANY_EXTEND:<VWIDE>
1646               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1647   "TARGET_SIMD"
1648   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1649   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves of two Q-reg vectors
;; (smull/umull on the lower architectural lanes).
1652 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1653  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655                            (match_operand:VQW 1 "register_operand" "w")
1656                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1657                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658                            (match_operand:VQW 2 "register_operand" "w")
1659                            (match_dup 3)))))]
1660   "TARGET_SIMD"
1661   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1662   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the lo-half lane-selection parallel and
;; hand off to the insn above.
1665 (define_expand "vec_widen_<su>mult_lo_<mode>"
1666   [(match_operand:<VWIDE> 0 "register_operand" "")
1667    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1668    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1669  "TARGET_SIMD"
1671    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1672    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1673                                                        operands[1],
1674                                                        operands[2], p));
1675    DONE;
;; Widening multiply of the high halves (smull2/umull2).
1679 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1680  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682                             (match_operand:VQW 1 "register_operand" "w")
1683                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1684                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685                             (match_operand:VQW 2 "register_operand" "w")
1686                             (match_dup 3)))))]
1687   "TARGET_SIMD"
1688   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1689   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the high halves.
1692 (define_expand "vec_widen_<su>mult_hi_<mode>"
1693   [(match_operand:<VWIDE> 0 "register_operand" "")
1694    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1695    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1696  "TARGET_SIMD"
1698    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1699    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1700                                                        operands[1],
1701                                                        operands[2], p));
1702    DONE;
1707 ;; FP vector operations.
1708 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1709 ;; double-precision (64-bit) floating-point data types and arithmetic as
1710 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1711 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1713 ;; Floating-point operations can raise an exception.  Vectorizing such
1714 ;; operations is safe because of the reasons explained below.
1716 ;; ARMv8 permits an extension to enable trapped floating-point
1717 ;; exception handling, however this is an optional feature.  In the
1718 ;; event of a floating-point exception being raised by vectorised
1719 ;; code then:
1720 ;; 1.  If trapped floating-point exceptions are available, then a trap
1721 ;;     will be taken when any lane raises an enabled exception.  A trap
1722 ;;     handler may determine which lane raised the exception.
1723 ;; 2.  Alternatively a sticky exception flag is set in the
1724 ;;     floating-point status register (FPSR).  Software may explicitly
1725 ;;     test the exception flags, in which case the tests will either
1726 ;;     prevent vectorisation, allowing precise identification of the
1727 ;;     failing operation, or if tested outside of vectorisable regions
1728 ;;     then the specific operation and lane are not of interest.
1730 ;; FP arithmetic operations.
;; Element-wise FP add (VHSDF covers the half/single/double vector modes).
1732 (define_insn "add<mode>3"
1733  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735                    (match_operand:VHSDF 2 "register_operand" "w")))]
1736  "TARGET_SIMD"
1737  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FP subtract.
1741 (define_insn "sub<mode>3"
1742  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744                     (match_operand:VHSDF 2 "register_operand" "w")))]
1745  "TARGET_SIMD"
1746  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1747   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FP multiply.
1750 (define_insn "mul<mode>3"
1751  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1752        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1753                    (match_operand:VHSDF 2 "register_operand" "w")))]
1754  "TARGET_SIMD"
1755  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP division: first try an approximate-reciprocal expansion
;; (aarch64_emit_approx_div); if it declines, fall through to the
;; *div<mode>3 FDIV insn below.
1759 (define_expand "div<mode>3"
1760  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1761        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1762                   (match_operand:VHSDF 2 "register_operand" "w")))]
1763  "TARGET_SIMD"
1765   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1766     DONE;
1768   operands[1] = force_reg (<MODE>mode, operands[1]);
;; The plain FDIV instruction, used when no approximation is emitted.
1771 (define_insn "*div<mode>3"
1772  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1774                  (match_operand:VHSDF 2 "register_operand" "w")))]
1775  "TARGET_SIMD"
1776  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1777   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FP negate.
1780 (define_insn "neg<mode>2"
1781  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1782        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1783  "TARGET_SIMD"
1784  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1785   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Element-wise FP absolute value.
1788 (define_insn "abs<mode>2"
1789  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1790        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1791  "TARGET_SIMD"
1792  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1793   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3, with op3 tied to op0
;; because FMLA accumulates in place.
1796 (define_insn "fma<mode>4"
1797   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1798        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1799                   (match_operand:VHSDF 2 "register_operand" "w")
1800                   (match_operand:VHSDF 3 "register_operand" "0")))]
1801   "TARGET_SIMD"
1802  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1803   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA by element: the duplicated lane of op1 multiplies op3, accumulating
;; into op4/op0.  The lane number is converted to the architectural
;; (endian-adjusted) numbering before printing.
1806 (define_insn "*aarch64_fma4_elt<mode>"
1807   [(set (match_operand:VDQF 0 "register_operand" "=w")
1808     (fma:VDQF
1809       (vec_duplicate:VDQF
1810         (vec_select:<VEL>
1811           (match_operand:VDQF 1 "register_operand" "<h_con>")
1812           (parallel [(match_operand:SI 2 "immediate_operand")])))
1813       (match_operand:VDQF 3 "register_operand" "w")
1814       (match_operand:VDQF 4 "register_operand" "0")))]
1815   "TARGET_SIMD"
1816   {
1817     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1818     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819   }
1820   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from a vector of the other width
;; (e.g. a V2SF lane feeding a V4SF operation).
1823 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1824   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1825     (fma:VDQSF
1826       (vec_duplicate:VDQSF
1827         (vec_select:<VEL>
1828           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1829           (parallel [(match_operand:SI 2 "immediate_operand")])))
1830       (match_operand:VDQSF 3 "register_operand" "w")
1831       (match_operand:VDQSF 4 "register_operand" "0")))]
1832   "TARGET_SIMD"
1833   {
1834     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1835     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1836   }
1837   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA where the multiplier is a scalar broadcast from a register,
;; printed as lane 0 of that register.
1840 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1841   [(set (match_operand:VMUL 0 "register_operand" "=w")
1842     (fma:VMUL
1843       (vec_duplicate:VMUL
1844           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1845       (match_operand:VMUL 2 "register_operand" "w")
1846       (match_operand:VMUL 3 "register_operand" "0")))]
1847   "TARGET_SIMD"
1848   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1849   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed with the indexed form of FMLA on a V2DF lane.
1852 (define_insn "*aarch64_fma4_elt_to_64v2df"
1853   [(set (match_operand:DF 0 "register_operand" "=w")
1854     (fma:DF
1855         (vec_select:DF
1856           (match_operand:V2DF 1 "register_operand" "w")
1857           (parallel [(match_operand:SI 2 "immediate_operand")]))
1858       (match_operand:DF 3 "register_operand" "w")
1859       (match_operand:DF 4 "register_operand" "0")))]
1860   "TARGET_SIMD"
1861   {
1862     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1863     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1864   }
1865   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: op0 = -op1 * op2 + op3 (FMLS), accumulator
;; op3 tied to op0.
1868 (define_insn "fnma<mode>4"
1869   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1870         (fma:VHSDF
1871           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1872           (match_operand:VHSDF 2 "register_operand" "w")
1873           (match_operand:VHSDF 3 "register_operand" "0")))]
1874   "TARGET_SIMD"
1875   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS by element; lane number endian-adjusted before printing, as for
;; the fma4_elt patterns above.
1879 (define_insn "*aarch64_fnma4_elt<mode>"
1880   [(set (match_operand:VDQF 0 "register_operand" "=w")
1881     (fma:VDQF
1882       (neg:VDQF
1883         (match_operand:VDQF 3 "register_operand" "w"))
1884       (vec_duplicate:VDQF
1885         (vec_select:<VEL>
1886           (match_operand:VDQF 1 "register_operand" "<h_con>")
1887           (parallel [(match_operand:SI 2 "immediate_operand")])))
1888       (match_operand:VDQF 4 "register_operand" "0")))]
1889   "TARGET_SIMD"
1890   {
1891     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1892     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893   }
1894   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by element where the lane comes from the other-width vector mode.
1897 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1898   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1899     (fma:VDQSF
1900       (neg:VDQSF
1901         (match_operand:VDQSF 3 "register_operand" "w"))
1902       (vec_duplicate:VDQSF
1903         (vec_select:<VEL>
1904           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1905           (parallel [(match_operand:SI 2 "immediate_operand")])))
1906       (match_operand:VDQSF 4 "register_operand" "0")))]
1907   "TARGET_SIMD"
1908   {
1909     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1910     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1911   }
1912   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar broadcast multiplier, printed as lane 0.
1915 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1916   [(set (match_operand:VMUL 0 "register_operand" "=w")
1917     (fma:VMUL
1918       (neg:VMUL
1919         (match_operand:VMUL 2 "register_operand" "w"))
1920       (vec_duplicate:VMUL
1921         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1922       (match_operand:VMUL 3 "register_operand" "0")))]
1923   "TARGET_SIMD"
1924   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1925   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF FMLS using an indexed V2DF lane; note the negation is on
;; operand 3 here, matching how combine canonicalizes the expression.
1928 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1929   [(set (match_operand:DF 0 "register_operand" "=w")
1930     (fma:DF
1931       (vec_select:DF
1932         (match_operand:V2DF 1 "register_operand" "w")
1933         (parallel [(match_operand:SI 2 "immediate_operand")]))
1934       (neg:DF
1935         (match_operand:DF 3 "register_operand" "w"))
1936       (match_operand:DF 4 "register_operand" "0")))]
1937   "TARGET_SIMD"
1938   {
1939     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1940     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1941   }
1942   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1945 ;; Vector versions of the floating-point frint patterns.
1946 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1947 (define_insn "<frint_pattern><mode>2"
1948   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1949         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1950                        FRINT))]
1951   "TARGET_SIMD"
1952   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1953   [(set_attr "type" "neon_fp_round_<stype><q>")]
1956 ;; Vector versions of the fcvt standard patterns.
1957 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per FCVT) then convert to signed/unsigned integer (FIXUORS).
1958 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1959   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1960         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1961                                [(match_operand:VHSDF 1 "register_operand" "w")]
1962                                FCVT)))]
1963   "TARGET_SIMD"
1964   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1965   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1968 ;; HF Scalar variants of related SIMD instructions.
;; These need the ARMv8.2-A FP16 scalar extension (TARGET_SIMD_F16INST).
1969 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1970   [(set (match_operand:HI 0 "register_operand" "=w")
1971         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1972                       FCVT)))]
1973   "TARGET_SIMD_F16INST"
1974   "fcvt<frint_suffix><su>\t%h0, %h1"
1975   [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion.
1978 (define_insn "<optab>_trunchfhi2"
1979   [(set (match_operand:HI 0 "register_operand" "=w")
1980         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1981   "TARGET_SIMD_F16INST"
1982   "fcvtz<su>\t%h0, %h1"
1983   [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF conversion (scvtf/ucvtf).
1986 (define_insn "<optab>hihf2"
1987   [(set (match_operand:HF 0 "register_operand" "=w")
1988         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1989   "TARGET_SIMD_F16INST"
1990   "<su_optab>cvtf\t%h0, %h1"
1991   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of 2 with a truncating convert into a
;; single fixed-point FCVTZ with a #fbits immediate; the condition limits
;; fbits to the element width as the instruction requires.
1994 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1995   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1996         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1997                                [(mult:VDQF
1998          (match_operand:VDQF 1 "register_operand" "w")
1999          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2000                                UNSPEC_FRINTZ)))]
2001   "TARGET_SIMD
2002    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2003                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2004   {
2005     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2006     char buf[64];
2007     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2008     output_asm_insn (buf, operands);
2009     return "";
2010   }
2011   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns to the FRINTZ-based insn.
2014 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2015   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2016         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2017                                [(match_operand:VHSDF 1 "register_operand")]
2018                                 UNSPEC_FRINTZ)))]
2019   "TARGET_SIMD"
2020   {})
2022 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2023   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2024         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2025                                [(match_operand:VHSDF 1 "register_operand")]
2026                                 UNSPEC_FRINTZ)))]
2027   "TARGET_SIMD"
2028   {})
;; ftrunc as a round-towards-zero in the FP domain.
2030 (define_expand "ftrunc<VHSDF:mode>2"
2031   [(set (match_operand:VHSDF 0 "register_operand")
2032         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2033                        UNSPEC_FRINTZ))]
2034   "TARGET_SIMD"
2035   {})
;; Integer -> FP conversion (scvtf/ucvtf) on whole vectors.
2037 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2038   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2039         (FLOATUORS:VHSDF
2040           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2041   "TARGET_SIMD"
2042   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2043   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2046 ;; Conversions between vectors of floats and doubles.
2047 ;; Contains a mix of patterns to match standard pattern names
2048 ;; and those for intrinsics.
2050 ;; Float widening operations.
;; Widen the low half of a vector of HF/SF elements (FCVTL).
2052 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2053   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2054         (float_extend:<VWIDE> (vec_select:<VHALF>
2055                                (match_operand:VQ_HSF 1 "register_operand" "w")
2056                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2057                             )))]
2058   "TARGET_SIMD"
2059   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2060   [(set_attr "type" "neon_fp_cvt_widen_s")]
2063 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point with #fbits immediate (operand 2).
2065 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2066   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2067         (unspec:<VHSDF:FCVT_TARGET>
2068           [(match_operand:VHSDF 1 "register_operand" "w")
2069            (match_operand:SI 2 "immediate_operand" "i")]
2070          FCVT_F2FIXED))]
2071   "TARGET_SIMD"
2072   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2073   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float with #fbits immediate (operand 2).
2076 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2077   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2078         (unspec:<VDQ_HSDI:FCVT_TARGET>
2079           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2080            (match_operand:SI 2 "immediate_operand" "i")]
2081          FCVT_FIXED2F))]
2082   "TARGET_SIMD"
2083   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2084   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2087 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2088 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2089 ;; the meaning of HI and LO changes depending on the target endianness.
2090 ;; While elsewhere we map the higher numbered elements of a vector to
2091 ;; the lower architectural lanes of the vector, for these patterns we want
2092 ;; to always treat "hi" as referring to the higher architectural lanes.
2093 ;; Consequently, while the patterns below look inconsistent with our
2094 ;; other big-endian patterns their behavior is as required.
;; Standard-name expander: widen the low-half lanes via FCVTL.
2096 (define_expand "vec_unpacks_lo_<mode>"
2097   [(match_operand:<VWIDE> 0 "register_operand" "")
2098    (match_operand:VQ_HSF 1 "register_operand" "")]
2099   "TARGET_SIMD"
2100   {
2101     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2102     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2103                                                        operands[1], p));
2104     DONE;
2105   }
;; Widen the high half of a vector of HF/SF elements (FCVTL2).
2108 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2109   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2110         (float_extend:<VWIDE> (vec_select:<VHALF>
2111                                (match_operand:VQ_HSF 1 "register_operand" "w")
2112                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2113                             )))]
2114   "TARGET_SIMD"
2115   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2116   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander: widen the high-half lanes via FCVTL2.
;; Call the _hi generator here: the previous code called the _lo
;; generator with a hi-half parallel, which only produced the right
;; instruction because recog later re-matched the RTL against the
;; hi insn pattern.
2119 (define_expand "vec_unpacks_hi_<mode>"
2120   [(match_operand:<VWIDE> 0 "register_operand" "")
2121    (match_operand:VQ_HSF 1 "register_operand" "")]
2122   "TARGET_SIMD"
2123   {
2124     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2125     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2126                                                        operands[1], p));
2127     DONE;
2128   }
;; Widen a whole 64-bit float vector (FCVTL), no half-selection needed.
2130 (define_insn "aarch64_float_extend_lo_<Vwide>"
2131   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132         (float_extend:<VWIDE>
2133           (match_operand:VDF 1 "register_operand" "w")))]
2134   "TARGET_SIMD"
2135   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2136   [(set_attr "type" "neon_fp_cvt_widen_s")]
2139 ;; Float narrowing operations.
;; Narrow into the low half of the destination (FCVTN).
2141 (define_insn "aarch64_float_truncate_lo_<mode>"
2142   [(set (match_operand:VDF 0 "register_operand" "=w")
2143       (float_truncate:VDF
2144         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2145   "TARGET_SIMD"
2146   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2147   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2: narrow op2 into the high lanes while keeping op1 in the low
;; lanes.  Little-endian form: low half first in the vec_concat.
2150 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2151   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2152     (vec_concat:<VDBL>
2153       (match_operand:VDF 1 "register_operand" "0")
2154       (float_truncate:VDF
2155         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2156   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2157   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2158   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian form of the above: vec_concat order is reversed.
2161 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2162   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2163     (vec_concat:<VDBL>
2164       (float_truncate:VDF
2165         (match_operand:<VWIDE> 2 "register_operand" "w"))
2166       (match_operand:VDF 1 "register_operand" "0")))]
2167   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2168   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2169   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be variant according to target endianness.
2172 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2173   [(match_operand:<VDBL> 0 "register_operand" "=w")
2174    (match_operand:VDF 1 "register_operand" "0")
2175    (match_operand:<VWIDE> 2 "register_operand" "w")]
2176   "TARGET_SIMD"
2178   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2179                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2180                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2181   emit_insn (gen (operands[0], operands[1], operands[2]));
2182   DONE;
;; Pack two V2DF vectors into one V4SF: FCVTN fills the low half,
;; FCVTN2 the high half.  lo/hi operand choice is swapped on big-endian
;; to preserve the standard pattern's element ordering.
2186 (define_expand "vec_pack_trunc_v2df"
2187   [(set (match_operand:V4SF 0 "register_operand")
2188       (vec_concat:V4SF
2189         (float_truncate:V2SF
2190             (match_operand:V2DF 1 "register_operand"))
2191         (float_truncate:V2SF
2192             (match_operand:V2DF 2 "register_operand"))
2193           ))]
2194   "TARGET_SIMD"
2195   {
2196     rtx tmp = gen_reg_rtx (V2SFmode);
2197     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2198     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2200     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2201     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2202                                                    tmp, operands[hi]));
2203     DONE;
2204   }
;; Pack two scalar DFs into a V2SF: assemble the DFs into a V2DF
;; temporary, then narrow it with a single FCVTN.
2207 (define_expand "vec_pack_trunc_df"
2208   [(set (match_operand:V2SF 0 "register_operand")
2209       (vec_concat:V2SF
2210         (float_truncate:SF
2211             (match_operand:DF 1 "register_operand"))
2212         (float_truncate:SF
2213             (match_operand:DF 2 "register_operand"))
2214           ))]
2215   "TARGET_SIMD"
2216   {
2217     rtx tmp = gen_reg_rtx (V2SFmode);
2218     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2219     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2221     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2222     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2223     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2224     DONE;
2225   }
2228 ;; FP Max/Min
2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2230 ;; expression like:
2231 ;;      a = (b < c) ? b : c;
2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2234 ;; -ffast-math.
2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2238 ;; operand will be returned when both operands are zero (i.e. they may not
2239 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2240 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2241 ;; NaNs.
;; RTL smax/smin mapped to FMAXNM/FMINNM (NaN-propagation-friendly
;; variants; see the comment block above about when GCC emits these).
2243 (define_insn "<su><maxmin><mode>3"
2244   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2245         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2246                        (match_operand:VHSDF 2 "register_operand" "w")))]
2247   "TARGET_SIMD"
2248   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2249   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2252 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2253 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2254 ;; which implement the IEEE fmax ()/fmin () functions.
2255 (define_insn "<maxmin_uns><mode>3"
2256   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2257        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2258                       (match_operand:VHSDF 2 "register_operand" "w")]
2259                       FMAXMIN_UNS))]
2260   "TARGET_SIMD"
2261   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2262   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2265 ;; 'across lanes' add.
;; Integer add reduction: run ADDV into a scratch vector, then extract
;; lane 0 (endian-adjusted) as the scalar result.
2267 (define_expand "reduc_plus_scal_<mode>"
2268   [(match_operand:<VEL> 0 "register_operand" "=w")
2269    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2270                UNSPEC_ADDV)]
2271   "TARGET_SIMD"
2272   {
2273     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2274     rtx scratch = gen_reg_rtx (<MODE>mode);
2275     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2276     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2277     DONE;
2278   }
;; Pairwise FP add of two vectors (FADDP).
2281 (define_insn "aarch64_faddp<mode>"
2282  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2283        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2284                       (match_operand:VHSDF 2 "register_operand" "w")]
2285         UNSPEC_FADDV))]
2286  "TARGET_SIMD"
2287  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2288   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (ADDV, or ADDP for the 2-element case).
2291 (define_insn "aarch64_reduc_plus_internal<mode>"
2292  [(set (match_operand:VDQV 0 "register_operand" "=w")
2293        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2294                     UNSPEC_ADDV))]
2295  "TARGET_SIMD"
2296  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2297   [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP gives the reduction.
2300 (define_insn "aarch64_reduc_plus_internalv2si"
2301  [(set (match_operand:V2SI 0 "register_operand" "=w")
2302        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2303                     UNSPEC_ADDV))]
2304  "TARGET_SIMD"
2305  "addp\\t%0.2s, %1.2s, %1.2s"
2306   [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce directly with the scalar-result FADDP.
2309 (define_insn "reduc_plus_scal_<mode>"
2310  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2311        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2312                    UNSPEC_FADDV))]
2313  "TARGET_SIMD"
2314  "faddp\\t%<Vetype>0, %1.<Vtype>"
2315   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two pairwise FADDP steps, then extract lane 0.
2318 (define_expand "reduc_plus_scal_v4sf"
2319  [(set (match_operand:SF 0 "register_operand")
2320        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2321                     UNSPEC_FADDV))]
2322  "TARGET_SIMD"
2324   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2325   rtx scratch = gen_reg_rtx (V4SFmode);
2326   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2327   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2328   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2329   DONE;
;; Count leading sign bits (CLS).
2332 (define_insn "clrsb<mode>2"
2333   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2334         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2335   "TARGET_SIMD"
2336   "cls\\t%0.<Vtype>, %1.<Vtype>"
2337   [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros (CLZ).
2340 (define_insn "clz<mode>2"
2341  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2342        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2343  "TARGET_SIMD"
2344  "clz\\t%0.<Vtype>, %1.<Vtype>"
2345   [(set_attr "type" "neon_cls<q>")]
;; Population count on byte vectors (CNT).
2348 (define_insn "popcount<mode>2"
2349   [(set (match_operand:VB 0 "register_operand" "=w")
2350         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2351   "TARGET_SIMD"
2352   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2353   [(set_attr "type" "neon_cnt<q>")]
2356 ;; 'across lanes' max and min ops.
2358 ;; Template for outputting a scalar, so we can create __builtins which can be
2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; FP max/min reduction: run the across-lanes insn into a scratch
;; vector, then extract lane 0 (endian-adjusted).
2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2361   [(match_operand:<VEL> 0 "register_operand")
2362    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2363                   FMAXMINV)]
2364   "TARGET_SIMD"
2365   {
2366     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2367     rtx scratch = gen_reg_rtx (<MODE>mode);
2368     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2369                                                               operands[1]));
2370     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2371     DONE;
2372   }
2375 ;; Likewise for integer cases, signed and unsigned.
2376 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2377   [(match_operand:<VEL> 0 "register_operand")
2378    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2379                     MAXMINV)]
2380   "TARGET_SIMD"
2381   {
2382     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2383     rtx scratch = gen_reg_rtx (<MODE>mode);
2384     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2385                                                               operands[1]));
2386     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2387     DONE;
2388   }
;; Across-lanes integer max/min (SMAXV/SMINV/UMAXV/UMINV).
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2393        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2394                     MAXMINV))]
2395  "TARGET_SIMD"
2396  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2397   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; a single pairwise op suffices.
2400 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2401  [(set (match_operand:V2SI 0 "register_operand" "=w")
2402        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2403                     MAXMINV))]
2404  "TARGET_SIMD"
2405  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2406   [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes FP max/min.
2409 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2410  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2411        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2412                       FMAXMINV))]
2413  "TARGET_SIMD"
2414  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2415   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2418 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2419 ;; allocation.
2420 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2421 ;; to select.
2423 ;; Thus our BSL is of the form:
2424 ;;   op0 = bsl (mask, op2, op3)
2425 ;; We can use any of:
2427 ;;   if (op0 = mask)
2428 ;;     bsl mask, op2, op3
2429 ;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2430 ;;     bit op0, op2, mask
2431 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2432 ;;     bif op0, op3, mask
2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2435 ;; Some forms of straight-line code may generate the equivalent form
2436 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical BSL form: op0 = ((op2 ^ op3) & mask) ^ op3, which selects
;; op2 bits where the mask is 1 and op3 bits where it is 0.  The three
;; alternatives tie the destination to mask/op3/op2 respectively, which
;; is why bsl, bit or bif may be emitted depending on allocation.
2438 (define_insn "aarch64_simd_bsl<mode>_internal"
2439   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2440         (xor:VDQ_I
2441            (and:VDQ_I
2442              (xor:VDQ_I
2443                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2444                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2445              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2446           (match_dup:<V_INT_EQUIV> 3)
2447         ))]
2448   "TARGET_SIMD"
2449   "@
2450   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2451   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2452   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2453   [(set_attr "type" "neon_bsl<q>")]
2456 ;; We need this form in addition to the above pattern to match the case
2457 ;; when combine tries merging three insns such that the second operand of
2458 ;; the outer XOR matches the second operand of the inner XOR rather than
2459 ;; the first.  The two are equivalent but since recog doesn't try all
2460 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same BSL computation with the inner XOR operands commuted and the
;; outer XOR folded against operand 2 instead of operand 3; recog does
;; not try commutations, so combine needs this explicit variant.
2462 (define_insn "*aarch64_simd_bsl<mode>_alt"
2463   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2464         (xor:VDQ_I
2465            (and:VDQ_I
2466              (xor:VDQ_I
2467                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2468                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2469               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2470           (match_dup:<V_INT_EQUIV> 2)))]
2471   "TARGET_SIMD"
2472   "@
2473   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2474   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2475   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2476   [(set_attr "type" "neon_bsl<q>")]
2479 ;; DImode is special, we want to avoid computing operations which are
2480 ;; more naturally computed in general purpose registers in the vector
2481 ;; registers.  If we do that, we need to move all three operands from general
2482 ;; purpose registers to vector registers, then back again.  However, we
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484 ;; optimizations based on the component operations of a BSL.
2486 ;; That means we need a splitter back to the individual operations, if they
2487 ;; would be better calculated on the integer side.
;; DImode BSL.  A fourth all-GP-register alternative (with an
;; early-clobber destination) lets the operation stay on the integer
;; side; when that alternative wins, the splitter rebuilds the
;; eor/and/eor sequence as individual DImode insns.
2489 (define_insn_and_split "aarch64_simd_bsldi_internal"
2490   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2491         (xor:DI
2492            (and:DI
2493              (xor:DI
2494                (match_operand:DI 3 "register_operand" "w,0,w,r")
2495                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2497           (match_dup:DI 3)
2498         ))]
2499   "TARGET_SIMD"
2500   "@
2501   bsl\\t%0.8b, %2.8b, %3.8b
2502   bit\\t%0.8b, %2.8b, %1.8b
2503   bif\\t%0.8b, %3.8b, %1.8b
2504   #"
2505   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2506   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2508   /* Split back to individual operations.  If we're before reload, and
2509      able to create a temporary register, do so.  If we're after reload,
2510      we've got an early-clobber destination register, so use that.
2511      Otherwise, we can't create pseudos and we can't yet guarantee that
2512      operands[0] is safe to write, so FAIL to split.  */
2514   rtx scratch;
2515   if (reload_completed)
2516     scratch = operands[0];
2517   else if (can_create_pseudo_p ())
2518     scratch = gen_reg_rtx (DImode);
2519   else
2520     FAIL;
2522   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2525   DONE;
2527   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528    (set_attr "length" "4,4,4,12")]
;; DImode BSL, alternate operand ordering (outer XOR folded against
;; operand 2); the GP-register alternative is split back to
;; eor/and/eor just as in aarch64_simd_bsldi_internal.
2531 (define_insn_and_split "aarch64_simd_bsldi_alt"
2532   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2533         (xor:DI
2534            (and:DI
2535              (xor:DI
2536                (match_operand:DI 3 "register_operand" "w,w,0,r")
2537                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2539           (match_dup:DI 2)
2540         ))]
2541   "TARGET_SIMD"
2542   "@
2543   bsl\\t%0.8b, %3.8b, %2.8b
2544   bit\\t%0.8b, %3.8b, %1.8b
2545   bif\\t%0.8b, %2.8b, %1.8b
2546   #"
2547   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2548   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2550   /* Split back to individual operations.  If we're before reload, and
2551      able to create a temporary register, do so.  If we're after reload,
2552      we've got an early-clobber destination register, so use that.
2553      Otherwise, we can't create pseudos and we can't yet guarantee that
2554      operands[0] is safe to write, so FAIL to split.  */
2556   rtx scratch;
2557   if (reload_completed)
2558     scratch = operands[0];
2559   else if (can_create_pseudo_p ())
2560     scratch = gen_reg_rtx (DImode);
2561   else
2562     FAIL;
2564   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2567   DONE;
2569   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570    (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  The internal pattern works on the integer-
;; equivalent mode, so float operands are viewed through lowparts and,
;; because the result mode then differs from operand 0's mode, the
;; select goes through a fresh integer-mode temporary that is moved
;; back at the end.
2573 (define_expand "aarch64_simd_bsl<mode>"
2574   [(match_operand:VALLDIF 0 "register_operand")
2575    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2576    (match_operand:VALLDIF 2 "register_operand")
2577    (match_operand:VALLDIF 3 "register_operand")]
2578  "TARGET_SIMD"
2580   /* We can't alias operands together if they have different modes.  */
2581   rtx tmp = operands[0];
2582   if (FLOAT_MODE_P (<MODE>mode))
2583     {
2584       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2585       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2586       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2587     }
2588   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2589   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2590                                                          operands[1],
2591                                                          operands[2],
2592                                                          operands[3]));
2593   if (tmp != operands[0])
2594     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2596   DONE;
;; Select between operands 1 and 2 under the mask in operand 3.
;; Special-cases the constant arms -1/0 (just move the mask) and
;; 0/-1 (invert the mask); otherwise force the arms to registers and
;; emit a BSL.
2599 (define_expand "vcond_mask_<mode><v_int_equiv>"
2600   [(match_operand:VALLDI 0 "register_operand")
2601    (match_operand:VALLDI 1 "nonmemory_operand")
2602    (match_operand:VALLDI 2 "nonmemory_operand")
2603    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2604   "TARGET_SIMD"
2606   /* If we have (a = (P) ? -1 : 0);
2607      Then we can simply move the generated mask (result must be int).  */
2608   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2609       && operands[2] == CONST0_RTX (<MODE>mode))
2610     emit_move_insn (operands[0], operands[3]);
2611   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2612   else if (operands[1] == CONST0_RTX (<MODE>mode)
2613            && operands[2] == CONSTM1_RTX (<MODE>mode))
2614     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2615   else
2616     {
2617       if (!REG_P (operands[1]))
2618         operands[1] = force_reg (<MODE>mode, operands[1]);
2619       if (!REG_P (operands[2]))
2620         operands[2] = force_reg (<MODE>mode, operands[2]);
2621       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2622                                              operands[1], operands[2]));
2623     }
2625   DONE;
2628 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.
;; Signed compares against zero can use the immediate-zero cm<cc>
;; forms, so zero is kept as-is for those codes; everything else gets
;; operand 3 forced into a register.  Unsigned LT/LE are emitted as
;; GTU/GEU with swapped operands, and NE as an inverted EQ.
2630 (define_expand "vec_cmp<mode><mode>"
2631   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2632           (match_operator 1 "comparison_operator"
2633             [(match_operand:VSDQ_I_DI 2 "register_operand")
2634              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2635   "TARGET_SIMD"
2637   rtx mask = operands[0];
2638   enum rtx_code code = GET_CODE (operands[1]);
2640   switch (code)
2641     {
2642     case NE:
2643     case LE:
2644     case LT:
2645     case GE:
2646     case GT:
2647     case EQ:
2648       if (operands[3] == CONST0_RTX (<MODE>mode))
2649         break;
2651       /* Fall through.  */
2652     default:
2653       if (!REG_P (operands[3]))
2654         operands[3] = force_reg (<MODE>mode, operands[3]);
2656       break;
2657     }
2659   switch (code)
2660     {
2661     case LT:
2662       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2663       break;
2665     case GE:
2666       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2667       break;
2669     case LE:
2670       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2671       break;
2673     case GT:
2674       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2675       break;
2677     case LTU:
2678       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2679       break;
2681     case GEU:
2682       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2683       break;
2685     case LEU:
2686       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2687       break;
2689     case GTU:
2690       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2691       break;
2693     case NE:
2694       /* Handle NE as !EQ.  */
2695       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2696       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2697       break;
2699     case EQ:
2700       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2701       break;
2703     default:
2704       gcc_unreachable ();
2705     }
2707   DONE;
;; Floating-point vector compare producing an integer mask.  The first
;; switch decides whether the against-zero compare forms apply; the
;; second picks the fcm* generator; the third emits code per class of
;; comparison (exception-free unordered forms, plain ordered forms,
;; LTGT, and the ORDERED/UNORDERED/UNEQ family built from cmeq(x,x)).
2710 (define_expand "vec_cmp<mode><v_int_equiv>"
2711   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2712         (match_operator 1 "comparison_operator"
2713             [(match_operand:VDQF 2 "register_operand")
2714              (match_operand:VDQF 3 "nonmemory_operand")]))]
2715   "TARGET_SIMD"
2717   int use_zero_form = 0;
2718   enum rtx_code code = GET_CODE (operands[1]);
2719   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2721   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2723   switch (code)
2724     {
2725     case LE:
2726     case LT:
2727     case GE:
2728     case GT:
2729     case EQ:
2730       if (operands[3] == CONST0_RTX (<MODE>mode))
2731         {
2732           use_zero_form = 1;
2733           break;
2734         }
2735       /* Fall through.  */
2736     default:
2737       if (!REG_P (operands[3]))
2738         operands[3] = force_reg (<MODE>mode, operands[3]);
2740       break;
2741     }
2743   switch (code)
2744     {
2745     case LT:
2746       if (use_zero_form)
2747         {
2748           comparison = gen_aarch64_cmlt<mode>;
2749           break;
2750         }
2751       /* Fall through.  */
2752     case UNLT:
2753       std::swap (operands[2], operands[3]);
2754       /* Fall through.  */
2755     case UNGT:
2756     case GT:
2757       comparison = gen_aarch64_cmgt<mode>;
2758       break;
2759     case LE:
2760       if (use_zero_form)
2761         {
2762           comparison = gen_aarch64_cmle<mode>;
2763           break;
2764         }
2765       /* Fall through.  */
2766     case UNLE:
2767       std::swap (operands[2], operands[3]);
2768       /* Fall through.  */
2769     case UNGE:
2770     case GE:
2771       comparison = gen_aarch64_cmge<mode>;
2772       break;
2773     case NE:
2774     case EQ:
2775       comparison = gen_aarch64_cmeq<mode>;
2776       break;
2777     case UNEQ:
2778     case ORDERED:
2779     case UNORDERED:
2780     case LTGT:
2781       break;
2782     default:
2783       gcc_unreachable ();
2784     }
2786   switch (code)
2787     {
2788     case UNGE:
2789     case UNGT:
2790     case UNLE:
2791     case UNLT:
2792       {
2793         /* All of the above must not raise any FP exceptions.  Thus we first
2794            check each operand for NaNs and force any elements containing NaN to
2795            zero before using them in the compare.
2796            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2797                                      (cm<cc> (isnan (a) ? 0.0 : a,
2798                                               isnan (b) ? 0.0 : b))
2799            We use the following transformations for doing the comparisons:
2800            a UNGE b -> a GE b
2801            a UNGT b -> a GT b
2802            a UNLE b -> b GE a
2803            a UNLT b -> b GT a.  */
2805         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2806         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812                                           lowpart_subreg (<V_INT_EQUIV>mode,
2813                                                           operands[2],
2814                                                           <MODE>mode)));
2815         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816                                           lowpart_subreg (<V_INT_EQUIV>mode,
2817                                                           operands[3],
2818                                                           <MODE>mode)));
2819         gcc_assert (comparison != NULL);
2820         emit_insn (comparison (operands[0],
2821                                lowpart_subreg (<MODE>mode,
2822                                                tmp0, <V_INT_EQUIV>mode),
2823                                lowpart_subreg (<MODE>mode,
2824                                                tmp1, <V_INT_EQUIV>mode)));
2825         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2826       }
2827       break;
2829     case LT:
2830     case LE:
2831     case GT:
2832     case GE:
2833     case EQ:
2834     case NE:
2835       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2836          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2837          a GE b -> a GE b
2838          a GT b -> a GT b
2839          a LE b -> b GE a
2840          a LT b -> b GT a
2841          a EQ b -> a EQ b
2842          a NE b -> ~(a EQ b)  */
2843       gcc_assert (comparison != NULL);
2844       emit_insn (comparison (operands[0], operands[2], operands[3]));
2845       if (code == NE)
2846         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2847       break;
2849     case LTGT:
2850       /* LTGT is not guaranteed to not generate a FP exception.  So let's
2851          go the faster way : ((a > b) || (b > a)).  */
2852       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2853                                          operands[2], operands[3]));
2854       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2855       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2856       break;
2858     case ORDERED:
2859     case UNORDERED:
2860     case UNEQ:
2861       /* cmeq (a, a) & cmeq (b, b).  */
2862       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2863                                          operands[2], operands[2]));
2864       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2865       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2867       if (code == UNORDERED)
2868         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2869       else if (code == UNEQ)
2870         {
2871           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2872           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2873         }
2874       break;
2876     default:
2877       gcc_unreachable ();
2878     }
2880   DONE;
;; Unsigned vector compare.  The comparison code in operand 1 already
;; carries signedness (GTU etc.), so simply defer to vec_cmp.
2883 (define_expand "vec_cmpu<mode><mode>"
2884   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2885           (match_operator 1 "comparison_operator"
2886             [(match_operand:VSDQ_I_DI 2 "register_operand")
2887              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2888   "TARGET_SIMD"
2890   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2891                                       operands[2], operands[3]));
2892   DONE;
;; Vector conditional select: build a mask with vec_cmp, then pick
;; between operands 1 and 2 with vcond_mask.  NE is rewritten as EQ
;; with the select arms swapped to avoid the extra NOT that vec_cmp
;; would emit for NE.
2895 (define_expand "vcond<mode><mode>"
2896   [(set (match_operand:VALLDI 0 "register_operand")
2897         (if_then_else:VALLDI
2898           (match_operator 3 "comparison_operator"
2899             [(match_operand:VALLDI 4 "register_operand")
2900              (match_operand:VALLDI 5 "nonmemory_operand")])
2901           (match_operand:VALLDI 1 "nonmemory_operand")
2902           (match_operand:VALLDI 2 "nonmemory_operand")))]
2903   "TARGET_SIMD"
2905   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2906   enum rtx_code code = GET_CODE (operands[3]);
2908   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2909      it as well as switch operands 1/2 in order to avoid the additional
2910      NOT instruction.  */
2911   if (code == NE)
2912     {
2913       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2914                                     operands[4], operands[5]);
2915       std::swap (operands[1], operands[2]);
2916     }
2917   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2918                                              operands[4], operands[5]));
2919   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2920                                                  operands[2], mask));
2922   DONE;
;; Conditional select where the comparison mode (float) differs from
;; the data mode being selected; same NE-to-EQ-and-swap trick as the
;; same-mode vcond above.
2925 (define_expand "vcond<v_cmp_mixed><mode>"
2926   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2927         (if_then_else:<V_cmp_mixed>
2928           (match_operator 3 "comparison_operator"
2929             [(match_operand:VDQF_COND 4 "register_operand")
2930              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2931           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2932           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2933   "TARGET_SIMD"
2935   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2936   enum rtx_code code = GET_CODE (operands[3]);
2938   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2939      it as well as switch operands 1/2 in order to avoid the additional
2940      NOT instruction.  */
2941   if (code == NE)
2942     {
2943       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2944                                     operands[4], operands[5]);
2945       std::swap (operands[1], operands[2]);
2946     }
2947   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2948                                              operands[4], operands[5]));
2949   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2950                                                 operands[0], operands[1],
2951                                                 operands[2], mask));
2953   DONE;
;; Unsigned-compare conditional select on integer vectors; the mask is
;; built in <MODE>mode itself since comparison and data modes agree.
2956 (define_expand "vcondu<mode><mode>"
2957   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2958         (if_then_else:VSDQ_I_DI
2959           (match_operator 3 "comparison_operator"
2960             [(match_operand:VSDQ_I_DI 4 "register_operand")
2961              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2962           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2963           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2964   "TARGET_SIMD"
2966   rtx mask = gen_reg_rtx (<MODE>mode);
2967   enum rtx_code code = GET_CODE (operands[3]);
2969   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2970      it as well as switch operands 1/2 in order to avoid the additional
2971      NOT instruction.  */
2972   if (code == NE)
2973     {
2974       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2975                                     operands[4], operands[5]);
2976       std::swap (operands[1], operands[2]);
2977     }
2978   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2979                                       operands[4], operands[5]));
2980   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2981                                                  operands[2], mask));
2982   DONE;
;; Unsigned integer comparison selecting between float vectors; the
;; compare runs in the integer mode, the select in the float mode.
2985 (define_expand "vcondu<mode><v_cmp_mixed>"
2986   [(set (match_operand:VDQF 0 "register_operand")
2987         (if_then_else:VDQF
2988           (match_operator 3 "comparison_operator"
2989             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2990              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2991           (match_operand:VDQF 1 "nonmemory_operand")
2992           (match_operand:VDQF 2 "nonmemory_operand")))]
2993   "TARGET_SIMD"
2995   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2996   enum rtx_code code = GET_CODE (operands[3]);
2998   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2999      it as well as switch operands 1/2 in order to avoid the additional
3000      NOT instruction.  */
3001   if (code == NE)
3002     {
3003       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3004                                     operands[4], operands[5]);
3005       std::swap (operands[1], operands[2]);
3006     }
3007   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3008                                                   mask, operands[3],
3009                                                   operands[4], operands[5]));
3010   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3011                                                  operands[2], mask));
3012   DONE;
3015 ;; Patterns for AArch64 SIMD Intrinsics.
3017 ;; Lane extraction with sign extension to general purpose register.
;; Extract a lane and sign-extend it into a general-purpose register
;; (SMOV).  The lane index is converted with aarch64_endian_lane_rtx at
;; output time, so the RTL keeps GCC vector-extension lane numbering.
3018 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3019   [(set (match_operand:GPI 0 "register_operand" "=r")
3020         (sign_extend:GPI
3021           (vec_select:<VEL>
3022             (match_operand:VDQQH 1 "register_operand" "w")
3023             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3024   "TARGET_SIMD"
3025   {
3026     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3027     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3028   }
3029   [(set_attr "type" "neon_to_gp<q>")]
;; Extract a lane and zero-extend it into a general-purpose register
;; (UMOV).  The lane index is converted with aarch64_endian_lane_rtx at
;; output time, so the RTL keeps GCC vector-extension lane numbering.
;; (This pattern previously carried stray CRLF line endings; normalized
;; to LF like the rest of the file.)
3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3033   [(set (match_operand:GPI 0 "register_operand" "=r")
3034         (zero_extend:GPI
3035           (vec_select:<VEL>
3036             (match_operand:VDQQH 1 "register_operand" "w")
3037             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3038   "TARGET_SIMD"
3039   {
3040     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041                                            INTVAL (operands[2]));
3042     return "umov\\t%w0, %1.<Vetype>[%2]";
3043   }
3044   [(set_attr "type" "neon_to_gp<q>")]
3047 ;; Lane extraction of a value, neither sign nor zero extension
3048 ;; is guaranteed so upper bits should be considered undefined.
3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: umov to a general-purpose register, dup to a
;; SIMD scalar register, or st1 of the single lane straight to memory.
3050 (define_insn "aarch64_get_lane<mode>"
3051   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3052         (vec_select:<VEL>
3053           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3054           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3055   "TARGET_SIMD"
3056   {
3057     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3058     switch (which_alternative)
3059       {
3060         case 0:
3061           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3062         case 1:
3063           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3064         case 2:
3065           return "st1\\t{%1.<Vetype>}[%2], %0";
3066         default:
3067           gcc_unreachable ();
3068       }
3069   }
3070   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Merge two adjacent 64-bit loads into a single 128-bit LDR.  The
;; insn condition verifies that operand 2's address is exactly operand
;; 1's address plus the 64-bit mode size, and alignment must not be
;; strict since only operand 1 carries the Utq constraint.
3073 (define_insn "load_pair_lanes<mode>"
3074   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3075         (vec_concat:<VDBL>
3076            (match_operand:VDC 1 "memory_operand" "Utq")
3077            (match_operand:VDC 2 "memory_operand" "m")))]
3078   "TARGET_SIMD && !STRICT_ALIGNMENT
3079    && rtx_equal_p (XEXP (operands[2], 0),
3080                    plus_constant (Pmode,
3081                                   XEXP (operands[1], 0),
3082                                   GET_MODE_SIZE (<MODE>mode)))"
3083   "ldr\\t%q0, %1"
3084   [(set_attr "type" "neon_load1_1reg_q")]
;; Store two 64-bit halves as one STP, from either SIMD (d) or
;; general-purpose (x) registers.
3087 (define_insn "store_pair_lanes<mode>"
3088   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3089         (vec_concat:<VDBL>
3090            (match_operand:VDC 1 "register_operand" "w, r")
3091            (match_operand:VDC 2 "register_operand" "w, r")))]
3092   "TARGET_SIMD"
3093   "@
3094    stp\\t%d1, %d2, %y0
3095    stp\\t%x1, %x2, %y0"
3096   [(set_attr "type" "neon_stp, store_16")]
3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3100 ;; dest vector.
;; Little-endian: concatenate a 64-bit value (low half) with a zero
;; high half.  mov/fmov/ldr of the 64-bit half each implicitly clear
;; the upper 64 bits, so no explicit zeroing is needed.
3102 (define_insn "*aarch64_combinez<mode>"
3103   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3104         (vec_concat:<VDBL>
3105           (match_operand:VDC 1 "general_operand" "w,?r,m")
3106           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3107   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3108   "@
3109    mov\\t%0.8b, %1.8b
3110    fmov\t%d0, %1
3111    ldr\\t%d0, %1"
3112   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3113    (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterpart of *aarch64_combinez: the zero constant is
;; the first vec_concat operand, but the emitted instructions are the
;; same 64-bit moves/loads with implicit upper-half clearing.
3116 (define_insn "*aarch64_combinez_be<mode>"
3117   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3118         (vec_concat:<VDBL>
3119           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3120           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3121   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3122   "@
3123    mov\\t%0.8b, %1.8b
3124    fmov\t%d0, %1
3125    ldr\\t%d0, %1"
3126   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3127    (set_attr "arch" "simd,fp,simd")]
;; Combine two 64-bit vectors into one 128-bit vector; all the
;; endianness handling lives in aarch64_split_simd_combine.
3130 (define_expand "aarch64_combine<mode>"
3131   [(match_operand:<VDBL> 0 "register_operand")
3132    (match_operand:VDC 1 "register_operand")
3133    (match_operand:VDC 2 "register_operand")]
3134   "TARGET_SIMD"
3136   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3138   DONE;
;; Build the 128-bit result by writing the low then the high 64-bit
;; quadword of operand 0 from operands 1 and 2 respectively.
3142 (define_expand "@aarch64_simd_combine<mode>"
3143   [(match_operand:<VDBL> 0 "register_operand")
3144    (match_operand:VDC 1 "register_operand")
3145    (match_operand:VDC 2 "register_operand")]
3146   "TARGET_SIMD"
3147   {
3148     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3149     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3150     DONE;
3151   }
3152 [(set_attr "type" "multiple")]
3155 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves of two vectors
;; ([US]ADDL2/[US]SUBL2): each selected element is sign- or
;; zero-extended before the operation.
3157 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3158  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3160                            (match_operand:VQW 1 "register_operand" "w")
3161                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3162                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3163                            (match_operand:VQW 2 "register_operand" "w")
3164                            (match_dup 3)))))]
3165   "TARGET_SIMD"
3166   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3167   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the low halves of two vectors
;; ([US]ADDL/[US]SUBL on the <Vhalftype> register views).
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3171  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3173                            (match_operand:VQW 1 "register_operand" "w")
3174                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3175                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3176                            (match_operand:VQW 2 "register_operand" "w")
3177                            (match_dup 3)))))]
3178   "TARGET_SIMD"
3179   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3180   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; saddl2: signed widening add of the high halves, via the _hi_internal
;; pattern with a high-half lane-selection parallel.
3184 (define_expand "aarch64_saddl2<mode>"
3185   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186    (match_operand:VQW 1 "register_operand" "w")
3187    (match_operand:VQW 2 "register_operand" "w")]
3188   "TARGET_SIMD"
3190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3192                                                   operands[2], p));
3193   DONE;
;; uaddl2: unsigned widening add of the high halves.
3196 (define_expand "aarch64_uaddl2<mode>"
3197   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198    (match_operand:VQW 1 "register_operand" "w")
3199    (match_operand:VQW 2 "register_operand" "w")]
3200   "TARGET_SIMD"
3202   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3204                                                   operands[2], p));
3205   DONE;
;; ssubl2: signed widening subtract of the high halves.
3208 (define_expand "aarch64_ssubl2<mode>"
3209   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3210    (match_operand:VQW 1 "register_operand" "w")
3211    (match_operand:VQW 2 "register_operand" "w")]
3212   "TARGET_SIMD"
3214   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3215   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3216                                                 operands[2], p));
3217   DONE;
;; usubl2: unsigned widening subtract of the high halves.
3220 (define_expand "aarch64_usubl2<mode>"
3221   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3222    (match_operand:VQW 1 "register_operand" "w")
3223    (match_operand:VQW 2 "register_operand" "w")]
3224   "TARGET_SIMD"
3226   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3227   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3228                                                 operands[2], p));
3229   DONE;
;; Widening add/sub of whole 64-bit vectors ([US]ADDL/[US]SUBL):
;; extends every element of both operands to the doubled-width mode.
3232 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3233  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3234        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3235                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3236                        (ANY_EXTEND:<VWIDE>
3237                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3238   "TARGET_SIMD"
3239   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3240   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3243 ;; <su><addsub>w<q>.
;; Widening signed sum of a 128-bit vector into a wide accumulator:
;; saddw accumulates the low half, then saddw2 accumulates the high
;; half into the final result.
3245 (define_expand "widen_ssum<mode>3"
3246   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3247         (plus:<VDBLW> (sign_extend:<VDBLW> 
3248                         (match_operand:VQW 1 "register_operand" ""))
3249                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3250   "TARGET_SIMD"
3251   {
3252     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3253     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3255     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3256                                                 operands[1], p));
3257     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3258     DONE;
3259   }
3262 (define_expand "widen_ssum<mode>3"
3263   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3264         (plus:<VWIDE> (sign_extend:<VWIDE>
3265                         (match_operand:VD_BHSI 1 "register_operand" ""))
3266                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3267   "TARGET_SIMD"
3269   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3270   DONE;
3273 (define_expand "widen_usum<mode>3"
3274   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3275         (plus:<VDBLW> (zero_extend:<VDBLW> 
3276                         (match_operand:VQW 1 "register_operand" ""))
3277                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3278   "TARGET_SIMD"
3279   {
3280     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3281     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3283     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3284                                                  operands[1], p));
3285     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3286     DONE;
3287   }
3290 (define_expand "widen_usum<mode>3"
3291   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3292         (plus:<VWIDE> (zero_extend:<VWIDE>
3293                         (match_operand:VD_BHSI 1 "register_operand" ""))
3294                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3295   "TARGET_SIMD"
3297   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3298   DONE;
;; [SU]SUBW: subtract the widened 64-bit vector operand 2 from the
;; already-wide operand 1.
3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3302   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3303         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3304           (ANY_EXTEND:<VWIDE>
3305             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3306   "TARGET_SIMD"
3307   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308   [(set_attr "type" "neon_sub_widen")]
;; Internal [SU]SUBW form: operand 2's *low* half is vec_select'ed out
;; of a 128-bit register (vect_par_cnst_lo_half) before widening.
3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3312   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3313         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3314           (ANY_EXTEND:<VWIDE>
3315             (vec_select:<VHALF>
3316               (match_operand:VQW 2 "register_operand" "w")
3317               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3318   "TARGET_SIMD"
3319   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3320   [(set_attr "type" "neon_sub_widen")]
;; Internal [SU]SUBW2 form: same but selecting operand 2's *high* half.
3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3324   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3325         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3326           (ANY_EXTEND:<VWIDE>
3327             (vec_select:<VHALF>
3328               (match_operand:VQW 2 "register_operand" "w")
3329               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3330   "TARGET_SIMD"
3331   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3332   [(set_attr "type" "neon_sub_widen")]
;; [SU]ADDW: add the widened 64-bit vector operand 2 to wide operand 1.
;; Note the extended operand is listed first in the RTL (plus is
;; commutative) while the assembly keeps the wide operand first.
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3337         (plus:<VWIDE>
3338           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3340   "TARGET_SIMD"
3341   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342   [(set_attr "type" "neon_add_widen")]
;; Internal [SU]ADDW form: low half of a 128-bit operand 2.
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3347         (plus:<VWIDE>
3348           (ANY_EXTEND:<VWIDE>
3349             (vec_select:<VHALF>
3350               (match_operand:VQW 2 "register_operand" "w")
3351               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3353   "TARGET_SIMD"
3354   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355   [(set_attr "type" "neon_add_widen")]
;; Internal [SU]ADDW2 form: high half of a 128-bit operand 2.
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3360         (plus:<VWIDE>
3361           (ANY_EXTEND:<VWIDE>
3362             (vec_select:<VHALF>
3363               (match_operand:VQW 2 "register_operand" "w")
3364               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3366   "TARGET_SIMD"
3367   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368   [(set_attr "type" "neon_add_widen")]
;; SADDW2 expander: builds the high-half PARALLEL ('true') and forwards
;; to the *_internal insn above.  The three unsigned/subtract variants
;; that follow are identical apart from the generator called.
3371 (define_expand "aarch64_saddw2<mode>"
3372   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373    (match_operand:<VWIDE> 1 "register_operand" "w")
3374    (match_operand:VQW 2 "register_operand" "w")]
3375   "TARGET_SIMD"
3377   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3378   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3379                                                 operands[2], p));
3380   DONE;
;; UADDW2 expander.
3383 (define_expand "aarch64_uaddw2<mode>"
3384   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3385    (match_operand:<VWIDE> 1 "register_operand" "w")
3386    (match_operand:VQW 2 "register_operand" "w")]
3387   "TARGET_SIMD"
3389   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3390   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3391                                                 operands[2], p));
3392   DONE;
;; SSUBW2 expander.
3396 (define_expand "aarch64_ssubw2<mode>"
3397   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3398    (match_operand:<VWIDE> 1 "register_operand" "w")
3399    (match_operand:VQW 2 "register_operand" "w")]
3400   "TARGET_SIMD"
3402   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3403   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3404                                                 operands[2], p));
3405   DONE;
;; USUBW2 expander.
3408 (define_expand "aarch64_usubw2<mode>"
3409   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3410    (match_operand:<VWIDE> 1 "register_operand" "w")
3411    (match_operand:VQW 2 "register_operand" "w")]
3412   "TARGET_SIMD"
3414   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3415   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3416                                                 operands[2], p));
3417   DONE;
3420 ;; <su><r>h<addsub>.
;; Standard-named average patterns mapping onto the halving-add unspecs:
;; [su]avg..._floor -> [SU]HADD (truncating average).
3422 (define_expand "<u>avg<mode>3_floor"
3423   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425                           (match_operand:VDQ_BHSI 2 "register_operand")]
3426                          HADD))]
3427   "TARGET_SIMD"
;; [su]avg..._ceil -> [SU]RHADD (rounding average).
3430 (define_expand "<u>avg<mode>3_ceil"
3431   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433                           (match_operand:VDQ_BHSI 2 "register_operand")]
3434                          RHADD))]
3435   "TARGET_SIMD"
;; Halving add/subtract insns ([SU]HADD, [SU]RHADD, [SU]HSUB) that the
;; expanders above (and the intrinsics) resolve to.
3438 (define_insn "aarch64_<sur>h<addsub><mode>"
3439   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3440         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3441                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3442                      HADDSUB))]
3443   "TARGET_SIMD"
3444   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3445   [(set_attr "type" "neon_<addsub>_halve<q>")]
3448 ;; <r><addsub>hn<q>.
;; [R]ADDHN / [R]SUBHN: add/subtract two 128-bit vectors and narrow the
;; high half of each element into a 64-bit result.
3450 (define_insn "aarch64_<sur><addsub>hn<mode>"
3451   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3452         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3453                             (match_operand:VQN 2 "register_operand" "w")]
3454                            ADDSUBHN))]
3455   "TARGET_SIMD"
3456   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3457   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; [R]ADDHN2 / [R]SUBHN2: same narrowing, written into the upper half of
;; the destination; operand 1 ("0" constraint) carries the existing low
;; half and must be tied to the output register.
3460 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3461   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3462         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3463                              (match_operand:VQN 2 "register_operand" "w")
3464                              (match_operand:VQN 3 "register_operand" "w")]
3465                             ADDSUBHN2))]
3466   "TARGET_SIMD"
3467   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3468   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3471 ;; pmul.
;; PMUL: polynomial (carry-less) multiply on byte vectors.
3473 (define_insn "aarch64_pmul<mode>"
3474   [(set (match_operand:VB 0 "register_operand" "=w")
3475         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3476                     (match_operand:VB 2 "register_operand" "w")]
3477                    UNSPEC_PMUL))]
3478  "TARGET_SIMD"
3479  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3480   [(set_attr "type" "neon_mul_<Vetype><q>")]
3483 ;; fmulx.
;; FMULX: floating-point multiply-extended, vector and scalar forms
;; (VHSDF_HSDF covers both), kept as an unspec.
3485 (define_insn "aarch64_fmulx<mode>"
3486   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3487         (unspec:VHSDF_HSDF
3488           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3489            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3490            UNSPEC_FMULX))]
3491  "TARGET_SIMD"
3492  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3493  [(set_attr "type" "neon_fp_mul_<stype>")]
3496 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by element, where the lane comes from a vector of the *other*
;; width (<VSWAP_WIDTH>).  The C block remaps the lane index for
;; big-endian before printing.
3498 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3499   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3500         (unspec:VDQSF
3501          [(match_operand:VDQSF 1 "register_operand" "w")
3502           (vec_duplicate:VDQSF
3503            (vec_select:<VEL>
3504             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3505             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3506          UNSPEC_FMULX))]
3507   "TARGET_SIMD"
3508   {
3509     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3510     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3511   }
3512   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3515 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by element where the lane vector has the same mode as the
;; result.
3517 (define_insn "*aarch64_mulx_elt<mode>"
3518   [(set (match_operand:VDQF 0 "register_operand" "=w")
3519         (unspec:VDQF
3520          [(match_operand:VDQF 1 "register_operand" "w")
3521           (vec_duplicate:VDQF
3522            (vec_select:<VEL>
3523             (match_operand:VDQF 2 "register_operand" "w")
3524             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3525          UNSPEC_FMULX))]
3526   "TARGET_SIMD"
3527   {
3528     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3529     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3530   }
3531   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3534 ;; vmulxq_lane
;; FMULX with a duplicated scalar element; always prints lane [0].
;; NOTE(review): the stray ';' after the output template on the line
;; numbered 3544 below starts an (empty) MD comment, so it is harmless,
;; but it should be removed for tidiness.
3536 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3537   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3538         (unspec:VHSDF
3539          [(match_operand:VHSDF 1 "register_operand" "w")
3540           (vec_duplicate:VHSDF
3541             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3542          UNSPEC_FMULX))]
3543   "TARGET_SIMD"
3544   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3545   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3548 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3549 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3550 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar operand with one selected lane of a vector;
;; the lane index is endian-corrected before printing.
3552 (define_insn "*aarch64_vgetfmulx<mode>"
3553   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3554         (unspec:<VEL>
3555          [(match_operand:<VEL> 1 "register_operand" "w")
3556           (vec_select:<VEL>
3557            (match_operand:VDQF 2 "register_operand" "w")
3558             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3559          UNSPEC_FMULX))]
3560   "TARGET_SIMD"
3561   {
3562     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3563     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3564   }
3565   [(set_attr "type" "fmul<Vetype>")]
3567 ;; <su>q<addsub>
;; Saturating add/subtract ([SU]QADD / [SU]QSUB) using the RTL
;; ss_plus/us_plus/ss_minus/us_minus codes (BINQOPS).
3569 (define_insn "aarch64_<su_optab><optab><mode>"
3570   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3571         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3572                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3573   "TARGET_SIMD"
3574   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3575   [(set_attr "type" "neon_<optab><q>")]
3578 ;; suqadd and usqadd
;; SUQADD/USQADD accumulate into operand 1, which is tied to the
;; destination ("0" constraint); only the source operand 2 is printed.
3580 (define_insn "aarch64_<sur>qadd<mode>"
3581   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3582         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3583                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3584                        USSUQADD))]
3585   "TARGET_SIMD"
3586   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3587   [(set_attr "type" "neon_qadd<q>")]
3590 ;; sqmovun
;; SQXTUN: signed saturating extract-unsigned-narrow.
3592 (define_insn "aarch64_sqmovun<mode>"
3593   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3594         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3595                             UNSPEC_SQXTUN))]
3596    "TARGET_SIMD"
3597    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3598    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3601 ;; sqmovn and uqmovn
;; [SU]QXTN: saturating extract-narrow, signed or unsigned via SUQMOVN.
3603 (define_insn "aarch64_<sur>qmovn<mode>"
3604   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3605         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3606                             SUQMOVN))]
3607   "TARGET_SIMD"
3608   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3609    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3612 ;; <su>q<absneg>
;; SQABS / SQNEG: saturating unary absolute-value and negate (UNQOPS =
;; ss_abs / ss_neg).
3614 (define_insn "aarch64_s<optab><mode>"
3615   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3616         (UNQOPS:VSDQ_I
3617           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3618   "TARGET_SIMD"
3619   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3620   [(set_attr "type" "neon_<optab><q>")]
3623 ;; sq<r>dmulh.
;; SQDMULH / SQRDMULH: saturating (rounding) doubling multiply
;; returning the high half.
3625 (define_insn "aarch64_sq<r>dmulh<mode>"
3626   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3627         (unspec:VSDQ_HSI
3628           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3629            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3630          VQDMULH))]
3631   "TARGET_SIMD"
3632   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3633   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3636 ;; sq<r>dmulh_lane
;; Vector-by-lane form, lane taken from a 64-bit vector (<VCOND>);
;; lane index is endian-corrected in the "*" C fragment.
3638 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3639   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640         (unspec:VDQHS
3641           [(match_operand:VDQHS 1 "register_operand" "w")
3642            (vec_select:<VEL>
3643              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3644              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3645          VQDMULH))]
3646   "TARGET_SIMD"
3647   "*
3648    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3649    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3650   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Vector-by-lane form, lane taken from a 128-bit vector (<VCONQ>).
3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3654   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3655         (unspec:VDQHS
3656           [(match_operand:VDQHS 1 "register_operand" "w")
3657            (vec_select:<VEL>
3658              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3659              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3660          VQDMULH))]
3661   "TARGET_SIMD"
3662   "*
3663    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3664    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3665   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane form (SD_HSI), lane from a 64-bit vector.
3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3669   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3670         (unspec:SD_HSI
3671           [(match_operand:SD_HSI 1 "register_operand" "w")
3672            (vec_select:<VEL>
3673              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3674              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3675          VQDMULH))]
3676   "TARGET_SIMD"
3677   "*
3678    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3679    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3680   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane form, lane from a 128-bit vector.
3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3684   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3685         (unspec:SD_HSI
3686           [(match_operand:SD_HSI 1 "register_operand" "w")
3687            (vec_select:<VEL>
3688              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3689              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3690          VQDMULH))]
3691   "TARGET_SIMD"
3692   "*
3693    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3694    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3695   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3698 ;; sqrdml[as]h.
;; SQRDMLAH / SQRDMLSH (ARMv8.1 RDMA): saturating rounding doubling
;; multiply accumulate/subtract, high half.  Operand 1 is the
;; accumulator, tied to the destination ("0" constraint).
3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3701   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3702         (unspec:VSDQ_HSI
3703           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3704            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3705            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3706           SQRDMLH_AS))]
3707    "TARGET_SIMD_RDMA"
3708    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3712 ;; sqrdml[as]h_lane.
;; Vector-by-lane variant; lane from a 64-bit vector (<VCOND>),
;; endian-corrected before printing.
3714 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3715   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3716         (unspec:VDQHS
3717           [(match_operand:VDQHS 1 "register_operand" "0")
3718            (match_operand:VDQHS 2 "register_operand" "w")
3719            (vec_select:<VEL>
3720              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3721              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3722           SQRDMLH_AS))]
3723    "TARGET_SIMD_RDMA"
3724    {
3725      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3726      return
3727       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3728    }
3729    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane variant (SD_HSI), lane from a 64-bit vector.
3732 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3733   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3734         (unspec:SD_HSI
3735           [(match_operand:SD_HSI 1 "register_operand" "0")
3736            (match_operand:SD_HSI 2 "register_operand" "w")
3737            (vec_select:<VEL>
3738              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3739              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3740           SQRDMLH_AS))]
3741    "TARGET_SIMD_RDMA"
3742    {
3743      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3744      return
3745       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3746    }
3747    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3750 ;; sqrdml[as]h_laneq.
;; Vector-by-lane variant; lane from a 128-bit vector (<VCONQ>).
3752 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3753   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3754         (unspec:VDQHS
3755           [(match_operand:VDQHS 1 "register_operand" "0")
3756            (match_operand:VDQHS 2 "register_operand" "w")
3757            (vec_select:<VEL>
3758              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3759              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3760           SQRDMLH_AS))]
3761    "TARGET_SIMD_RDMA"
3762    {
3763      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3764      return
3765       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3766    }
3767    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane variant, lane from a 128-bit vector.
3770 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3771   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3772         (unspec:SD_HSI
3773           [(match_operand:SD_HSI 1 "register_operand" "0")
3774            (match_operand:SD_HSI 2 "register_operand" "w")
3775            (vec_select:<VEL>
3776              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3777              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3778           SQRDMLH_AS))]
3779    "TARGET_SIMD_RDMA"
3780    {
3781      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3782      return
3783       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3784    }
3785    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3788 ;; vqdml[sa]l
;; SQDMLAL / SQDMLSL: saturating doubling multiply-accumulate long.
;; Modelled explicitly as accumulator +/- ss_ashift(mult(sext, sext), 1)
;; rather than an unspec; operand 1 is tied to the destination.
3790 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3791   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792         (SBINQOPS:<VWIDE>
3793           (match_operand:<VWIDE> 1 "register_operand" "0")
3794           (ss_ashift:<VWIDE>
3795               (mult:<VWIDE>
3796                 (sign_extend:<VWIDE>
3797                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3798                 (sign_extend:<VWIDE>
3799                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3800               (const_int 1))))]
3801   "TARGET_SIMD"
3802   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3803   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3806 ;; vqdml[sa]l_lane
;; SQDML[AS]L by-lane, vector form: the second multiplicand is one lane
;; of a 64-bit vector (<VCOND>), duplicated across the vector.  Lane
;; index is endian-corrected before printing.
3808 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3809   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3810         (SBINQOPS:<VWIDE>
3811           (match_operand:<VWIDE> 1 "register_operand" "0")
3812           (ss_ashift:<VWIDE>
3813             (mult:<VWIDE>
3814               (sign_extend:<VWIDE>
3815                 (match_operand:VD_HSI 2 "register_operand" "w"))
3816               (sign_extend:<VWIDE>
3817                 (vec_duplicate:VD_HSI
3818                   (vec_select:<VEL>
3819                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3820                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821               ))
3822             (const_int 1))))]
3823   "TARGET_SIMD"
3824   {
3825     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3826     return
3827       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828   }
3829   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above but the lane comes from a 128-bit vector (<VCONQ>).
3832 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3833   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834         (SBINQOPS:<VWIDE>
3835           (match_operand:<VWIDE> 1 "register_operand" "0")
3836           (ss_ashift:<VWIDE>
3837             (mult:<VWIDE>
3838               (sign_extend:<VWIDE>
3839                 (match_operand:VD_HSI 2 "register_operand" "w"))
3840               (sign_extend:<VWIDE>
3841                 (vec_duplicate:VD_HSI
3842                   (vec_select:<VEL>
3843                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3844                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3845               ))
3846             (const_int 1))))]
3847   "TARGET_SIMD"
3848   {
3849     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3850     return
3851       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3852   }
3853   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar form (SD_HSI): lane selected directly, no vec_duplicate;
;; lane from a 64-bit vector.
3856 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3857   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3858         (SBINQOPS:<VWIDE>
3859           (match_operand:<VWIDE> 1 "register_operand" "0")
3860           (ss_ashift:<VWIDE>
3861             (mult:<VWIDE>
3862               (sign_extend:<VWIDE>
3863                 (match_operand:SD_HSI 2 "register_operand" "w"))
3864               (sign_extend:<VWIDE>
3865                 (vec_select:<VEL>
3866                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3867                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3868               )
3869             (const_int 1))))]
3870   "TARGET_SIMD"
3871   {
3872     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3873     return
3874       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3875   }
3876   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar form with the lane taken from a 128-bit vector.
3879 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3880   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3881         (SBINQOPS:<VWIDE>
3882           (match_operand:<VWIDE> 1 "register_operand" "0")
3883           (ss_ashift:<VWIDE>
3884             (mult:<VWIDE>
3885               (sign_extend:<VWIDE>
3886                 (match_operand:SD_HSI 2 "register_operand" "w"))
3887               (sign_extend:<VWIDE>
3888                 (vec_select:<VEL>
3889                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3890                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3891               )
3892             (const_int 1))))]
3893   "TARGET_SIMD"
3894   {
3895     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3896     return
3897       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3898   }
3899   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3902 ;; vqdml[sa]l_n
;; SQDML[AS]L by-scalar: multiplicand 3 is a scalar duplicated across
;; the vector; always printed as lane [0].
3904 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3905   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3906         (SBINQOPS:<VWIDE>
3907           (match_operand:<VWIDE> 1 "register_operand" "0")
3908           (ss_ashift:<VWIDE>
3909               (mult:<VWIDE>
3910                 (sign_extend:<VWIDE>
3911                       (match_operand:VD_HSI 2 "register_operand" "w"))
3912                 (sign_extend:<VWIDE>
3913                   (vec_duplicate:VD_HSI
3914                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3915               (const_int 1))))]
3916   "TARGET_SIMD"
3917   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3918   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3921 ;; sqdml[as]l2
;; SQDML[AS]L2 internal insn: multiplies the *high* halves of operands
;; 2 and 3 (both selected via the shared hi-half PARALLEL, match_dup 4).
3923 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3924   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3925         (SBINQOPS:<VWIDE>
3926          (match_operand:<VWIDE> 1 "register_operand" "0")
3927          (ss_ashift:<VWIDE>
3928              (mult:<VWIDE>
3929                (sign_extend:<VWIDE>
3930                  (vec_select:<VHALF>
3931                      (match_operand:VQ_HSI 2 "register_operand" "w")
3932                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933                (sign_extend:<VWIDE>
3934                  (vec_select:<VHALF>
3935                      (match_operand:VQ_HSI 3 "register_operand" "w")
3936                      (match_dup 4))))
3937              (const_int 1))))]
3938   "TARGET_SIMD"
3939   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3940   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2 expander: supplies the hi-half PARALLEL to the internal
;; insn above.
3943 (define_expand "aarch64_sqdmlal2<mode>"
3944   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3945    (match_operand:<VWIDE> 1 "register_operand" "w")
3946    (match_operand:VQ_HSI 2 "register_operand" "w")
3947    (match_operand:VQ_HSI 3 "register_operand" "w")]
3948   "TARGET_SIMD"
3950   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3951   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3952                                                   operands[2], operands[3], p));
3953   DONE;
;; SQDMLSL2 expander: subtract variant of the above.
3956 (define_expand "aarch64_sqdmlsl2<mode>"
3957   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3958    (match_operand:<VWIDE> 1 "register_operand" "w")
3959    (match_operand:VQ_HSI 2 "register_operand" "w")
3960    (match_operand:VQ_HSI 3 "register_operand" "w")]
3961   "TARGET_SIMD"
3963   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3964   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3965                                                   operands[2], operands[3], p));
3966   DONE;
3969 ;; vqdml[sa]l2_lane
;; SQDML[AS]L2 by-lane internal insn: high half of operand 2 multiplied
;; by a duplicated lane of a 64-bit vector (<VCOND>); lane index is
;; endian-corrected before printing.
3971 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3972   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3973         (SBINQOPS:<VWIDE>
3974           (match_operand:<VWIDE> 1 "register_operand" "0")
3975           (ss_ashift:<VWIDE>
3976               (mult:<VWIDE>
3977                 (sign_extend:<VWIDE>
3978                   (vec_select:<VHALF>
3979                     (match_operand:VQ_HSI 2 "register_operand" "w")
3980                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3981                 (sign_extend:<VWIDE>
3982                   (vec_duplicate:<VHALF>
3983                     (vec_select:<VEL>
3984                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3986                     ))))
3987               (const_int 1))))]
3988   "TARGET_SIMD"
3989   {
3990     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991     return
3992      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993   }
3994   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above but the lane comes from a 128-bit vector (<VCONQ>).
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3998   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999         (SBINQOPS:<VWIDE>
4000           (match_operand:<VWIDE> 1 "register_operand" "0")
4001           (ss_ashift:<VWIDE>
4002               (mult:<VWIDE>
4003                 (sign_extend:<VWIDE>
4004                   (vec_select:<VHALF>
4005                     (match_operand:VQ_HSI 2 "register_operand" "w")
4006                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4007                 (sign_extend:<VWIDE>
4008                   (vec_duplicate:<VHALF>
4009                     (vec_select:<VEL>
4010                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4011                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4012                     ))))
4013               (const_int 1))))]
4014   "TARGET_SIMD"
4015   {
4016     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4017     return
4018      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4019   }
4020   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2-by-lane expander: supplies the hi-half PARALLEL to the
;; lane internal insn.  The three expanders that follow are the laneq
;; and subtract (SQDMLSL2) variants of the same scheme.
4023 (define_expand "aarch64_sqdmlal2_lane<mode>"
4024   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4025    (match_operand:<VWIDE> 1 "register_operand" "w")
4026    (match_operand:VQ_HSI 2 "register_operand" "w")
4027    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4028    (match_operand:SI 4 "immediate_operand" "i")]
4029   "TARGET_SIMD"
4031   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4032   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4033                                                        operands[2], operands[3],
4034                                                        operands[4], p));
4035   DONE;
;; SQDMLAL2-by-laneq expander (lane from a 128-bit vector).
4038 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4039   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4040    (match_operand:<VWIDE> 1 "register_operand" "w")
4041    (match_operand:VQ_HSI 2 "register_operand" "w")
4042    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4043    (match_operand:SI 4 "immediate_operand" "i")]
4044   "TARGET_SIMD"
4046   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4047   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4048                                                        operands[2], operands[3],
4049                                                        operands[4], p));
4050   DONE;
;; SQDMLSL2-by-lane expander.
4053 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4054   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4055    (match_operand:<VWIDE> 1 "register_operand" "w")
4056    (match_operand:VQ_HSI 2 "register_operand" "w")
4057    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4058    (match_operand:SI 4 "immediate_operand" "i")]
4059   "TARGET_SIMD"
4061   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4062   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4063                                                        operands[2], operands[3],
4064                                                        operands[4], p));
4065   DONE;
4068 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4069   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4070    (match_operand:<VWIDE> 1 "register_operand" "w")
4071    (match_operand:VQ_HSI 2 "register_operand" "w")
4072    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4073    (match_operand:SI 4 "immediate_operand" "i")]
4074   "TARGET_SIMD"
4076   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4077   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4078                                                        operands[2], operands[3],
4079                                                        operands[4], p));
4080   DONE;
;; Scalar-replicated ("_n") form: multiply the high half of operand 2 by a
;; single scalar element (operand 3, duplicated across lanes), double with
;; saturation, and accumulate/subtract into operand 1.  Lane [0] of the
;; scalar register is printed in the assembly template.
4083 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4084   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4085         (SBINQOPS:<VWIDE>
4086           (match_operand:<VWIDE> 1 "register_operand" "0")
4087           (ss_ashift:<VWIDE>
4088             (mult:<VWIDE>
4089               (sign_extend:<VWIDE>
4090                 (vec_select:<VHALF>
4091                   (match_operand:VQ_HSI 2 "register_operand" "w")
4092                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4093               (sign_extend:<VWIDE>
4094                 (vec_duplicate:<VHALF>
4095                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4096             (const_int 1))))]
4097   "TARGET_SIMD"
4098   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4099   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and defers to the _internal insn.
4102 (define_expand "aarch64_sqdmlal2_n<mode>"
4103   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4104    (match_operand:<VWIDE> 1 "register_operand" "w")
4105    (match_operand:VQ_HSI 2 "register_operand" "w")
4106    (match_operand:<VEL> 3 "register_operand" "w")]
4107   "TARGET_SIMD"
4109   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4110   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4111                                                     operands[2], operands[3],
4112                                                     p));
4113   DONE;
;; Subtracting variant of the expander above.
4116 (define_expand "aarch64_sqdmlsl2_n<mode>"
4117   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4118    (match_operand:<VWIDE> 1 "register_operand" "w")
4119    (match_operand:VQ_HSI 2 "register_operand" "w")
4120    (match_operand:<VEL> 3 "register_operand" "w")]
4121   "TARGET_SIMD"
4123   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4124   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4125                                                     operands[2], operands[3],
4126                                                     p));
4127   DONE;
4130 ;; vqdmull
;; Signed saturating doubling multiply long: widen both inputs, multiply,
;; then saturating shift left by one (the "doubling").
4132 (define_insn "aarch64_sqdmull<mode>"
4133   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4134         (ss_ashift:<VWIDE>
4135              (mult:<VWIDE>
4136                (sign_extend:<VWIDE>
4137                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4138                (sign_extend:<VWIDE>
4139                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4140              (const_int 1)))]
4141   "TARGET_SIMD"
4142   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4143   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4146 ;; vqdmull_lane
;; By-lane form, vector (VD_HSI) first operand, 64-bit (<VCOND>) lane source.
;; Lane index in operand 3 is endian-flipped before printing.
4148 (define_insn "aarch64_sqdmull_lane<mode>"
4149   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150         (ss_ashift:<VWIDE>
4151              (mult:<VWIDE>
4152                (sign_extend:<VWIDE>
4153                  (match_operand:VD_HSI 1 "register_operand" "w"))
4154                (sign_extend:<VWIDE>
4155                  (vec_duplicate:VD_HSI
4156                    (vec_select:<VEL>
4157                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4158                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4159                ))
4160              (const_int 1)))]
4161   "TARGET_SIMD"
4162   {
4163     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4164     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4165   }
4166   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above but the lane comes from a 128-bit (<VCONQ>) vector.
4169 (define_insn "aarch64_sqdmull_laneq<mode>"
4170   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4171         (ss_ashift:<VWIDE>
4172              (mult:<VWIDE>
4173                (sign_extend:<VWIDE>
4174                  (match_operand:VD_HSI 1 "register_operand" "w"))
4175                (sign_extend:<VWIDE>
4176                  (vec_duplicate:VD_HSI
4177                    (vec_select:<VEL>
4178                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4179                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4180                ))
4181              (const_int 1)))]
4182   "TARGET_SIMD"
4183   {
4184     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4185     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4186   }
4187   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) first-operand variants of the two patterns above:
;; no vec_duplicate is needed since the product is a single element.
4190 (define_insn "aarch64_sqdmull_lane<mode>"
4191   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4192         (ss_ashift:<VWIDE>
4193              (mult:<VWIDE>
4194                (sign_extend:<VWIDE>
4195                  (match_operand:SD_HSI 1 "register_operand" "w"))
4196                (sign_extend:<VWIDE>
4197                  (vec_select:<VEL>
4198                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4199                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4200                ))
4201              (const_int 1)))]
4202   "TARGET_SIMD"
4203   {
4204     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4205     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4206   }
4207   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar first operand, 128-bit lane source.
4210 (define_insn "aarch64_sqdmull_laneq<mode>"
4211   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4212         (ss_ashift:<VWIDE>
4213              (mult:<VWIDE>
4214                (sign_extend:<VWIDE>
4215                  (match_operand:SD_HSI 1 "register_operand" "w"))
4216                (sign_extend:<VWIDE>
4217                  (vec_select:<VEL>
4218                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4219                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4220                ))
4221              (const_int 1)))]
4222   "TARGET_SIMD"
4223   {
4224     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4225     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4226   }
4227   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4230 ;; vqdmull_n
;; Multiply by a scalar element replicated across all lanes (lane [0] of
;; the scalar register in the printed assembly).
4232 (define_insn "aarch64_sqdmull_n<mode>"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (match_operand:VD_HSI 1 "register_operand" "w"))
4238                (sign_extend:<VWIDE>
4239                  (vec_duplicate:VD_HSI
4240                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4241                )
4242              (const_int 1)))]
4243   "TARGET_SIMD"
4244   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4245   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4248 ;; vqdmull2
;; High-half ("2") variants of sqdmull: both factors are the high halves of
;; 128-bit inputs, selected by the shared vect_par_cnst_hi_half operand 3.
4252 (define_insn "aarch64_sqdmull2<mode>_internal"
4253   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4254         (ss_ashift:<VWIDE>
4255              (mult:<VWIDE>
4256                (sign_extend:<VWIDE>
4257                  (vec_select:<VHALF>
4258                    (match_operand:VQ_HSI 1 "register_operand" "w")
4259                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4260                (sign_extend:<VWIDE>
4261                  (vec_select:<VHALF>
4262                    (match_operand:VQ_HSI 2 "register_operand" "w")
4263                    (match_dup 3)))
4264                )
4265              (const_int 1)))]
4266   "TARGET_SIMD"
4267   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4268   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and defers to the _internal insn.
4271 (define_expand "aarch64_sqdmull2<mode>"
4272   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4273    (match_operand:VQ_HSI 1 "register_operand" "w")
4274    (match_operand:VQ_HSI 2 "register_operand" "w")]
4275   "TARGET_SIMD"
4277   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4278   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4279                                                   operands[2], p));
4280   DONE;
4283 ;; vqdmull2_lane
;; High half of operand 1 times one lane of a 64-bit (<VCOND>) vector;
;; lane index endian-flipped before printing.
4285 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4286   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287         (ss_ashift:<VWIDE>
4288              (mult:<VWIDE>
4289                (sign_extend:<VWIDE>
4290                  (vec_select:<VHALF>
4291                    (match_operand:VQ_HSI 1 "register_operand" "w")
4292                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:<VHALF>
4295                    (vec_select:<VEL>
4296                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4297                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298                ))
4299              (const_int 1)))]
4300   "TARGET_SIMD"
4301   {
4302     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4303     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304   }
4305   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above but the lane comes from a 128-bit (<VCONQ>) vector.
4308 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4309   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310         (ss_ashift:<VWIDE>
4311              (mult:<VWIDE>
4312                (sign_extend:<VWIDE>
4313                  (vec_select:<VHALF>
4314                    (match_operand:VQ_HSI 1 "register_operand" "w")
4315                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4316                (sign_extend:<VWIDE>
4317                  (vec_duplicate:<VHALF>
4318                    (vec_select:<VEL>
4319                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4320                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4321                ))
4322              (const_int 1)))]
4323   "TARGET_SIMD"
4324   {
4325     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4326     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4327   }
4328   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Lane/laneq expanders: build the hi-half selector, call the internals.
4331 (define_expand "aarch64_sqdmull2_lane<mode>"
4332   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4333    (match_operand:VQ_HSI 1 "register_operand" "w")
4334    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4335    (match_operand:SI 3 "immediate_operand" "i")]
4336   "TARGET_SIMD"
4338   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4339   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4340                                                        operands[2], operands[3],
4341                                                        p));
4342   DONE;
4345 (define_expand "aarch64_sqdmull2_laneq<mode>"
4346   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4347    (match_operand:VQ_HSI 1 "register_operand" "w")
4348    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349    (match_operand:SI 3 "immediate_operand" "i")]
4350   "TARGET_SIMD"
4352   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4353   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4354                                                        operands[2], operands[3],
4355                                                        p));
4356   DONE;
4359 ;; vqdmull2_n
;; High half times a replicated scalar element (lane [0] when printed).
4361 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4362   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4363         (ss_ashift:<VWIDE>
4364              (mult:<VWIDE>
4365                (sign_extend:<VWIDE>
4366                  (vec_select:<VHALF>
4367                    (match_operand:VQ_HSI 1 "register_operand" "w")
4368                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4369                (sign_extend:<VWIDE>
4370                  (vec_duplicate:<VHALF>
4371                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4372                )
4373              (const_int 1)))]
4374   "TARGET_SIMD"
4375   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4376   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and defers to the _internal insn.
4379 (define_expand "aarch64_sqdmull2_n<mode>"
4380   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4381    (match_operand:VQ_HSI 1 "register_operand" "w")
4382    (match_operand:<VEL> 2 "register_operand" "w")]
4383   "TARGET_SIMD"
4385   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4386   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4387                                                     operands[2], p));
4388   DONE;
4391 ;; vshl
;; Register-controlled shifts, expressed as unspecs (VSHL iterator covers
;; the signed/unsigned and rounding variants via the <sur> attribute).
;; NOTE(review): the trailing ";" after the template string below is present
;; upstream; in md syntax ";" starts a comment, so it is harmless.
4393 (define_insn "aarch64_<sur>shl<mode>"
4394   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4395         (unspec:VSDQ_I_DI
4396           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4397            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4398          VSHL))]
4399   "TARGET_SIMD"
4400   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4401   [(set_attr "type" "neon_shift_reg<q>")]
4405 ;; vqshl
;; Saturating (optionally rounding, <r>) register-controlled shifts.
4407 (define_insn "aarch64_<sur>q<r>shl<mode>"
4408   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4409         (unspec:VSDQ_I
4410           [(match_operand:VSDQ_I 1 "register_operand" "w")
4411            (match_operand:VSDQ_I 2 "register_operand" "w")]
4412          VQSHL))]
4413   "TARGET_SIMD"
4414   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4415   [(set_attr "type" "neon_sat_shift_reg<q>")]
4418 ;; vshll_n
;; Widening shift left by immediate; when the shift count equals the element
;; width the dedicated SHLL form is used instead of [US]SHLL.
4420 (define_insn "aarch64_<sur>shll_n<mode>"
4421   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4422         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4423                          (match_operand:SI 2
4424                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4425                          VSHLL))]
4426   "TARGET_SIMD"
4427   {
4428     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4429       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4430     else
4431       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4432   }
4433   [(set_attr "type" "neon_shift_imm_long")]
4436 ;; vshll_high_n
;; High-half variant of the widening shift left by immediate.
4438 (define_insn "aarch64_<sur>shll2_n<mode>"
4439   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4441                          (match_operand:SI 2 "immediate_operand" "i")]
4442                          VSHLL))]
4443   "TARGET_SIMD"
4444   {
4445     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4446       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4447     else
4448       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4449   }
4450   [(set_attr "type" "neon_shift_imm_long")]
4453 ;; vrshr_n
;; Rounding shift right by immediate (VRSHR_N unspec iterator).
4455 (define_insn "aarch64_<sur>shr_n<mode>"
4456   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4457         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4458                            (match_operand:SI 2
4459                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4460                           VRSHR_N))]
4461   "TARGET_SIMD"
4462   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4463   [(set_attr "type" "neon_sat_shift_imm<q>")]
4466 ;; v(r)sra_n
;; Shift-right-and-accumulate: operand 1 is tied to the destination ("0"),
;; operand 2 is shifted by operand 3 and added in.
4468 (define_insn "aarch64_<sur>sra_n<mode>"
4469   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4470         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4471                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4472                        (match_operand:SI 3
4473                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4474                       VSRA))]
4475   "TARGET_SIMD"
4476   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4477   [(set_attr "type" "neon_shift_acc<q>")]
4480 ;; vs<lr>i_n
;; Shift-and-insert (SLI/SRI): destination also reads operand 1 ("0").
4482 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4483   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4484         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4485                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4486                        (match_operand:SI 3
4487                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4488                       VSLRI))]
4489   "TARGET_SIMD"
4490   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4491   [(set_attr "type" "neon_shift_imm<q>")]
4494 ;; vqshl(u)
;; Saturating shift left by immediate, signed/unsigned/unsigned-from-signed.
4496 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4497   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4498         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4499                        (match_operand:SI 2
4500                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4501                       VQSHL_N))]
4502   "TARGET_SIMD"
4503   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4504   [(set_attr "type" "neon_sat_shift_imm<q>")]
4508 ;; vq(r)shr(u)n_n
;; Saturating (rounding) shift right narrow by immediate.
4510 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4511   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4512         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4513                             (match_operand:SI 2
4514                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4515                            VQSHRN_N))]
4516   "TARGET_SIMD"
4517   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4518   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4522 ;; cm(eq|ge|gt|lt|le)
4523 ;; Note, we have constraints for Dz and Z as different expanders
4524 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compare producing an all-ones/all-zeros mask: the neg of
;; the comparison result gives -1 (all bits set) for true lanes.
;; Second alternative compares against immediate zero (ZDz constraint).
4526 (define_insn "aarch64_cm<optab><mode>"
4527   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4528         (neg:<V_INT_EQUIV>
4529           (COMPARISONS:<V_INT_EQUIV>
4530             (match_operand:VDQ_I 1 "register_operand" "w,w")
4531             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4532           )))]
4533   "TARGET_SIMD"
4534   "@
4535   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4536   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4537   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare: kept as "#" until after reload, then either split to a
;; GP-register compare+cstore sequence (clobbering CC) or matched by the
;; CC-free *aarch64_cm<optab>di pattern below.
4540 (define_insn_and_split "aarch64_cm<optab>di"
4541   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4542         (neg:DI
4543           (COMPARISONS:DI
4544             (match_operand:DI 1 "register_operand" "w,w,r")
4545             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4546           )))
4547      (clobber (reg:CC CC_REGNUM))]
4548   "TARGET_SIMD"
4549   "#"
4550   "&& reload_completed"
4551   [(set (match_operand:DI 0 "register_operand")
4552         (neg:DI
4553           (COMPARISONS:DI
4554             (match_operand:DI 1 "register_operand")
4555             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4556           )))]
4557   {
4558     /* If we are in the general purpose register file,
4559        we split to a sequence of comparison and store.  */
4560     if (GP_REGNUM_P (REGNO (operands[0]))
4561         && GP_REGNUM_P (REGNO (operands[1])))
4562       {
4563         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4564         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4565         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4566         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4567         DONE;
4568       }
4569     /* Otherwise, we expand to a similar pattern which does not
4570        clobber CC_REGNUM.  */
4571   }
4572   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; FP-register-file DImode compare, matched only after reload (once the
;; split above has decided the register file).
4575 (define_insn "*aarch64_cm<optab>di"
4576   [(set (match_operand:DI 0 "register_operand" "=w,w")
4577         (neg:DI
4578           (COMPARISONS:DI
4579             (match_operand:DI 1 "register_operand" "w,w")
4580             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4581           )))]
4582   "TARGET_SIMD && reload_completed"
4583   "@
4584   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4585   cm<optab>\t%d0, %d1, #0"
4586   [(set_attr "type" "neon_compare, neon_compare_zero")]
4589 ;; cm(hs|hi)
;; Unsigned vector compares (UCOMPARISONS: cmhs/cmhi) producing a -1/0 mask;
;; unlike the signed patterns there is no compare-against-zero alternative.
4591 (define_insn "aarch64_cm<optab><mode>"
4592   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4593         (neg:<V_INT_EQUIV>
4594           (UCOMPARISONS:<V_INT_EQUIV>
4595             (match_operand:VDQ_I 1 "register_operand" "w")
4596             (match_operand:VDQ_I 2 "register_operand" "w")
4597           )))]
4598   "TARGET_SIMD"
4599   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4600   [(set_attr "type" "neon_compare<q>")]
;; Unsigned DImode compare: post-reload split mirroring the signed version;
;; GP-file operands become compare + cstore (uses plain CCmode here).
4603 (define_insn_and_split "aarch64_cm<optab>di"
4604   [(set (match_operand:DI 0 "register_operand" "=w,r")
4605         (neg:DI
4606           (UCOMPARISONS:DI
4607             (match_operand:DI 1 "register_operand" "w,r")
4608             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4609           )))
4610     (clobber (reg:CC CC_REGNUM))]
4611   "TARGET_SIMD"
4612   "#"
4613   "&& reload_completed"
4614   [(set (match_operand:DI 0 "register_operand")
4615         (neg:DI
4616           (UCOMPARISONS:DI
4617             (match_operand:DI 1 "register_operand")
4618             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4619           )))]
4620   {
4621     /* If we are in the general purpose register file,
4622        we split to a sequence of comparison and store.  */
4623     if (GP_REGNUM_P (REGNO (operands[0]))
4624         && GP_REGNUM_P (REGNO (operands[1])))
4625       {
4626         machine_mode mode = CCmode;
4627         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4628         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4629         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4630         DONE;
4631       }
4632     /* Otherwise, we expand to a similar pattern which does not
4633        clobber CC_REGNUM.  */
4634   }
4635   [(set_attr "type" "neon_compare,multiple")]
;; FP-register-file unsigned DImode compare, matched only after reload.
4638 (define_insn "*aarch64_cm<optab>di"
4639   [(set (match_operand:DI 0 "register_operand" "=w")
4640         (neg:DI
4641           (UCOMPARISONS:DI
4642             (match_operand:DI 1 "register_operand" "w")
4643             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4644           )))]
4645   "TARGET_SIMD && reload_completed"
4646   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4647   [(set_attr "type" "neon_compare")]
4650 ;; cmtst
4652 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4653 ;; we don't have any insns using ne, and aarch64_vcond outputs
4654 ;; not (neg (eq (and x y) 0))
4655 ;; which is rewritten by simplify_rtx as
4656 ;; plus (eq (and x y) 0) -1.
;; Hence this pattern matches the simplified plus/eq form rather than the
;; canonical neg/ne form.  Operands 3 and 4 pin the zero and -1 constants.
4658 (define_insn "aarch64_cmtst<mode>"
4659   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4660         (plus:<V_INT_EQUIV>
4661           (eq:<V_INT_EQUIV>
4662             (and:VDQ_I
4663               (match_operand:VDQ_I 1 "register_operand" "w")
4664               (match_operand:VDQ_I 2 "register_operand" "w"))
4665             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4666           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4667   ]
4668   "TARGET_SIMD"
4669   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4670   [(set_attr "type" "neon_tst<q>")]
;; DImode test: uses the canonical neg/ne form; post-reload split either to
;; a GP compare + cstore sequence or to the CC-free *aarch64_cmtstdi below.
4673 (define_insn_and_split "aarch64_cmtstdi"
4674   [(set (match_operand:DI 0 "register_operand" "=w,r")
4675         (neg:DI
4676           (ne:DI
4677             (and:DI
4678               (match_operand:DI 1 "register_operand" "w,r")
4679               (match_operand:DI 2 "register_operand" "w,r"))
4680             (const_int 0))))
4681     (clobber (reg:CC CC_REGNUM))]
4682   "TARGET_SIMD"
4683   "#"
4684   "&& reload_completed"
4685   [(set (match_operand:DI 0 "register_operand")
4686         (neg:DI
4687           (ne:DI
4688             (and:DI
4689               (match_operand:DI 1 "register_operand")
4690               (match_operand:DI 2 "register_operand"))
4691             (const_int 0))))]
4692   {
4693     /* If we are in the general purpose register file,
4694        we split to a sequence of comparison and store.  */
4695     if (GP_REGNUM_P (REGNO (operands[0]))
4696         && GP_REGNUM_P (REGNO (operands[1])))
4697       {
4698         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4699         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4700         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4701         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4702         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4703         DONE;
4704       }
4705     /* Otherwise, we expand to a similar pattern which does not
4706        clobber CC_REGNUM.  */
4707   }
4708   [(set_attr "type" "neon_tst,multiple")]
;; FP-register-file DImode cmtst (no reload_completed condition here,
;; unlike the *aarch64_cm<optab>di patterns above).
4711 (define_insn "*aarch64_cmtstdi"
4712   [(set (match_operand:DI 0 "register_operand" "=w")
4713         (neg:DI
4714           (ne:DI
4715             (and:DI
4716               (match_operand:DI 1 "register_operand" "w")
4717               (match_operand:DI 2 "register_operand" "w"))
4718             (const_int 0))))]
4719   "TARGET_SIMD"
4720   "cmtst\t%d0, %d1, %d2"
4721   [(set_attr "type" "neon_tst")]
4724 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector compares producing an integer -1/0 mask; the
;; second alternative compares against floating-point zero (YDz).
4726 (define_insn "aarch64_cm<optab><mode>"
4727   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4728         (neg:<V_INT_EQUIV>
4729           (COMPARISONS:<V_INT_EQUIV>
4730             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4731             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4732           )))]
4733   "TARGET_SIMD"
4734   "@
4735   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4736   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4737   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4740 ;; fac(ge|gt)
4741 ;; Note we can also handle what would be fac(le|lt) by
4742 ;; generating fac(ge|gt).
;; Absolute compare: |op1| <cmp> |op2|, mask result as above.
4744 (define_insn "aarch64_fac<optab><mode>"
4745   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4746         (neg:<V_INT_EQUIV>
4747           (FAC_COMPARISONS:<V_INT_EQUIV>
4748             (abs:VHSDF_HSDF
4749               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4750             (abs:VHSDF_HSDF
4751               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4752   )))]
4753   "TARGET_SIMD"
4754   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4755   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4758 ;; addp
;; Pairwise add of two 64-bit integer vectors (unspec form).
4760 (define_insn "aarch64_addp<mode>"
4761   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4762         (unspec:VD_BHSI
4763           [(match_operand:VD_BHSI 1 "register_operand" "w")
4764            (match_operand:VD_BHSI 2 "register_operand" "w")]
4765           UNSPEC_ADDP))]
4766   "TARGET_SIMD"
4767   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4768   [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar pairwise add: reduce the two doublewords of a V2DI into D0.
4771 (define_insn "aarch64_addpdi"
4772   [(set (match_operand:DI 0 "register_operand" "=w")
4773         (unspec:DI
4774           [(match_operand:V2DI 1 "register_operand" "w")]
4775           UNSPEC_ADDP))]
4776   "TARGET_SIMD"
4777   "addp\t%d0, %1.2d"
4778   [(set_attr "type" "neon_reduc_add")]
4781 ;; sqrt
;; Expander first tries the Newton-series approximation
;; (aarch64_emit_approx_sqrt); if that declines, the FSQRT insn below
;; matches the unchanged pattern.
4783 (define_expand "sqrt<mode>2"
4784   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4785         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4786   "TARGET_SIMD"
4788   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4789     DONE;
;; Direct hardware vector square root.
4792 (define_insn "*sqrt<mode>2"
4793   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4794         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4795   "TARGET_SIMD"
4796   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4797   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4800 ;; Patterns for vector struct loads and stores.
;; Two-register structure load: LD2 de-interleaves into an OImode
;; (two-vector) register pair; the inner VQ unspec only carries the
;; element mode.
4802 (define_insn "aarch64_simd_ld2<mode>"
4803   [(set (match_operand:OI 0 "register_operand" "=w")
4804         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4805                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4806                    UNSPEC_LD2))]
4807   "TARGET_SIMD"
4808   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4809   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one structure and replicate it to all lanes of both vectors.
;; The memory operand is BLK since the access size differs from OImode.
4812 (define_insn "aarch64_simd_ld2r<mode>"
4813   [(set (match_operand:OI 0 "register_operand" "=w")
4814        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4815                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4816                   UNSPEC_LD2_DUP))]
4817   "TARGET_SIMD"
4818   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4819   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 to a single lane: operand 2 provides the untouched lanes ("0" ties
;; it to the destination); the lane index is endian-flipped when printed.
4822 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4823   [(set (match_operand:OI 0 "register_operand" "=w")
4824         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4825                     (match_operand:OI 2 "register_operand" "0")
4826                     (match_operand:SI 3 "immediate_operand" "i")
4827                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4828                    UNSPEC_LD2_LANE))]
4829   "TARGET_SIMD"
4830   {
4831     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4832     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4833   }
4834   [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander: on big-endian, load into a temp and permute the
;; register list with aarch64_rev_reglistoi to restore GCC lane numbering.
4837 (define_expand "vec_load_lanesoi<mode>"
4838   [(set (match_operand:OI 0 "register_operand" "=w")
4839         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4840                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841                    UNSPEC_LD2))]
4842   "TARGET_SIMD"
4844   if (BYTES_BIG_ENDIAN)
4845     {
4846       rtx tmp = gen_reg_rtx (OImode);
4847       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4848       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4849       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4850     }
4851   else
4852     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4853   DONE;
;; Two-register structure store: ST2 interleaves the OImode pair to memory.
4856 (define_insn "aarch64_simd_st2<mode>"
4857   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4858         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4859                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4860                    UNSPEC_ST2))]
4861   "TARGET_SIMD"
4862   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4863   [(set_attr "type" "neon_store2_2reg<q>")]
4866 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 of a single lane; lane index endian-flipped before printing.
4867 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4868   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4869         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4870                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4871                     (match_operand:SI 2 "immediate_operand" "i")]
4872                    UNSPEC_ST2_LANE))]
4873   "TARGET_SIMD"
4874   {
4875     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4876     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4877   }
4878   [(set_attr "type" "neon_store2_one_lane<q>")]
4881 (define_expand "vec_store_lanesoi<mode>"
4882   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4883         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4884                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4885                    UNSPEC_ST2))]
4886   "TARGET_SIMD"
4888   if (BYTES_BIG_ENDIAN)
4889     {
4890       rtx tmp = gen_reg_rtx (OImode);
4891       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4892       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4893       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4894     }
4895   else
4896     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4897   DONE;
;; Three-register (LD3/ST3) interleaved structure loads and stores.
;; CImode is the 384-bit opaque mode holding three 128-bit vectors.
;; These mirror the LD2/ST2 patterns above, one register wider.
4900 (define_insn "aarch64_simd_ld3<mode>"
4901   [(set (match_operand:CI 0 "register_operand" "=w")
4902         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4903                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4904                    UNSPEC_LD3))]
4905   "TARGET_SIMD"
4906   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4907   [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one 3-element structure and replicate it to all lanes (LD3R).
4910 (define_insn "aarch64_simd_ld3r<mode>"
4911   [(set (match_operand:CI 0 "register_operand" "=w")
4912        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4913                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4914                   UNSPEC_LD3_DUP))]
4915   "TARGET_SIMD"
4916   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4917   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Load one 3-element structure into lane %3; remaining lanes come from
;; operand 2 (tied to the output).  Lane index flipped for big endian.
4920 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4921   [(set (match_operand:CI 0 "register_operand" "=w")
4922         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4923                     (match_operand:CI 2 "register_operand" "0")
4924                     (match_operand:SI 3 "immediate_operand" "i")
4925                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926                    UNSPEC_LD3_LANE))]
4927   "TARGET_SIMD"
4929     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4930     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4932   [(set_attr "type" "neon_load3_one_lane")]
;; Standard vec_load_lanes pattern; big endian permutes the loaded register
;; list back to GCC vector-extension order via a TBL mask.
4935 (define_expand "vec_load_lanesci<mode>"
4936   [(set (match_operand:CI 0 "register_operand" "=w")
4937         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4938                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4939                    UNSPEC_LD3))]
4940   "TARGET_SIMD"
4942   if (BYTES_BIG_ENDIAN)
4943     {
4944       rtx tmp = gen_reg_rtx (CImode);
4945       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4946       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4947       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4948     }
4949   else
4950     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4951   DONE;
;; Store three registers as interleaved 3-element structures (ST3).
4954 (define_insn "aarch64_simd_st3<mode>"
4955   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4956         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4957                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958                    UNSPEC_ST3))]
4959   "TARGET_SIMD"
4960   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4961   [(set_attr "type" "neon_store3_3reg<q>")]
4964 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store lane %2 of a three-register list as one 3-element structure.
4965 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4966   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4967         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4968                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4969                      (match_operand:SI 2 "immediate_operand" "i")]
4970                     UNSPEC_ST3_LANE))]
4971   "TARGET_SIMD"
4972   {
4973     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4974     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4975   }
4976   [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard vec_store_lanes pattern; big endian permutes the register list
;; into memory order before the ST3.
4979 (define_expand "vec_store_lanesci<mode>"
4980   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4981         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4982                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4983                    UNSPEC_ST3))]
4984   "TARGET_SIMD"
4986   if (BYTES_BIG_ENDIAN)
4987     {
4988       rtx tmp = gen_reg_rtx (CImode);
4989       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4990       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4991       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4992     }
4993   else
4994     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4995   DONE;
;; Four-register (LD4/ST4) interleaved structure loads and stores.
;; XImode is the 512-bit opaque mode holding four 128-bit vectors.
;; These mirror the LD2/ST2 patterns above, two registers wider.
4998 (define_insn "aarch64_simd_ld4<mode>"
4999   [(set (match_operand:XI 0 "register_operand" "=w")
5000         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5001                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002                    UNSPEC_LD4))]
5003   "TARGET_SIMD"
5004   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5005   [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one 4-element structure and replicate it to all lanes (LD4R).
5008 (define_insn "aarch64_simd_ld4r<mode>"
5009   [(set (match_operand:XI 0 "register_operand" "=w")
5010        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5011                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5012                   UNSPEC_LD4_DUP))]
5013   "TARGET_SIMD"
5014   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5015   [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load one 4-element structure into lane %3; remaining lanes come from
;; operand 2 (tied to the output).  Lane index flipped for big endian.
5018 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5019   [(set (match_operand:XI 0 "register_operand" "=w")
5020         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5021                     (match_operand:XI 2 "register_operand" "0")
5022                     (match_operand:SI 3 "immediate_operand" "i")
5023                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5024                    UNSPEC_LD4_LANE))]
5025   "TARGET_SIMD"
5027     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5028     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5030   [(set_attr "type" "neon_load4_one_lane")]
;; Standard vec_load_lanes pattern; big endian permutes the loaded register
;; list back to GCC vector-extension order via a TBL mask.
5033 (define_expand "vec_load_lanesxi<mode>"
5034   [(set (match_operand:XI 0 "register_operand" "=w")
5035         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5036                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037                    UNSPEC_LD4))]
5038   "TARGET_SIMD"
5040   if (BYTES_BIG_ENDIAN)
5041     {
5042       rtx tmp = gen_reg_rtx (XImode);
5043       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5044       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5045       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5046     }
5047   else
5048     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5049   DONE;
;; Store four registers as interleaved 4-element structures (ST4).
5052 (define_insn "aarch64_simd_st4<mode>"
5053   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5054         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5055                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056                    UNSPEC_ST4))]
5057   "TARGET_SIMD"
5058   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5059   [(set_attr "type" "neon_store4_4reg<q>")]
5062 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store lane %2 of a four-register list as one 4-element structure.
5063 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5064   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5065         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5066                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5067                      (match_operand:SI 2 "immediate_operand" "i")]
5068                     UNSPEC_ST4_LANE))]
5069   "TARGET_SIMD"
5070   {
5071     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5072     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5073   }
5074   [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard vec_store_lanes pattern; big endian permutes the register list
;; into memory order before the ST4.
5077 (define_expand "vec_store_lanesxi<mode>"
5078   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5079         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5080                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081                    UNSPEC_ST4))]
5082   "TARGET_SIMD"
5084   if (BYTES_BIG_ENDIAN)
5085     {
5086       rtx tmp = gen_reg_rtx (XImode);
5087       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5089       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5090     }
5091   else
5092     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5093   DONE;
;; Permute a structure register list with a byte-table mask (operand 2),
;; used by the big-endian vec_load_lanes/vec_store_lanes expanders above.
;; Kept as a single insn until after reload, then split into one
;; TBL (tbl1v16qi) per constituent 128-bit register, indexing consecutive
;; hard registers starting at REGNO of the list.
5096 (define_insn_and_split "aarch64_rev_reglist<mode>"
5097 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5098         (unspec:VSTRUCT
5099                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5100                     (match_operand:V16QI 2 "register_operand" "w")]
5101                    UNSPEC_REV_REGLIST))]
5102   "TARGET_SIMD"
5103   "#"
5104   "&& reload_completed"
5105   [(const_int 0)]
5107   int i;
;; One TBL per 128-bit vector register in the list.
5108   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5109   for (i = 0; i < nregs; i++)
5110     {
5111       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5112       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5113       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5114     }
5115   DONE;
5117   [(set_attr "type" "neon_tbl1_q")
5118    (set_attr "length" "<insn_count>")]
5121 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the opaque structure modes (OI/CI/XI).  Before reload
;; we only need to ensure a store has a register source; the *aarch64_mov
;; and *aarch64_be_mov insns below handle the real work.
5123 (define_expand "mov<mode>"
5124   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5125         (match_operand:VSTRUCT 1 "general_operand" ""))]
5126   "TARGET_SIMD"
5128   if (can_create_pseudo_p ())
5129     {
;; Memory destination: force the source into a register.
5130       if (GET_CODE (operands[0]) != REG)
5131         operands[1] = force_reg (<MODE>mode, operands[1]);
5132     }
;; Builtin expanders and insns for multi-register LD1/ST1 (non-interleaved
;; "x2"/"x3" forms).  The expanders wrap the address register in a MEM of
;; the appropriate struct mode; the VALLDIF unspec carries the element mode.
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137   [(match_operand:CI 0 "register_operand" "=w")
5138    (match_operand:DI 1 "register_operand" "r")
5139    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140   "TARGET_SIMD"
5142   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5144   DONE;
;; LD1 of three consecutive registers (no interleaving).
5147 (define_insn "aarch64_ld1_x3_<mode>"
5148   [(set (match_operand:CI 0 "register_operand" "=w")
5149         (unspec:CI
5150           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5152   "TARGET_SIMD"
5153   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154   [(set_attr "type" "neon_load1_3reg<q>")]
;; st1x2 builtin: store two consecutive registers with ST1.
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5158   [(match_operand:DI 0 "register_operand" "")
5159    (match_operand:OI 1 "register_operand" "")
5160    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161   "TARGET_SIMD"
5163   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5165   DONE;
;; ST1 of two consecutive registers (no interleaving).
5168 (define_insn "aarch64_st1_x2_<mode>"
5169    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5170          (unspec:OI
5171           [(match_operand:OI 1 "register_operand" "w")
5172           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5173   "TARGET_SIMD"
5174   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175   [(set_attr "type" "neon_store1_2reg<q>")]
;; st1x3 builtin: store three consecutive registers with ST1.
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5179   [(match_operand:DI 0 "register_operand" "")
5180    (match_operand:CI 1 "register_operand" "")
5181    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182   "TARGET_SIMD"
5184   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5186   DONE;
;; ST1 of three consecutive registers (no interleaving).
5189 (define_insn "aarch64_st1_x3_<mode>"
5190    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5191         (unspec:CI
5192          [(match_operand:CI 1 "register_operand" "w")
5193           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5194   "TARGET_SIMD"
5195   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196   [(set_attr "type" "neon_store1_3reg<q>")]
;; Little-endian move insn for the opaque structure modes: register-to-
;; register moves split later ("#"); memory transfers use one LD1/ST1
;; covering the whole register list.
5199 (define_insn "*aarch64_mov<mode>"
5200   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5201         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5202   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5203    && (register_operand (operands[0], <MODE>mode)
5204        || register_operand (operands[1], <MODE>mode))"
5205   "@
5206    #
5207    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5208    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5209   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5210                      neon_load<nregs>_<nregs>reg_q")
5211    (set_attr "length" "<insn_count>,4,4")]
;; Single-register LD1, kept behind an unspec so it survives on big endian
;; where element order in memory matters (used by the aarch64_ld1 expander).
5214 (define_insn "aarch64_be_ld1<mode>"
5215   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5216         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5217                              "aarch64_simd_struct_operand" "Utv")]
5218         UNSPEC_LD1))]
5219   "TARGET_SIMD"
5220   "ld1\\t{%0<Vmtype>}, %1"
5221   [(set_attr "type" "neon_load1_1reg<q>")]
;; Single-register ST1, counterpart of aarch64_be_ld1.
5224 (define_insn "aarch64_be_st1<mode>"
5225   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5226         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5227         UNSPEC_ST1))]
5228   "TARGET_SIMD"
5229   "st1\\t{%1<Vmtype>}, %0"
5230   [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of the opaque structure modes.  LD1/ST1 cannot be used
;; here because they would reorder elements, so OImode uses LDP/STP of
;; Q registers directly; CImode and XImode always split (see the
;; define_splits below).  Lengths: one 4-byte insn per 128-bit register
;; for the register-register case.
5233 (define_insn "*aarch64_be_movoi"
5234   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5235         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5236   "TARGET_SIMD && BYTES_BIG_ENDIAN
5237    && (register_operand (operands[0], OImode)
5238        || register_operand (operands[1], OImode))"
5239   "@
5240    #
5241    stp\\t%q1, %R1, %0
5242    ldp\\t%q0, %R0, %1"
5243   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5244    (set_attr "length" "8,4,4")]
;; CImode (three Q registers): no single instruction, always split.
5247 (define_insn "*aarch64_be_movci"
5248   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5249         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5250   "TARGET_SIMD && BYTES_BIG_ENDIAN
5251    && (register_operand (operands[0], CImode)
5252        || register_operand (operands[1], CImode))"
5253   "#"
5254   [(set_attr "type" "multiple")
5255    (set_attr "length" "12,4,4")]
;; XImode (four Q registers): no single instruction, always split.
5258 (define_insn "*aarch64_be_movxi"
5259   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5260         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5261   "TARGET_SIMD && BYTES_BIG_ENDIAN
5262    && (register_operand (operands[0], XImode)
5263        || register_operand (operands[1], XImode))"
5264   "#"
5265   [(set_attr "type" "multiple")
5266    (set_attr "length" "16,4,4")]
;; Post-reload splitters for the opaque structure modes.
;; OImode register-register move: two TImode register moves.
5269 (define_split
5270   [(set (match_operand:OI 0 "register_operand")
5271         (match_operand:OI 1 "register_operand"))]
5272   "TARGET_SIMD && reload_completed"
5273   [(const_int 0)]
5275   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5276   DONE;
;; CImode: register-register becomes three TImode moves; big-endian memory
;; transfers split into an OImode move for the first 32 bytes plus a
;; TImode (viewed as V16QI) move for the trailing 16 bytes.  Otherwise
;; leave it to other patterns (FAIL).
5279 (define_split
5280   [(set (match_operand:CI 0 "nonimmediate_operand")
5281         (match_operand:CI 1 "general_operand"))]
5282   "TARGET_SIMD && reload_completed"
5283   [(const_int 0)]
5285   if (register_operand (operands[0], CImode)
5286       && register_operand (operands[1], CImode))
5287     {
5288       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5289       DONE;
5290     }
5291   else if (BYTES_BIG_ENDIAN)
5292     {
5293       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5294                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5295       emit_move_insn (gen_lowpart (V16QImode,
5296                                    simplify_gen_subreg (TImode, operands[0],
5297                                                         CImode, 32)),
5298                       gen_lowpart (V16QImode,
5299                                    simplify_gen_subreg (TImode, operands[1],
5300                                                         CImode, 32)));
5301       DONE;
5302     }
5303   else
5304     FAIL;
;; XImode: register-register becomes four TImode moves; big-endian memory
;; transfers split into two OImode halves at byte offsets 0 and 32.
5307 (define_split
5308   [(set (match_operand:XI 0 "nonimmediate_operand")
5309         (match_operand:XI 1 "general_operand"))]
5310   "TARGET_SIMD && reload_completed"
5311   [(const_int 0)]
5313   if (register_operand (operands[0], XImode)
5314       && register_operand (operands[1], XImode))
5315     {
5316       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5317       DONE;
5318     }
5319   else if (BYTES_BIG_ENDIAN)
5320     {
5321       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5322                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5323       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5324                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5325       DONE;
5326     }
5327   else
5328     FAIL;
;; Builtin expander for the load-and-replicate (LDnR) forms.  Builds a
;; BLKmode MEM whose size is the structure size actually read:
;; element size times the number of registers in the list.
5331 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5332   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5333    (match_operand:DI 1 "register_operand" "w")
5334    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335   "TARGET_SIMD"
5337   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5338   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5339                      * <VSTRUCT:nregs>);
5341   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5342                                                                 mem));
5343   DONE;
;; D-register structure loads.  Each LDn has two variants: VD (64-bit
;; vector element modes) uses the interleaving LDn instruction, while DX
;; (DI/DF scalars held in SIMD registers) uses LD1 on a .1d register list,
;; which has the same effect for single-element "vectors".
5346 (define_insn "aarch64_ld2<mode>_dreg"
5347   [(set (match_operand:OI 0 "register_operand" "=w")
5348         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5349                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5350                    UNSPEC_LD2_DREG))]
5351   "TARGET_SIMD"
5352   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5353   [(set_attr "type" "neon_load2_2reg<q>")]
;; DX variant: LD1 of two .1d registers instead of LD2.
5356 (define_insn "aarch64_ld2<mode>_dreg"
5357   [(set (match_operand:OI 0 "register_operand" "=w")
5358         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5359                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5360                    UNSPEC_LD2_DREG))]
5361   "TARGET_SIMD"
5362   "ld1\\t{%S0.1d - %T0.1d}, %1"
5363   [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3, D-register vector variant.
5366 (define_insn "aarch64_ld3<mode>_dreg"
5367   [(set (match_operand:CI 0 "register_operand" "=w")
5368         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5369                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5370                    UNSPEC_LD3_DREG))]
5371   "TARGET_SIMD"
5372   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5373   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3, DX scalar variant via LD1.
5376 (define_insn "aarch64_ld3<mode>_dreg"
5377   [(set (match_operand:CI 0 "register_operand" "=w")
5378         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5379                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380                    UNSPEC_LD3_DREG))]
5381   "TARGET_SIMD"
5382   "ld1\\t{%S0.1d - %U0.1d}, %1"
5383   [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4, D-register vector variant.
5386 (define_insn "aarch64_ld4<mode>_dreg"
5387   [(set (match_operand:XI 0 "register_operand" "=w")
5388         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5389                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5390                    UNSPEC_LD4_DREG))]
5391   "TARGET_SIMD"
5392   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5393   [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4, DX scalar variant via LD1.
5396 (define_insn "aarch64_ld4<mode>_dreg"
5397   [(set (match_operand:XI 0 "register_operand" "=w")
5398         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5399                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5400                    UNSPEC_LD4_DREG))]
5401   "TARGET_SIMD"
5402   "ld1\\t{%S0.1d - %V0.1d}, %1"
5403   [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for the D-register LDn forms: BLKmode MEM of
;; nregs * 8 bytes (each D register covers 8 bytes of memory).
5406 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5407  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5408   (match_operand:DI 1 "register_operand" "r")
5409   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5410   "TARGET_SIMD"
5412   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5413   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5415   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5416   DONE;
;; ld1 builtin: a plain move on little endian, but the element-order-
;; preserving aarch64_be_ld1 unspec on big endian.
5419 (define_expand "aarch64_ld1<VALL_F16:mode>"
5420  [(match_operand:VALL_F16 0 "register_operand")
5421   (match_operand:DI 1 "register_operand")]
5422   "TARGET_SIMD"
5424   machine_mode mode = <VALL_F16:MODE>mode;
5425   rtx mem = gen_rtx_MEM (mode, operands[1]);
5427   if (BYTES_BIG_ENDIAN)
5428     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5429   else
5430     emit_move_insn (operands[0], mem);
5431   DONE;
;; Builtin expander for the Q-register interleaved LDn forms: wrap the
;; address in a MEM of the struct mode and defer to aarch64_simd_ldN.
5434 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5435  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5436   (match_operand:DI 1 "register_operand" "r")
5437   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5438   "TARGET_SIMD"
5440   machine_mode mode = <VSTRUCT:MODE>mode;
5441   rtx mem = gen_rtx_MEM (mode, operands[1]);
5443   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5444   DONE;
;; ld1x2 builtin, Q-register element modes: LD1 of two registers.
5447 (define_expand "aarch64_ld1x2<VQ:mode>"
5448  [(match_operand:OI 0 "register_operand" "=w")
5449   (match_operand:DI 1 "register_operand" "r")
5450   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451   "TARGET_SIMD"
5453   machine_mode mode = OImode;
5454   rtx mem = gen_rtx_MEM (mode, operands[1]);
5456   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5457   DONE;
;; ld1x2 builtin, D-register element modes.
5460 (define_expand "aarch64_ld1x2<VDC:mode>"
5461  [(match_operand:OI 0 "register_operand" "=w")
5462   (match_operand:DI 1 "register_operand" "r")
5463   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464   "TARGET_SIMD"
5466   machine_mode mode = OImode;
5467   rtx mem = gen_rtx_MEM (mode, operands[1]);
5469   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5470   DONE;
;; Builtin expander for the single-lane LDn forms: build a BLKmode MEM of
;; the bytes actually read (element size * nregs), range-check the lane
;; index, then emit the matching vec_load_lanes*_lane insn.
5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5475   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5476         (match_operand:DI 1 "register_operand" "w")
5477         (match_operand:VSTRUCT 2 "register_operand" "0")
5478         (match_operand:SI 3 "immediate_operand" "i")
5479         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480   "TARGET_SIMD"
5482   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5483   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5484                      * <VSTRUCT:nregs>);
;; Reject out-of-range lane indices at expand time.
5486   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5487   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5488         operands[0], mem, operands[2], operands[3]));
5489   DONE;
5492 ;; Expanders for builtins to extract vector registers from large
5493 ;; opaque integer modes.
5495 ;; D-register list.
;; Extract one D register from a structure-mode value: take a subreg of
;; the 128-bit slot at byte offset part * 16, then its 64-bit low part.
5497 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5498  [(match_operand:VDC 0 "register_operand" "=w")
5499   (match_operand:VSTRUCT 1 "register_operand" "w")
5500   (match_operand:SI 2 "immediate_operand" "i")]
5501   "TARGET_SIMD"
5503   int part = INTVAL (operands[2]);
5504   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5505   int offset = part * 16;
5507   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5508   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5509   DONE;
5512 ;; Q-register list.
;; Extract one Q register from a structure-mode value: a direct subreg at
;; byte offset part * 16.
5514 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5515  [(match_operand:VQ 0 "register_operand" "=w")
5516   (match_operand:VSTRUCT 1 "register_operand" "w")
5517   (match_operand:SI 2 "immediate_operand" "i")]
5518   "TARGET_SIMD"
5520   int part = INTVAL (operands[2]);
5521   int offset = part * 16;
5523   emit_move_insn (operands[0],
5524                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5525   DONE;
5528 ;; Permuted-store expanders for neon intrinsics.
5530 ;; Permute instructions
5532 ;; vec_perm support
;; Standard vec_perm pattern for byte vectors; all the work is done in
;; aarch64_expand_vec_perm, which selects between TBL and specialised
;; permute instructions.
5534 (define_expand "vec_perm<mode>"
5535   [(match_operand:VB 0 "register_operand")
5536    (match_operand:VB 1 "register_operand")
5537    (match_operand:VB 2 "register_operand")
5538    (match_operand:VB 3 "register_operand")]
5539   "TARGET_SIMD"
5541   aarch64_expand_vec_perm (operands[0], operands[1],
5542                            operands[2], operands[3], <nunits>);
5543   DONE;
;; Table lookup with a single source register.
5546 (define_insn "aarch64_tbl1<mode>"
5547   [(set (match_operand:VB 0 "register_operand" "=w")
5548         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5549                     (match_operand:VB 2 "register_operand" "w")]
5550                    UNSPEC_TBL))]
5551   "TARGET_SIMD"
5552   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5553   [(set_attr "type" "neon_tbl1<q>")]
5556 ;; Two source registers.
;; Table lookup across a two-register (OImode) table.
5558 (define_insn "aarch64_tbl2v16qi"
5559   [(set (match_operand:V16QI 0 "register_operand" "=w")
5560         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5561                        (match_operand:V16QI 2 "register_operand" "w")]
5562                       UNSPEC_TBL))]
5563   "TARGET_SIMD"
5564   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5565   [(set_attr "type" "neon_tbl2_q")]
;; Table lookup across a two-register table, any byte-vector width.
5568 (define_insn "aarch64_tbl3<mode>"
5569   [(set (match_operand:VB 0 "register_operand" "=w")
5570         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5571                       (match_operand:VB 2 "register_operand" "w")]
5572                       UNSPEC_TBL))]
5573   "TARGET_SIMD"
5574   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5575   [(set_attr "type" "neon_tbl3")]
;; Table extension (TBX): like TBL but out-of-range indices keep the
;; corresponding byte of operand 1, which is tied to the output.
5578 (define_insn "aarch64_tbx4<mode>"
5579   [(set (match_operand:VB 0 "register_operand" "=w")
5580         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5581                       (match_operand:OI 2 "register_operand" "w")
5582                       (match_operand:VB 3 "register_operand" "w")]
5583                       UNSPEC_TBX))]
5584   "TARGET_SIMD"
5585   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5586   [(set_attr "type" "neon_tbl4")]
5589 ;; Three source registers.
;; Three-register (CImode) table lookup.
5591 (define_insn "aarch64_qtbl3<mode>"
5592   [(set (match_operand:VB 0 "register_operand" "=w")
5593         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5594                       (match_operand:VB 2 "register_operand" "w")]
5595                       UNSPEC_TBL))]
5596   "TARGET_SIMD"
5597   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5598   [(set_attr "type" "neon_tbl3")]
;; Three-register table extension; operand 1 is tied to the output so
;; out-of-range indices leave its bytes unchanged.
5601 (define_insn "aarch64_qtbx3<mode>"
5602   [(set (match_operand:VB 0 "register_operand" "=w")
5603         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5604                       (match_operand:CI 2 "register_operand" "w")
5605                       (match_operand:VB 3 "register_operand" "w")]
5606                       UNSPEC_TBX))]
5607   "TARGET_SIMD"
5608   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5609   [(set_attr "type" "neon_tbl3")]
5612 ;; Four source registers.
;; Four-register (XImode) table lookup.
5614 (define_insn "aarch64_qtbl4<mode>"
5615   [(set (match_operand:VB 0 "register_operand" "=w")
5616         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5617                       (match_operand:VB 2 "register_operand" "w")]
5618                       UNSPEC_TBL))]
5619   "TARGET_SIMD"
5620   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5621   [(set_attr "type" "neon_tbl4")]
;; Four-register table extension.
5624 (define_insn "aarch64_qtbx4<mode>"
5625   [(set (match_operand:VB 0 "register_operand" "=w")
5626         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5627                       (match_operand:XI 2 "register_operand" "w")
5628                       (match_operand:VB 3 "register_operand" "w")]
5629                       UNSPEC_TBX))]
5630   "TARGET_SIMD"
5631   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5632   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into an OImode register pair (used to
;; feed the two-register TBL patterns).  Emitted as "#" and split after
;; reload by aarch64_split_combinev16qi into plain register moves.
5635 (define_insn_and_split "aarch64_combinev16qi"
5636   [(set (match_operand:OI 0 "register_operand" "=w")
5637         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5638                     (match_operand:V16QI 2 "register_operand" "w")]
5639                    UNSPEC_CONCAT))]
5640   "TARGET_SIMD"
5641   "#"
5642   "&& reload_completed"
5643   [(const_int 0)]
5645   aarch64_split_combinev16qi (operands);
5646   DONE;
5648 [(set_attr "type" "multiple")]
5651 ;; This instruction's pattern is generated directly by
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653 ;; need corresponding changes there.
;; ZIP/UZP/TRN-style permutes (the PERMUTE iterator supplies the insn and
;; hi/lo half selection).
5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5655   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5656         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5657                           (match_operand:VALL_F16 2 "register_operand" "w")]
5658          PERMUTE))]
5659   "TARGET_SIMD"
5660   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5661   [(set_attr "type" "neon_permute<q>")]
5664 ;; This instruction's pattern is generated directly by
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666 ;; need corresponding changes there.  Note that the immediate (third)
5667 ;; operand is a lane index not a byte index.
;; EXT: extract a vector from a register pair.  The RTL lane index is
;; converted to EXT's byte offset by scaling with the element size.
5668 (define_insn "aarch64_ext<mode>"
5669   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5670         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5671                           (match_operand:VALL_F16 2 "register_operand" "w")
5672                           (match_operand:SI 3 "immediate_operand" "i")]
5673          UNSPEC_EXT))]
5674   "TARGET_SIMD"
5676   operands[3] = GEN_INT (INTVAL (operands[3])
5677       * GET_MODE_UNIT_SIZE (<MODE>mode));
5678   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5680   [(set_attr "type" "neon_ext<q>")]
5683 ;; This instruction's pattern is generated directly by
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685 ;; need corresponding changes there.
;; REV16/REV32/REV64 element reversal within containers.
5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5687   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5688         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5689                     REVERSE))]
5690   "TARGET_SIMD"
5691   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5692   [(set_attr "type" "neon_rev<q>")]
;; Multi-structure stores from D-register tuples.  For the VD vector
;; modes a real ST2/ST3/ST4 is used; for the DX (64-bit scalar-in-vector)
;; modes the equivalent multi-register ST1 on a .1d layout is emitted
;; instead.  The BLK destination with a VSTRUCT dummy unspec models the
;; memory access.
5695 (define_insn "aarch64_st2<mode>_dreg"
5696   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5697         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5698                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5699                    UNSPEC_ST2))]
5700   "TARGET_SIMD"
5701   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5702   [(set_attr "type" "neon_store2_2reg")]
5705 (define_insn "aarch64_st2<mode>_dreg"
5706   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5707         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5708                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5709                    UNSPEC_ST2))]
5710   "TARGET_SIMD"
5711   "st1\\t{%S1.1d - %T1.1d}, %0"
5712   [(set_attr "type" "neon_store1_2reg")]
5715 (define_insn "aarch64_st3<mode>_dreg"
5716   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5717         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5718                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719                    UNSPEC_ST3))]
5720   "TARGET_SIMD"
5721   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5722   [(set_attr "type" "neon_store3_3reg")]
5725 (define_insn "aarch64_st3<mode>_dreg"
5726   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5727         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5728                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5729                    UNSPEC_ST3))]
5730   "TARGET_SIMD"
5731   "st1\\t{%S1.1d - %U1.1d}, %0"
5732   [(set_attr "type" "neon_store1_3reg")]
5735 (define_insn "aarch64_st4<mode>_dreg"
5736   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5737         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5738                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5739                    UNSPEC_ST4))]
5740   "TARGET_SIMD"
5741   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5742   [(set_attr "type" "neon_store4_4reg")]
5745 (define_insn "aarch64_st4<mode>_dreg"
5746   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5747         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5748                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5749                    UNSPEC_ST4))]
5750   "TARGET_SIMD"
5751   "st1\\t{%S1.1d - %V1.1d}, %0"
5752   [(set_attr "type" "neon_store1_4reg")]
;; Builtin expanders for the structure stores.  Each takes the base
;; address in operand 0 (DI), wraps it in a MEM of the appropriate mode
;; / size, and dispatches to the matching *_dreg or aarch64_simd_st*
;; insn above.
5755 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5756  [(match_operand:DI 0 "register_operand" "r")
5757   (match_operand:VSTRUCT 1 "register_operand" "w")
5758   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759   "TARGET_SIMD"
5761   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
;; Each of the nregs D registers stores 8 bytes.
5762   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5764   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5765   DONE;
5768 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5769  [(match_operand:DI 0 "register_operand" "r")
5770   (match_operand:VSTRUCT 1 "register_operand" "w")
5771   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772   "TARGET_SIMD"
5774   machine_mode mode = <VSTRUCT:MODE>mode;
5775   rtx mem = gen_rtx_MEM (mode, operands[0]);
5777   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5778   DONE;
;; Lane-wise structure store: operand 2 selects the lane.
5781 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5782  [(match_operand:DI 0 "register_operand" "r")
5783   (match_operand:VSTRUCT 1 "register_operand" "w")
5784   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5785   (match_operand:SI 2 "immediate_operand")]
5786   "TARGET_SIMD"
5788   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
;; One element per register is stored, so size = element size * nregs.
5789   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5790                      * <VSTRUCT:nregs>);
5792   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5793                 mem, operands[1], operands[2]));
5794   DONE;
;; Single-vector store.  On big-endian a dedicated ST1 pattern is used
;; to keep the in-memory lane order; otherwise a plain move suffices.
5797 (define_expand "aarch64_st1<VALL_F16:mode>"
5798  [(match_operand:DI 0 "register_operand")
5799   (match_operand:VALL_F16 1 "register_operand")]
5800   "TARGET_SIMD"
5802   machine_mode mode = <VALL_F16:MODE>mode;
5803   rtx mem = gen_rtx_MEM (mode, operands[0]);
5805   if (BYTES_BIG_ENDIAN)
5806     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5807   else
5808     emit_move_insn (mem, operands[1]);
5809   DONE;
5812 ;; Expander for builtins to insert vector registers into large
5813 ;; opaque integer modes.
5815 ;; Q-register list.  We don't need a D-reg inserter as we zero
5816 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5818 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5819  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5820   (match_operand:VSTRUCT 1 "register_operand" "0")
5821   (match_operand:VQ 2 "register_operand" "w")
5822   (match_operand:SI 3 "immediate_operand" "i")]
5823   "TARGET_SIMD"
;; Copy the whole tuple, then overwrite the selected 16-byte slot via a
;; SUBREG at byte offset part * 16.
5825   int part = INTVAL (operands[3]);
5826   int offset = part * 16;
5828   emit_move_insn (operands[0], operands[1]);
5829   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5830                   operands[2]);
5831   DONE;
5834 ;; Standard pattern name vec_init<mode><Vel>.
5836 (define_expand "vec_init<mode><Vel>"
5837   [(match_operand:VALL_F16 0 "register_operand" "")
5838    (match_operand 1 "" "")]
5839   "TARGET_SIMD"
5841   aarch64_expand_vector_init (operands[0], operands[1]);
5842   DONE;
;; Load a single element from memory and duplicate it into every lane
;; of the destination (matched from a vec_duplicate of a memory load).
5845 (define_insn "*aarch64_simd_ld1r<mode>"
5846   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5847         (vec_duplicate:VALL_F16
5848           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5849   "TARGET_SIMD"
5850   "ld1r\\t{%0.<Vtype>}, %1"
5851   [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 of two consecutive vectors into an OI register pair; one variant
;; for the Q-register (VQ) element layout and one for VDC.
5854 (define_insn "aarch64_simd_ld1<mode>_x2"
5855   [(set (match_operand:OI 0 "register_operand" "=w")
5856         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858                    UNSPEC_LD1))]
5859   "TARGET_SIMD"
5860   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861   [(set_attr "type" "neon_load1_2reg<q>")]
5864 (define_insn "aarch64_simd_ld1<mode>_x2"
5865   [(set (match_operand:OI 0 "register_operand" "=w")
5866         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868                    UNSPEC_LD1))]
5869   "TARGET_SIMD"
5870   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871   [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal estimate / step instructions (FRECPE, FRECPX, FRECPS) for
;; both vector and scalar FP modes, plus the unsigned integer estimate
;; URECPE.
5875 (define_insn "@aarch64_frecpe<mode>"
5876   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5877         (unspec:VHSDF_HSDF
5878          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5879          UNSPEC_FRECPE))]
5880   "TARGET_SIMD"
5881   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5882   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5885 (define_insn "aarch64_frecpx<mode>"
5886   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5887         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5888          UNSPEC_FRECPX))]
5889   "TARGET_SIMD"
5890   "frecpx\t%<s>0, %<s>1"
5891   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5894 (define_insn "@aarch64_frecps<mode>"
5895   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5896         (unspec:VHSDF_HSDF
5897           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5898           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5899           UNSPEC_FRECPS))]
5900   "TARGET_SIMD"
5901   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5902   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5905 (define_insn "aarch64_urecpe<mode>"
5906   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5907         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5908                 UNSPEC_URECPE))]
5909  "TARGET_SIMD"
5910  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5911   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5913 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract the lane given by operand 2 from vector operand 1 into the
;; scalar operand 0, via the aarch64_get_lane insn.
5915 (define_expand "vec_extract<mode><Vel>"
5916   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5917    (match_operand:VALL_F16 1 "register_operand" "")
5918    (match_operand:SI 2 "immediate_operand" "")]
5919   "TARGET_SIMD"
5921     emit_insn
5922       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5923     DONE;
5926 ;; aes
;; AESE/AESD round instruction.  Operand 1 uses "%0" so it is tied to
;; the destination and commutative with operand 2.
5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5929   [(set (match_operand:V16QI 0 "register_operand" "=w")
5930         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5931                        (match_operand:V16QI 2 "register_operand" "w")]
5932          CRYPTO_AES))]
5933   "TARGET_SIMD && TARGET_AES"
5934   "aes<aes_op>\\t%0.16b, %2.16b"
5935   [(set_attr "type" "crypto_aese")]
;; Combiner patterns: an XOR with zero folded into the AES operand list
;; (both operand orders of the unspec) is just the AES instruction,
;; since AESE/AESD begin by XOR-ing their two inputs.
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939   [(set (match_operand:V16QI 0 "register_operand" "=w")
5940         (unspec:V16QI [(xor:V16QI
5941                         (match_operand:V16QI 1 "register_operand" "%0")
5942                         (match_operand:V16QI 2 "register_operand" "w"))
5943                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5944                        CRYPTO_AES))]
5945   "TARGET_SIMD && TARGET_AES"
5946   "aes<aes_op>\\t%0.16b, %2.16b"
5947   [(set_attr "type" "crypto_aese")]
5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5951   [(set (match_operand:V16QI 0 "register_operand" "=w")
5952         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5953         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5954                    (match_operand:V16QI 2 "register_operand" "w"))]
5955         CRYPTO_AES))]
5956   "TARGET_SIMD && TARGET_AES"
5957   "aes<aes_op>\\t%0.16b, %2.16b"
5958   [(set_attr "type" "crypto_aese")]
5961 ;; When AES/AESMC fusion is enabled we want the register allocation to
5962 ;; look like:
5963 ;;    AESE Vn, _
5964 ;;    AESMC Vn, Vn
5965 ;; So prefer to tie operand 1 to operand 0 when fusing.
5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5968   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5969         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5970          CRYPTO_AESMC))]
5971   "TARGET_SIMD && TARGET_AES"
5972   "aes<aesmc_op>\\t%0.16b, %1.16b"
5973   [(set_attr "type" "crypto_aesmc")
;; The tied alternative is only enabled when AESE/AESMC fusion is on for
;; the current tuning; otherwise fall through to the untied form.
5974    (set_attr_alternative "enabled"
5975      [(if_then_else (match_test
5976                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5977                      (const_string "yes" )
5978                      (const_string "no"))
5979       (const_string "yes")])]
5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5983 ;; and enforce the register dependency without scheduling or register
5984 ;; allocation messing up the order or introducing moves inbetween.
5985 ;;  Mash the two together during combine.
5987 (define_insn "*aarch64_crypto_aese_fused"
5988   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5989         (unspec:V16QI
5990           [(unspec:V16QI
5991             [(match_operand:V16QI 1 "register_operand" "0")
5992              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5993           ] UNSPEC_AESMC))]
5994   "TARGET_SIMD && TARGET_AES
5995    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5996   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5997   [(set_attr "type" "crypto_aese")
5998    (set_attr "length" "8")]
6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6002 ;; and enforce the register dependency without scheduling or register
6003 ;; allocation messing up the order or introducing moves inbetween.
6004 ;;  Mash the two together during combine.
6006 (define_insn "*aarch64_crypto_aesd_fused"
6007   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6008         (unspec:V16QI
6009           [(unspec:V16QI
6010             [(match_operand:V16QI 1 "register_operand" "0")
6011              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6012           ] UNSPEC_AESIMC))]
6013   "TARGET_SIMD && TARGET_AES
6014    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6015   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6016   [(set_attr "type" "crypto_aese")
6017    (set_attr "length" "8")]
6020 ;; sha1
;; SHA1H on a plain SI value.
6022 (define_insn "aarch64_crypto_sha1hsi"
6023   [(set (match_operand:SI 0 "register_operand" "=w")
6024         (unspec:SI [(match_operand:SI 1
6025                        "register_operand" "w")]
6026          UNSPEC_SHA1H))]
6027   "TARGET_SIMD && TARGET_SHA2"
6028   "sha1h\\t%s0, %s1"
6029   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H applied to lane 0 of a V4SI; the lane number differs between
;; endiannesses (0 on little-endian, 3 on big-endian), hence two
;; patterns guarded by BYTES_BIG_ENDIAN.
6032 (define_insn "aarch64_crypto_sha1hv4si"
6033   [(set (match_operand:SI 0 "register_operand" "=w")
6034         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6035                      (parallel [(const_int 0)]))]
6036          UNSPEC_SHA1H))]
6037   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6038   "sha1h\\t%s0, %s1"
6039   [(set_attr "type" "crypto_sha1_fast")]
6042 (define_insn "aarch64_be_crypto_sha1hv4si"
6043   [(set (match_operand:SI 0 "register_operand" "=w")
6044         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6045                      (parallel [(const_int 3)]))]
6046          UNSPEC_SHA1H))]
6047   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6048   "sha1h\\t%s0, %s1"
6049   [(set_attr "type" "crypto_sha1_fast")]
6052 (define_insn "aarch64_crypto_sha1su1v4si"
6053   [(set (match_operand:V4SI 0 "register_operand" "=w")
6054         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055                       (match_operand:V4SI 2 "register_operand" "w")]
6056          UNSPEC_SHA1SU1))]
6057   "TARGET_SIMD && TARGET_SHA2"
6058   "sha1su1\\t%0.4s, %2.4s"
6059   [(set_attr "type" "crypto_sha1_fast")]
6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6063   [(set (match_operand:V4SI 0 "register_operand" "=w")
6064         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6065                       (match_operand:SI 2 "register_operand" "w")
6066                       (match_operand:V4SI 3 "register_operand" "w")]
6067          CRYPTO_SHA1))]
6068   "TARGET_SIMD && TARGET_SHA2"
6069   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6070   [(set_attr "type" "crypto_sha1_slow")]
6073 (define_insn "aarch64_crypto_sha1su0v4si"
6074   [(set (match_operand:V4SI 0 "register_operand" "=w")
6075         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6076                       (match_operand:V4SI 2 "register_operand" "w")
6077                       (match_operand:V4SI 3 "register_operand" "w")]
6078          UNSPEC_SHA1SU0))]
6079   "TARGET_SIMD && TARGET_SHA2"
6080   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6081   [(set_attr "type" "crypto_sha1_xor")]
6084 ;; sha256
6086 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6087   [(set (match_operand:V4SI 0 "register_operand" "=w")
6088         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6089                       (match_operand:V4SI 2 "register_operand" "w")
6090                       (match_operand:V4SI 3 "register_operand" "w")]
6091          CRYPTO_SHA256))]
6092   "TARGET_SIMD && TARGET_SHA2"
6093   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6094   [(set_attr "type" "crypto_sha256_slow")]
6097 (define_insn "aarch64_crypto_sha256su0v4si"
6098   [(set (match_operand:V4SI 0 "register_operand" "=w")
6099         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6100                       (match_operand:V4SI 2 "register_operand" "w")]
6101          UNSPEC_SHA256SU0))]
6102   "TARGET_SIMD && TARGET_SHA2"
6103   "sha256su0\\t%0.4s, %2.4s"
6104   [(set_attr "type" "crypto_sha256_fast")]
6107 (define_insn "aarch64_crypto_sha256su1v4si"
6108   [(set (match_operand:V4SI 0 "register_operand" "=w")
6109         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6110                       (match_operand:V4SI 2 "register_operand" "w")
6111                       (match_operand:V4SI 3 "register_operand" "w")]
6112          UNSPEC_SHA256SU1))]
6113   "TARGET_SIMD && TARGET_SHA2"
6114   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6115   [(set_attr "type" "crypto_sha256_slow")]
6118 ;; sha512
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6121   [(set (match_operand:V2DI 0 "register_operand" "=w")
6122         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6123                       (match_operand:V2DI 2 "register_operand" "w")
6124                       (match_operand:V2DI 3 "register_operand" "w")]
6125          CRYPTO_SHA512))]
6126   "TARGET_SIMD && TARGET_SHA3"
6127   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6128   [(set_attr "type" "crypto_sha512")]
6131 (define_insn "aarch64_crypto_sha512su0qv2di"
6132   [(set (match_operand:V2DI 0 "register_operand" "=w")
6133         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6134                       (match_operand:V2DI 2 "register_operand" "w")]
6135          UNSPEC_SHA512SU0))]
6136   "TARGET_SIMD && TARGET_SHA3"
6137   "sha512su0\\t%0.2d, %2.2d"
6138   [(set_attr "type" "crypto_sha512")]
6141 (define_insn "aarch64_crypto_sha512su1qv2di"
6142   [(set (match_operand:V2DI 0 "register_operand" "=w")
6143         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6144                       (match_operand:V2DI 2 "register_operand" "w")
6145                       (match_operand:V2DI 3 "register_operand" "w")]
6146          UNSPEC_SHA512SU1))]
6147   "TARGET_SIMD && TARGET_SHA3"
6148   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6149   [(set_attr "type" "crypto_sha512")]
6152 ;; sha3
;; EOR3: three-way XOR, matched from nested XOR RTL.
6154 (define_insn "eor3q<mode>4"
6155   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6156         (xor:VQ_I
6157          (xor:VQ_I
6158           (match_operand:VQ_I 2 "register_operand" "w")
6159           (match_operand:VQ_I 3 "register_operand" "w"))
6160          (match_operand:VQ_I 1 "register_operand" "w")))]
6161   "TARGET_SIMD && TARGET_SHA3"
6162   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6163   [(set_attr "type" "crypto_sha3")]
;; RAX1: XOR with a rotate-left-by-one of the second input.
6166 (define_insn "aarch64_rax1qv2di"
6167   [(set (match_operand:V2DI 0 "register_operand" "=w")
6168         (xor:V2DI
6169          (rotate:V2DI
6170           (match_operand:V2DI 2 "register_operand" "w")
6171           (const_int 1))
6172          (match_operand:V2DI 1 "register_operand" "w")))]
6173   "TARGET_SIMD && TARGET_SHA3"
6174   "rax1\\t%0.2d, %1.2d, %2.2d"
6175   [(set_attr "type" "crypto_sha3")]
;; XAR: XOR then rotate right by the immediate in operand 3.
6178 (define_insn "aarch64_xarqv2di"
6179   [(set (match_operand:V2DI 0 "register_operand" "=w")
6180         (rotatert:V2DI
6181          (xor:V2DI
6182           (match_operand:V2DI 1 "register_operand" "%w")
6183           (match_operand:V2DI 2 "register_operand" "w"))
6184          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6185   "TARGET_SIMD && TARGET_SHA3"
6186   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6187   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit-clear (AND-NOT) then XOR.
6190 (define_insn "bcaxq<mode>4"
6191   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6192         (xor:VQ_I
6193          (and:VQ_I
6194           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6195           (match_operand:VQ_I 2 "register_operand" "w"))
6196          (match_operand:VQ_I 1 "register_operand" "w")))]
6197   "TARGET_SIMD && TARGET_SHA3"
6198   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6199   [(set_attr "type" "crypto_sha3")]
6202 ;; SM3
6204 (define_insn "aarch64_sm3ss1qv4si"
6205   [(set (match_operand:V4SI 0 "register_operand" "=w")
6206         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6207                       (match_operand:V4SI 2 "register_operand" "w")
6208                       (match_operand:V4SI 3 "register_operand" "w")]
6209          UNSPEC_SM3SS1))]
6210   "TARGET_SIMD && TARGET_SM4"
6211   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6212   [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/1B/2A/2B; operand 4 is a 2-bit immediate lane selector.
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6217   [(set (match_operand:V4SI 0 "register_operand" "=w")
6218         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219                       (match_operand:V4SI 2 "register_operand" "w")
6220                       (match_operand:V4SI 3 "register_operand" "w")
6221                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6222          CRYPTO_SM3TT))]
6223   "TARGET_SIMD && TARGET_SM4"
6224   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6225   [(set_attr "type" "crypto_sm3")]
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6229   [(set (match_operand:V4SI 0 "register_operand" "=w")
6230         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6231                       (match_operand:V4SI 2 "register_operand" "w")
6232                       (match_operand:V4SI 3 "register_operand" "w")]
6233          CRYPTO_SM3PART))]
6234   "TARGET_SIMD && TARGET_SM4"
6235   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6236   [(set_attr "type" "crypto_sm3")]
6239 ;; SM4
6241 (define_insn "aarch64_sm4eqv4si"
6242   [(set (match_operand:V4SI 0 "register_operand" "=w")
6243         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6244                       (match_operand:V4SI 2 "register_operand" "w")]
6245          UNSPEC_SM4E))]
6246   "TARGET_SIMD && TARGET_SM4"
6247   "sm4e\\t%0.4s, %2.4s"
6248   [(set_attr "type" "crypto_sm4")]
6251 (define_insn "aarch64_sm4ekeyqv4si"
6252   [(set (match_operand:V4SI 0 "register_operand" "=w")
6253         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6254                       (match_operand:V4SI 2 "register_operand" "w")]
6255          UNSPEC_SM4EKEY))]
6256   "TARGET_SIMD && TARGET_SM4"
6257   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6258   [(set_attr "type" "crypto_sm4")]
6261 ;; fp16fml
;; FMLAL/FMLSL widening half->single multiply-accumulate.  The expanders
;; build parallel-constant half selectors (low or high half of the wide
;; HF operands) and hand them to the matching aarch64_simd_fml* insns,
;; which express the operation as an fma over float_extend'ed
;; vec_selects (with a neg for the subtract forms).
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6264   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6265         (unspec:VDQSF
6266          [(match_operand:VDQSF 1 "register_operand" "0")
6267           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6268           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6269          VFMLA16_LOW))]
6270   "TARGET_F16FML"
6272   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6273                                             <nunits> * 2, false);
6274   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6275                                             <nunits> * 2, false);
6277   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6278                                                                 operands[1],
6279                                                                 operands[2],
6280                                                                 operands[3],
6281                                                                 p1, p2));
6282   DONE;
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6287   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6288         (unspec:VDQSF
6289          [(match_operand:VDQSF 1 "register_operand" "0")
6290           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6291           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6292          VFMLA16_HIGH))]
6293   "TARGET_F16FML"
6295   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6296   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6298   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6299                                                                  operands[1],
6300                                                                  operands[2],
6301                                                                  operands[3],
6302                                                                  p1, p2));
6303   DONE;
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6307   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6308         (fma:VDQSF
6309          (float_extend:VDQSF
6310           (vec_select:<VFMLA_SEL_W>
6311            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6312            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6313          (float_extend:VDQSF
6314           (vec_select:<VFMLA_SEL_W>
6315            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6316            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6317          (match_operand:VDQSF 1 "register_operand" "0")))]
6318   "TARGET_F16FML"
6319   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6320   [(set_attr "type" "neon_fp_mul_s")]
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6324   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6325         (fma:VDQSF
6326          (float_extend:VDQSF
6327           (neg:<VFMLA_SEL_W>
6328            (vec_select:<VFMLA_SEL_W>
6329             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6330             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6331          (float_extend:VDQSF
6332           (vec_select:<VFMLA_SEL_W>
6333            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6334            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6335          (match_operand:VDQSF 1 "register_operand" "0")))]
6336   "TARGET_F16FML"
6337   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6338   [(set_attr "type" "neon_fp_mul_s")]
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6342   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6343         (fma:VDQSF
6344          (float_extend:VDQSF
6345           (vec_select:<VFMLA_SEL_W>
6346            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6347            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6348          (float_extend:VDQSF
6349           (vec_select:<VFMLA_SEL_W>
6350            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6351            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6352          (match_operand:VDQSF 1 "register_operand" "0")))]
6353   "TARGET_F16FML"
6354   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6355   [(set_attr "type" "neon_fp_mul_s")]
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6359   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6360         (fma:VDQSF
6361          (float_extend:VDQSF
6362           (neg:<VFMLA_SEL_W>
6363            (vec_select:<VFMLA_SEL_W>
6364             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6365             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6366          (float_extend:VDQSF
6367           (vec_select:<VFMLA_SEL_W>
6368            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6369            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6370          (match_operand:VDQSF 1 "register_operand" "0")))]
6371   "TARGET_F16FML"
6372   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6373   [(set_attr "type" "neon_fp_mul_s")]
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6377   [(set (match_operand:V2SF 0 "register_operand" "")
6378         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6379                            (match_operand:V4HF 2 "register_operand" "")
6380                            (match_operand:V4HF 3 "register_operand" "")
6381                            (match_operand:SI 4 "aarch64_imm2" "")]
6382          VFMLA16_LOW))]
6383   "TARGET_F16FML"
6385     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6386     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6388     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6389                                                             operands[1],
6390                                                             operands[2],
6391                                                             operands[3],
6392                                                             p1, lane));
6393     DONE;
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6398   [(set (match_operand:V2SF 0 "register_operand" "")
6399         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6400                            (match_operand:V4HF 2 "register_operand" "")
6401                            (match_operand:V4HF 3 "register_operand" "")
6402                            (match_operand:SI 4 "aarch64_imm2" "")]
6403          VFMLA16_HIGH))]
6404   "TARGET_F16FML"
6406     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6407     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6409     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6410                                                              operands[1],
6411                                                              operands[2],
6412                                                              operands[3],
6413                                                              p1, lane));
6414     DONE;
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6418   [(set (match_operand:V2SF 0 "register_operand" "=w")
6419         (fma:V2SF
6420          (float_extend:V2SF
6421            (vec_select:V2HF
6422             (match_operand:V4HF 2 "register_operand" "w")
6423             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6424          (float_extend:V2SF
6425            (vec_duplicate:V2HF
6426             (vec_select:HF
6427              (match_operand:V4HF 3 "register_operand" "x")
6428              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6429          (match_operand:V2SF 1 "register_operand" "0")))]
6430   "TARGET_F16FML"
6431   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6432   [(set_attr "type" "neon_fp_mul_s")]
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6436   [(set (match_operand:V2SF 0 "register_operand" "=w")
6437         (fma:V2SF
6438          (float_extend:V2SF
6439           (neg:V2HF
6440            (vec_select:V2HF
6441             (match_operand:V4HF 2 "register_operand" "w")
6442             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6443          (float_extend:V2SF
6444           (vec_duplicate:V2HF
6445            (vec_select:HF
6446             (match_operand:V4HF 3 "register_operand" "x")
6447             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6448          (match_operand:V2SF 1 "register_operand" "0")))]
6449   "TARGET_F16FML"
6450   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6451   [(set_attr "type" "neon_fp_mul_s")]
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6455   [(set (match_operand:V2SF 0 "register_operand" "=w")
6456         (fma:V2SF
6457          (float_extend:V2SF
6458            (vec_select:V2HF
6459             (match_operand:V4HF 2 "register_operand" "w")
6460             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6461          (float_extend:V2SF
6462            (vec_duplicate:V2HF
6463             (vec_select:HF
6464              (match_operand:V4HF 3 "register_operand" "x")
6465              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6466          (match_operand:V2SF 1 "register_operand" "0")))]
6467   "TARGET_F16FML"
6468   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6469   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), high half: widen the upper two HF elements of
;; operand 2 and lane %5 of operand 3, negate the first multiplicand, and
;; accumulate into V2SF operand 1 (fused multiply-subtract via fma of a
;; negated operand).
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6473   [(set (match_operand:V2SF 0 "register_operand" "=w")
6474         (fma:V2SF
6475          (float_extend:V2SF
6476            (neg:V2HF
6477             (vec_select:V2HF
6478              (match_operand:V4HF 2 "register_operand" "w")
6479              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6480          (float_extend:V2SF
6481            (vec_duplicate:V2HF
6482             (vec_select:HF
6483              (match_operand:V4HF 3 "register_operand" "x")
6484              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6485          (match_operand:V2SF 1 "register_operand" "0")))]
6486   "TARGET_F16FML"
6487   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6488   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the Q-form low-half by-element FMLAL/FMLSL (<f16mac1>
;; selects which) with a 128-bit (V8HF) indexed operand: build the
;; parallel selecting the low half of V8HF and the endianness-adjusted
;; lane rtx, then emit the matching define_insn.  Lane index is 0..7
;; (aarch64_lane_imm3).
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6492   [(set (match_operand:V4SF 0 "register_operand" "")
6493         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6494                            (match_operand:V8HF 2 "register_operand" "")
6495                            (match_operand:V8HF 3 "register_operand" "")
6496                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6497          VFMLA16_LOW))]
6498   "TARGET_F16FML"
6500     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6501     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6503     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6504                                                               operands[1],
6505                                                               operands[2],
6506                                                               operands[3],
6507                                                               p1, lane));
6508     DONE;
;; High-half counterpart of the expander above: identical except that the
;; half-selection parallel is built with high = true, so the FMLAL2/FMLSL2
;; insn operating on the upper V8HF elements is emitted.
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6512   [(set (match_operand:V4SF 0 "register_operand" "")
6513         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6514                            (match_operand:V8HF 2 "register_operand" "")
6515                            (match_operand:V8HF 3 "register_operand" "")
6516                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6517          VFMLA16_HIGH))]
6518   "TARGET_F16FML"
6520     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6521     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6523     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6524                                                                operands[1],
6525                                                                operands[2],
6526                                                                operands[3],
6527                                                                p1, lane));
6528     DONE;
;; FMLAL (by element), Q form, low half: widen the low four HF elements of
;; V8HF operand 2 and the duplicated lane %5 (0..7) of V8HF operand 3 to
;; SF and accumulate into V4SF operand 1 via fma.
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6532   [(set (match_operand:V4SF 0 "register_operand" "=w")
6533         (fma:V4SF
6534          (float_extend:V4SF
6535           (vec_select:V4HF
6536             (match_operand:V8HF 2 "register_operand" "w")
6537             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6538          (float_extend:V4SF
6539           (vec_duplicate:V4HF
6540            (vec_select:HF
6541             (match_operand:V8HF 3 "register_operand" "x")
6542             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6543          (match_operand:V4SF 1 "register_operand" "0")))]
6544   "TARGET_F16FML"
6545   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6546   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), Q form, low half: as the FMLAL pattern above but
;; with the first multiplicand negated, giving a fused multiply-subtract
;; into V4SF operand 1.
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6550   [(set (match_operand:V4SF 0 "register_operand" "=w")
6551         (fma:V4SF
6552           (float_extend:V4SF
6553            (neg:V4HF
6554             (vec_select:V4HF
6555              (match_operand:V8HF 2 "register_operand" "w")
6556              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6557          (float_extend:V4SF
6558           (vec_duplicate:V4HF
6559            (vec_select:HF
6560             (match_operand:V8HF 3 "register_operand" "x")
6561             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6562          (match_operand:V4SF 1 "register_operand" "0")))]
6563   "TARGET_F16FML"
6564   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6565   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), Q form, high half: widen the upper four HF
;; elements of V8HF operand 2 and lane %5 of V8HF operand 3, accumulating
;; into V4SF operand 1.
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6569   [(set (match_operand:V4SF 0 "register_operand" "=w")
6570         (fma:V4SF
6571          (float_extend:V4SF
6572           (vec_select:V4HF
6573             (match_operand:V8HF 2 "register_operand" "w")
6574             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6575          (float_extend:V4SF
6576           (vec_duplicate:V4HF
6577            (vec_select:HF
6578             (match_operand:V8HF 3 "register_operand" "x")
6579             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6580          (match_operand:V4SF 1 "register_operand" "0")))]
6581   "TARGET_F16FML"
6582   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6583   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), Q form, high half: negated-multiplicand variant of
;; the FMLAL2 pattern above (fused multiply-subtract on the upper half).
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6587   [(set (match_operand:V4SF 0 "register_operand" "=w")
6588         (fma:V4SF
6589          (float_extend:V4SF
6590           (neg:V4HF
6591            (vec_select:V4HF
6592             (match_operand:V8HF 2 "register_operand" "w")
6593             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6594          (float_extend:V4SF
6595           (vec_duplicate:V4HF
6596            (vec_select:HF
6597             (match_operand:V8HF 3 "register_operand" "x")
6598             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6599          (match_operand:V4SF 1 "register_operand" "0")))]
6600   "TARGET_F16FML"
6601   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6602   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the D-form (V2SF result) low-half FMLAL/FMLSL with a
;; 128-bit (V8HF) indexed operand: the half parallel is built over V4HF
;; (the multiplicand vector) while the lane rtx is endian-adjusted in
;; V8HFmode, since the index selects from the V8HF operand 3.
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6606   [(set (match_operand:V2SF 0 "register_operand" "")
6607         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6608                       (match_operand:V4HF 2 "register_operand" "")
6609                       (match_operand:V8HF 3 "register_operand" "")
6610                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6611          VFMLA16_LOW))]
6612   "TARGET_F16FML"
6614     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6615     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6617     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6618                                                              operands[1],
6619                                                              operands[2],
6620                                                              operands[3],
6621                                                              p1, lane));
6622     DONE;
;; High-half counterpart of the expander above (half parallel built with
;; high = true), emitting the FMLAL2/FMLSL2 by-element insn.
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6627   [(set (match_operand:V2SF 0 "register_operand" "")
6628         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6629                       (match_operand:V4HF 2 "register_operand" "")
6630                       (match_operand:V8HF 3 "register_operand" "")
6631                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6632          VFMLA16_HIGH))]
6633   "TARGET_F16FML"
6635     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6636     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6638     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6639                                                               operands[1],
6640                                                               operands[2],
6641                                                               operands[3],
6642                                                               p1, lane));
6643     DONE;
;; FMLAL (by element), D form, low half, V8HF index: widen the low two HF
;; elements of V4HF operand 2 and lane %5 (0..7) of V8HF operand 3 to SF
;; and accumulate into V2SF operand 1 via fma.
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6648   [(set (match_operand:V2SF 0 "register_operand" "=w")
6649         (fma:V2SF
6650          (float_extend:V2SF
6651            (vec_select:V2HF
6652             (match_operand:V4HF 2 "register_operand" "w")
6653             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6654          (float_extend:V2SF
6655           (vec_duplicate:V2HF
6656            (vec_select:HF
6657             (match_operand:V8HF 3 "register_operand" "x")
6658             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6659          (match_operand:V2SF 1 "register_operand" "0")))]
6660   "TARGET_F16FML"
6661   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6662   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), D form, low half, V8HF index: negated-multiplicand
;; variant of the pattern above (fused multiply-subtract into operand 1).
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6666   [(set (match_operand:V2SF 0 "register_operand" "=w")
6667         (fma:V2SF
6668          (float_extend:V2SF
6669           (neg:V2HF
6670            (vec_select:V2HF
6671             (match_operand:V4HF 2 "register_operand" "w")
6672             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6673          (float_extend:V2SF
6674           (vec_duplicate:V2HF
6675            (vec_select:HF
6676             (match_operand:V8HF 3 "register_operand" "x")
6677             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678          (match_operand:V2SF 1 "register_operand" "0")))]
6679   "TARGET_F16FML"
6680   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6681   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), D form, high half, V8HF index: widen the upper two
;; HF elements of V4HF operand 2 and lane %5 of V8HF operand 3, then
;; accumulate into V2SF operand 1.
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6685   [(set (match_operand:V2SF 0 "register_operand" "=w")
6686         (fma:V2SF
6687          (float_extend:V2SF
6688            (vec_select:V2HF
6689             (match_operand:V4HF 2 "register_operand" "w")
6690             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6691          (float_extend:V2SF
6692           (vec_duplicate:V2HF
6693            (vec_select:HF
6694             (match_operand:V8HF 3 "register_operand" "x")
6695             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6696          (match_operand:V2SF 1 "register_operand" "0")))]
6697   "TARGET_F16FML"
6698   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6699   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), D form, high half, V8HF index: negated-multiplicand
;; variant of the FMLAL2 pattern above.
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6703   [(set (match_operand:V2SF 0 "register_operand" "=w")
6704         (fma:V2SF
6705          (float_extend:V2SF
6706           (neg:V2HF
6707            (vec_select:V2HF
6708             (match_operand:V4HF 2 "register_operand" "w")
6709             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6710          (float_extend:V2SF
6711           (vec_duplicate:V2HF
6712            (vec_select:HF
6713             (match_operand:V8HF 3 "register_operand" "x")
6714             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6715          (match_operand:V2SF 1 "register_operand" "0")))]
6716   "TARGET_F16FML"
6717   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6718   [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the Q-form low-half FMLAL/FMLSL with a 64-bit (V4HF)
;; indexed operand: half parallel built over V8HF (the multiplicand),
;; lane rtx endian-adjusted in V4HFmode (index 0..3, aarch64_imm2).
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6722   [(set (match_operand:V4SF 0 "register_operand" "")
6723         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6724                       (match_operand:V8HF 2 "register_operand" "")
6725                       (match_operand:V4HF 3 "register_operand" "")
6726                       (match_operand:SI 4 "aarch64_imm2" "")]
6727          VFMLA16_LOW))]
6728   "TARGET_F16FML"
6730     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6731     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6733     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6734                                                              operands[1],
6735                                                              operands[2],
6736                                                              operands[3],
6737                                                              p1, lane));
6738     DONE;
;; High-half counterpart of the expander above (half parallel built with
;; high = true), emitting the FMLAL2/FMLSL2 by-element insn.
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6742   [(set (match_operand:V4SF 0 "register_operand" "")
6743         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6744                       (match_operand:V8HF 2 "register_operand" "")
6745                       (match_operand:V4HF 3 "register_operand" "")
6746                       (match_operand:SI 4 "aarch64_imm2" "")]
6747          VFMLA16_HIGH))]
6748   "TARGET_F16FML"
6750     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6751     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6753     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6754                                                               operands[1],
6755                                                               operands[2],
6756                                                               operands[3],
6757                                                               p1, lane));
6758     DONE;
;; FMLAL (by element), Q form, low half, V4HF index: widen the low four HF
;; elements of V8HF operand 2 and lane %5 (0..3) of V4HF operand 3 to SF
;; and accumulate into V4SF operand 1 via fma.
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6762   [(set (match_operand:V4SF 0 "register_operand" "=w")
6763         (fma:V4SF
6764          (float_extend:V4SF
6765           (vec_select:V4HF
6766            (match_operand:V8HF 2 "register_operand" "w")
6767            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6768          (float_extend:V4SF
6769           (vec_duplicate:V4HF
6770            (vec_select:HF
6771             (match_operand:V4HF 3 "register_operand" "x")
6772             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6773          (match_operand:V4SF 1 "register_operand" "0")))]
6774   "TARGET_F16FML"
6775   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6776   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), Q form, low half, V4HF index: negated-multiplicand
;; variant of the pattern above (fused multiply-subtract into operand 1).
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6780   [(set (match_operand:V4SF 0 "register_operand" "=w")
6781         (fma:V4SF
6782          (float_extend:V4SF
6783           (neg:V4HF
6784            (vec_select:V4HF
6785             (match_operand:V8HF 2 "register_operand" "w")
6786             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6787          (float_extend:V4SF
6788           (vec_duplicate:V4HF
6789            (vec_select:HF
6790             (match_operand:V4HF 3 "register_operand" "x")
6791             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6792          (match_operand:V4SF 1 "register_operand" "0")))]
6793   "TARGET_F16FML"
6794   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6795   [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), Q form, high half, V4HF index: widen the upper
;; four HF elements of V8HF operand 2 and lane %5 of V4HF operand 3, then
;; accumulate into V4SF operand 1.
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6799   [(set (match_operand:V4SF 0 "register_operand" "=w")
6800         (fma:V4SF
6801          (float_extend:V4SF
6802           (vec_select:V4HF
6803            (match_operand:V8HF 2 "register_operand" "w")
6804            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6805          (float_extend:V4SF
6806           (vec_duplicate:V4HF
6807            (vec_select:HF
6808             (match_operand:V4HF 3 "register_operand" "x")
6809             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6810          (match_operand:V4SF 1 "register_operand" "0")))]
6811   "TARGET_F16FML"
6812   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6813   [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), Q form, high half, V4HF index: negated-multiplicand
;; variant of the FMLAL2 pattern above.
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6817   [(set (match_operand:V4SF 0 "register_operand" "=w")
6818         (fma:V4SF
6819          (float_extend:V4SF
6820           (neg:V4HF
6821            (vec_select:V4HF
6822             (match_operand:V8HF 2 "register_operand" "w")
6823             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6824          (float_extend:V4SF
6825           (vec_duplicate:V4HF
6826            (vec_select:HF
6827             (match_operand:V4HF 3 "register_operand" "x")
6828             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6829          (match_operand:V4SF 1 "register_operand" "0")))]
6830   "TARGET_F16FML"
6831   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6832   [(set_attr "type" "neon_fp_mul_s")]
6835 ;; pmull -- polynomial (carry-less) multiply long instructions (PMULL/PMULL2).
;; PMULL: polynomial (carry-less) multiply of two 64-bit operands giving a
;; 128-bit (TI) result.  Requires the AES crypto extension in addition to
;; SIMD; the multiplication itself is opaque to the compiler (unspec).
6837 (define_insn "aarch64_crypto_pmulldi"
6838   [(set (match_operand:TI 0 "register_operand" "=w")
6839         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6840                      (match_operand:DI 2 "register_operand" "w")]
6841                     UNSPEC_PMULL))]
6842  "TARGET_SIMD && TARGET_AES"
6843  "pmull\\t%0.1q, %1.1d, %2.1d"
6844   [(set_attr "type" "crypto_pmull")]
;; PMULL2: polynomial (carry-less) multiply of the upper 64-bit elements
;; of two V2DI operands, producing a 128-bit (TI) result.  Also gated on
;; the AES crypto extension.
6847 (define_insn "aarch64_crypto_pmullv2di"
6848  [(set (match_operand:TI 0 "register_operand" "=w")
6849        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6850                    (match_operand:V2DI 2 "register_operand" "w")]
6851                   UNSPEC_PMULL2))]
6852   "TARGET_SIMD && TARGET_AES"
6853   "pmull2\\t%0.1q, %1.2d, %2.2d"
6854   [(set_attr "type" "crypto_pmull")]