1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Move expander for AdvSIMD vector modes (VALL_F16MOV).  The C preamble
;; legitimizes stores: anything that is not an xzr-replaceable zero is
;; forced into a register before the move pattern is emitted.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23 (match_operand:VALL_F16MOV 1 "general_operand"))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander; only available when unaligned accesses
;; are permitted (!STRICT_ALIGNMENT).  Must not fail during expansion.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand")
43 (match_operand:VALL 1 "general_operand"))]
44 "TARGET_SIMD && !STRICT_ALIGNMENT"
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast a scalar into every lane of an integer vector: DUP from
;; element 0 of a SIMD register, or DUP from a general register (?r).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point variant of the broadcast: DUP element 0 into all lanes
;; (SIMD-register source only).
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of a vector into all lanes of the result.
;; The lane number is converted to the architectural (endian-adjusted)
;; numbering before printing.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; As aarch64_dup_lane, but the source vector has the swapped (other)
;; width, so the lane index is adjusted in <VSWAP_WIDTH>mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (D-register) vector move.  Alternatives cover: load, store of
;; zero via xzr, store, SIMD reg-reg copy, SIMD->GP, GP->SIMD (fmov),
;; GP-GP copy, and a vector immediate.
104 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
105 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VDMOV 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; 128-bit (Q-register) vector move.  Storing zero uses STP of xzr pairs;
;; GP-involving alternatives take two instructions (length 8, type
;; "multiple") and are split elsewhere.
132 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
133 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQMOV 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store lane 0 of a vector with a plain scalar STR (only matches when
;; the selected lane is architectural lane 0 after endian adjustment).
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers; the second address must be exactly
;; first address + size of the first mode (checked with rtx_equal_p).
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
;; STP of two 64-bit vector registers to consecutive memory locations
;; (mirror image of load_pair above).
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
;; LDP of two 128-bit (Q) vector registers from consecutive addresses.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
;; STP of two 128-bit (Q) vector registers to consecutive addresses.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; NOTE(review): the opening (define_split line is outside this view.
;; After reload, split a 128-bit move between two general-purpose
;; register pairs into two DImode reg-reg moves.
237 [(set (match_operand:VQMOV 0 "register_operand" "")
238 (match_operand:VQMOV 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; NOTE(review): the opening (define_split line is outside this view.
;; After reload, split a 128-bit move that crosses between FP and
;; general registers via aarch64_split_simd_move.
249 [(set (match_operand:VQMOV 0 "register_operand" "")
250 (match_operand:VQMOV 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move into two 64-bit half moves.  GP source: move the
;; low and high DI halves into the vector's low/high quads.  Otherwise:
;; extract the two halves of the vector source into the destination halves.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQMOV 0)
262 (match_operand:VQMOV 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
285 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
286 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
;; Extract one half (selected by the ascending lane parallel) of a
;; 128-bit vector into a half-width register.
292 (define_expand "aarch64_get_half<mode>"
293 [(set (match_operand:<VHALF> 0 "register_operand")
295 (match_operand:VQMOV 1 "register_operand")
296 (match_operand 2 "ascending_int_parallel")))]
;; Move the low half of a Q vector out.  When the destination ends up in
;; a SIMD register after reload, split to a plain half-width reg move.
300 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
301 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
303 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
304 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
309 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
310 [(set (match_dup 0) (match_dup 1))]
312 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
314 [(set_attr "type" "mov_reg,neon_to_gp<q>")
315 (set_attr "length" "4")]
;; Move the high half of a Q vector into a half-width SIMD or general
;; register.
318 (define_insn "aarch64_simd_mov_from_<mode>high"
319 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
321 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
322 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
327 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
328 (set_attr "length" "4")]
;; Vector OR-NOT: ORN computes op2 | ~op1 (note the swapped operand
;; order in the output template).
331 (define_insn "orn<mode>3"
332 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
333 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
334 (match_operand:VDQ_I 2 "register_operand" "w")))]
336 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
337 [(set_attr "type" "neon_logic<q>")]
;; Vector bit-clear: BIC computes op2 & ~op1 (operands swapped in the
;; output template).
340 (define_insn "bic<mode>3"
341 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
342 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
343 (match_operand:VDQ_I 2 "register_operand" "w")))]
345 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
346 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition (per-lane ADD).
349 (define_insn "add<mode>3"
350 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
351 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
352 (match_operand:VDQ_I 2 "register_operand" "w")))]
354 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
355 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction (per-lane SUB).
358 (define_insn "sub<mode>3"
359 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
360 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
361 (match_operand:VDQ_I 2 "register_operand" "w")))]
363 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
364 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication; only byte/half/single element sizes
;; (VDQ_BHSI) — there is no vector MUL for 64-bit lanes.
367 (define_insn "mul<mode>3"
368 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
369 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
370 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
372 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
373 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
376 (define_insn "bswap<mode>2"
377 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
378 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
380 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
381 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (RBIT), byte-vector modes only.
384 (define_insn "aarch64_rbit<mode>"
385 [(set (match_operand:VB 0 "register_operand" "=w")
386 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
389 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
390 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as: byte-swap each element, bit-
;; reverse every byte (via a QI-vector subreg), then count leading zeros.
393 (define_expand "ctz<mode>2"
394 [(set (match_operand:VS 0 "register_operand")
395 (ctz:VS (match_operand:VS 1 "register_operand")))]
398 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
399 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
401 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
402 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(x, y) = x with its sign flipped by the sign of y.  Implemented
;; in the integer domain: mask off y's sign bits (AND with 1<<(bits-1)
;; replicated per lane) and XOR them into x.
407 (define_expand "xorsign<mode>3"
408 [(match_operand:VHSDF 0 "register_operand")
409 (match_operand:VHSDF 1 "register_operand")
410 (match_operand:VHSDF 2 "register_operand")]
414 machine_mode imode = <V_INT_EQUIV>mode;
415 rtx v_bitmask = gen_reg_rtx (imode);
416 rtx op1x = gen_reg_rtx (imode);
417 rtx op2x = gen_reg_rtx (imode);
419 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
420 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
422 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
424 emit_move_insn (v_bitmask,
425 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
426 HOST_WIDE_INT_M1U << bits));
428 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
429 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
430 emit_move_insn (operands[0],
431 lowpart_subreg (<MODE>mode, op1x, imode));
436 ;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
437 ;; fact that their usage needs to guarantee that the source vectors are
438 ;; contiguous. It would be wrong to describe the operation without being able
439 ;; to describe the permute that is also required, but even if that is done
440 ;; the permute would have been created as a LOAD_LANES which means the values
441 ;; in the registers are in the wrong order.
;; Floating-point complex add with rotation (FCADD #<rot>).
442 (define_insn "aarch64_fcadd<rot><mode>"
443 [(set (match_operand:VHSDF 0 "register_operand" "=w")
444 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
445 (match_operand:VHSDF 2 "register_operand" "w")]
448 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
449 [(set_attr "type" "neon_fcadd")]
;; Floating-point complex multiply-accumulate (FCMLA); the accumulator
;; is tied to the destination register ("0" constraint on operand 1).
452 (define_insn "aarch64_fcmla<rot><mode>"
453 [(set (match_operand:VHSDF 0 "register_operand" "=w")
454 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
455 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
456 (match_operand:VHSDF 3 "register_operand" "w")]
459 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
460 [(set_attr "type" "neon_fcmla")]
;; FCMLA by-element form: operand 4 selects the (complex) lane, adjusted
;; for endianness in the half-width mode.
464 (define_insn "aarch64_fcmla_lane<rot><mode>"
465 [(set (match_operand:VHSDF 0 "register_operand" "=w")
466 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
467 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
468 (match_operand:VHSDF 3 "register_operand" "w")
469 (match_operand:SI 4 "const_int_operand" "n")]
473 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
474 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
476 [(set_attr "type" "neon_fcmla")]
;; FCMLA on a V4HF accumulator with the by-element operand taken from a
;; full V8HF register (laneq form).
479 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
480 [(set (match_operand:V4HF 0 "register_operand" "=w")
481 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
482 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
483 (match_operand:V8HF 3 "register_operand" "w")
484 (match_operand:SI 4 "const_int_operand" "n")]
488 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
489 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
491 [(set_attr "type" "neon_fcmla")]
;; Q-form FCMLA by-element: the lane index counts complex (pair) elements,
;; hence the division by 2 when adjusting for endianness.
494 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
495 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
496 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
497 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
498 (match_operand:<VHALF> 3 "register_operand" "w")
499 (match_operand:SI 4 "const_int_operand" "n")]
503 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
505 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
506 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
508 [(set_attr "type" "neon_fcmla")]
511 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: dot product of QI-element vectors accumulated into an
;; SI-element vector tied to the destination.
512 (define_insn "aarch64_<sur>dot<vsi2qi>"
513 [(set (match_operand:VS 0 "register_operand" "=w")
514 (plus:VS (match_operand:VS 1 "register_operand" "0")
515 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
516 (match_operand:<VSI2QI> 3 "register_operand" "w")]
519 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
520 [(set_attr "type" "neon_dot<q>")]
523 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot
524 ;; (vector) Dot Product operation.
;; Armv8.6-A I8MM mixed-sign dot product (USDOT, vector form).
525 (define_insn "aarch64_usdot<vsi2qi>"
526 [(set (match_operand:VS 0 "register_operand" "=w")
528 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
529 (match_operand:<VSI2QI> 3 "register_operand" "w")]
531 (match_operand:VS 1 "register_operand" "0")))]
533 "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
534 [(set_attr "type" "neon_dot<q>")]
537 ;; These expands map to the Dot Product optab the vectorizer checks for.
538 ;; The auto-vectorizer expects a dot product builtin that also does an
539 ;; accumulation into the provided register.
540 ;; Given the following pattern
542 ;; for (i=0; i<len; i++) {
548 ;; This can be auto-vectorized to
549 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
551 ;; given enough iterations. However the vectorizer can keep unrolling the loop
552 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
553 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
556 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Optab entry point the vectorizer uses: emit the dot-product insn
;; accumulating into operand 3, then copy the result to operand 0.
557 (define_expand "<sur>dot_prod<vsi2qi>"
558 [(set (match_operand:VS 0 "register_operand")
559 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
560 (match_operand:<VSI2QI> 2 "register_operand")]
562 (match_operand:VS 3 "register_operand")))]
566 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
568 emit_insn (gen_rtx_SET (operands[0], operands[3]));
572 ;; These instructions map to the __builtins for the Dot Product
573 ;; indexed operations.
;; SDOT/UDOT by-element form, lane operand taken from a V8QI register
;; (lane index endian-adjusted in V8QImode).
574 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
575 [(set (match_operand:VS 0 "register_operand" "=w")
576 (plus:VS (match_operand:VS 1 "register_operand" "0")
577 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
578 (match_operand:V8QI 3 "register_operand" "<h_con>")
579 (match_operand:SI 4 "immediate_operand" "i")]
583 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
584 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
586 [(set_attr "type" "neon_dot<q>")]
;; As dot_lane, but the lane operand comes from a full V16QI register
;; (laneq form).
589 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
590 [(set (match_operand:VS 0 "register_operand" "=w")
591 (plus:VS (match_operand:VS 1 "register_operand" "0")
592 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
593 (match_operand:V16QI 3 "register_operand" "<h_con>")
594 (match_operand:SI 4 "immediate_operand" "i")]
598 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
599 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
601 [(set_attr "type" "neon_dot<q>")]
604 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
605 ;; (by element) Dot Product operations.
;; I8MM USDOT/SUDOT by-element: the lane indexes groups of four bytes,
;; hence nunits/4 when converting to architectural lane numbering.
606 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
607 [(set (match_operand:VS 0 "register_operand" "=w")
609 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
610 (match_operand:VB 3 "register_operand" "w")
611 (match_operand:SI 4 "immediate_operand" "i")]
613 (match_operand:VS 1 "register_operand" "0")))]
616 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
617 int lane = INTVAL (operands[4]);
618 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
619 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
621 [(set_attr "type" "neon_dot<VS:q>")]
;; copysign(x, y): build a per-lane sign-bit mask and use BSL to select
;; the sign bit from operand 2 and the remaining bits from operand 1.
624 (define_expand "copysign<mode>3"
625 [(match_operand:VHSDF 0 "register_operand")
626 (match_operand:VHSDF 1 "register_operand")
627 (match_operand:VHSDF 2 "register_operand")]
628 "TARGET_FLOAT && TARGET_SIMD"
630 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
631 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
633 emit_move_insn (v_bitmask,
634 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
635 HOST_WIDE_INT_M1U << bits));
636 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
637 operands[2], operands[1]));
;; Multiply a vector by one broadcast lane of another vector
;; ((F)MUL by-element form).
642 (define_insn "*aarch64_mul3_elt<mode>"
643 [(set (match_operand:VMUL 0 "register_operand" "=w")
647 (match_operand:VMUL 1 "register_operand" "<h_con>")
648 (parallel [(match_operand:SI 2 "immediate_operand")])))
649 (match_operand:VMUL 3 "register_operand" "w")))]
652 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
653 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
655 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; By-element multiply where the lane source has the swapped vector
;; width; lane index adjusted in <VSWAP_WIDTH>mode.
658 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
659 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
660 (mult:VMUL_CHANGE_NLANES
661 (vec_duplicate:VMUL_CHANGE_NLANES
663 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
664 (parallel [(match_operand:SI 2 "immediate_operand")])))
665 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
668 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
669 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
671 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar duplicated into all lanes; emitted as the
;; by-element form using lane 0.
674 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
675 [(set (match_operand:VMUL 0 "register_operand" "=w")
678 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
679 (match_operand:VMUL 2 "register_operand" "w")))]
681 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
682 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
685 (define_insn "@aarch64_rsqrte<mode>"
686 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
687 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
690 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
691 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS), used to refine the estimate.
693 (define_insn "@aarch64_rsqrts<mode>"
694 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
695 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
696 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
699 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
700 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the approximate-sqrt helper (recip = true).
702 (define_expand "rsqrt<mode>2"
703 [(set (match_operand:VALLF 0 "register_operand")
704 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
708 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a selected lane of a V2DF register
;; (FMUL by-element, d-sized lane).
712 (define_insn "*aarch64_mul3_elt_to_64v2df"
713 [(set (match_operand:DF 0 "register_operand" "=w")
716 (match_operand:V2DF 1 "register_operand" "w")
717 (parallel [(match_operand:SI 2 "immediate_operand")]))
718 (match_operand:DF 3 "register_operand" "w")))]
721 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
722 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
724 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Per-lane integer negation (NEG).
727 (define_insn "neg<mode>2"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
729 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
731 "neg\t%0.<Vtype>, %1.<Vtype>"
732 [(set_attr "type" "neon_neg<q>")]
;; Per-lane integer absolute value (ABS), exposed as the generic abs optab.
735 (define_insn "abs<mode>2"
736 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
737 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
739 "abs\t%0.<Vtype>, %1.<Vtype>"
740 [(set_attr "type" "neon_abs<q>")]
743 ;; The intrinsic version of integer ABS must not be allowed to
744 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic-only ABS kept as an UNSPEC so combine cannot merge it
;; (see the comment preceding this pattern).
746 (define_insn "aarch64_abs<mode>"
747 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
749 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
752 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
753 [(set_attr "type" "neon_abs<q>")]
756 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
757 ;; This isn't accurate as ABS always treats its input as a signed value.
758 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
759 ;; Whereas SABD would return 192 (-64 signed) on the above example.
760 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
;; Signed/unsigned absolute difference (SABD/UABD), modelled with
;; max/min rather than abs(minus) — see the comment above this pattern.
761 (define_insn "aarch64_<su>abd<mode>_3"
762 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
765 (match_operand:VDQ_BHSI 1 "register_operand" "w")
766 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
771 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
772 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves (SABDL2/UABDL2),
;; result is the double-width vector mode.
775 (define_insn "aarch64_<sur>abdl2<mode>_3"
776 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
777 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
778 (match_operand:VDQV_S 2 "register_operand" "w")]
781 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
782 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute-difference-and-accumulate (SABAL/UABAL);
;; accumulator (operand 3) is tied to the destination.
785 (define_insn "aarch64_<sur>abal<mode>_4"
786 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
787 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
788 (match_operand:VDQV_S 2 "register_operand" "w")
789 (match_operand:<VDBLW> 3 "register_operand" "0")]
792 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
793 [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise add-and-accumulate-long (SADALP/UADALP) into the tied
;; double-width accumulator.
796 (define_insn "aarch64_<sur>adalp<mode>_3"
797 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
798 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
799 (match_operand:<VDBLW> 2 "register_operand" "0")]
802 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
803 [(set_attr "type" "neon_reduc_add<q>")]
806 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
807 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
808 ;; reduction of the difference into a V4SI vector and accumulate that into
809 ;; operand 3 before copying that into the result operand 0.
810 ;; Perform that with a sequence of:
811 ;; UABDL2 tmp.8h, op1.16b, op2.16b
812 ;; UABAL tmp.8h, op1.16b, op2.16b
813 ;; UADALP op3.4s, tmp.8h
814 ;; MOV op0, op3 // should be eliminated in later passes.
816 ;; For TARGET_DOTPROD we do:
817 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
818 ;; UABD tmp2.16b, op1.16b, op2.16b
819 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
820 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
822 ;; The signed version just uses the signed variants of the above instructions
823 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; Sum of absolute differences of V16QI inputs, accumulated into a V4SI.
;; With TARGET_DOTPROD: ABD then UDOT against a vector of ones.
;; Otherwise: ABDL2 + ABAL + ADALP sequence (see the comment above).
826 (define_expand "<sur>sadv16qi"
827 [(use (match_operand:V4SI 0 "register_operand"))
828 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
829 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
830 (use (match_operand:V4SI 3 "register_operand"))]
835 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
836 rtx abd = gen_reg_rtx (V16QImode);
837 emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
838 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
842 rtx reduc = gen_reg_rtx (V8HImode);
843 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
845 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
846 operands[2], reduc));
847 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
849 emit_move_insn (operands[0], operands[3]);
;; Signed absolute-difference-and-accumulate (SABA), accumulator tied
;; to the destination.
854 (define_insn "aba<mode>_3"
855 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
856 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
857 (match_operand:VDQ_BHSI 1 "register_operand" "w")
858 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
859 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
861 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
862 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar modes.
865 (define_insn "fabd<mode>3"
866 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
869 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
870 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
872 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
873 [(set_attr "type" "neon_fp_abd_<stype><q>")]
876 ;; For AND (vector, register) and BIC (vector, immediate)
;; Vector AND: register form, or BIC-style immediate (Db constraint)
;; printed via aarch64_output_simd_mov_immediate.
877 (define_insn "and<mode>3"
878 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
879 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
880 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
883 switch (which_alternative)
886 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
888 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
894 [(set_attr "type" "neon_logic<q>")]
897 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Vector OR: register form, or ORR-immediate (Do constraint) printed
;; via aarch64_output_simd_mov_immediate.
898 (define_insn "ior<mode>3"
899 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
900 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
901 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
904 switch (which_alternative)
907 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
909 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
915 [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-OR (EOR).
918 (define_insn "xor<mode>3"
919 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
920 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
921 (match_operand:VDQ_I 2 "register_operand" "w")))]
923 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
924 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
927 (define_insn "one_cmpl<mode>2"
928 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
929 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
931 "not\t%0.<Vbtype>, %1.<Vbtype>"
932 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one vector lane.  Operand 2 is a one-hot mask;
;; exact_log2 recovers the lane.  Alternatives: INS from a SIMD lane,
;; INS from a general register, or LD1 of a single lane from memory.
935 (define_insn "aarch64_simd_vec_set<mode>"
936 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
938 (vec_duplicate:VALL_F16
939 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
940 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
941 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
944 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
945 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
946 switch (which_alternative)
949 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
951 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
953 return "ld1\\t{%0.<Vetype>}[%p2], %1";
958 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of a vector into one lane of another (INS element form).
;; Operand 2 is a one-hot destination-lane mask; operand 4 the source lane.
961 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
962 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
964 (vec_duplicate:VALL_F16
966 (match_operand:VALL_F16 3 "register_operand" "w")
968 [(match_operand:SI 4 "immediate_operand" "i")])))
969 (match_operand:VALL_F16 1 "register_operand" "0")
970 (match_operand:SI 2 "immediate_operand" "i")))]
973 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
974 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
975 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
977 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
979 [(set_attr "type" "neon_ins<q>")]
;; Lane-to-lane copy where the source vector has the swapped width;
;; the source lane index is adjusted in <VSWAP_WIDTH>mode.
982 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
983 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
984 (vec_merge:VALL_F16_NO_V2Q
985 (vec_duplicate:VALL_F16_NO_V2Q
987 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
989 [(match_operand:SI 4 "immediate_operand" "i")])))
990 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
991 (match_operand:SI 2 "immediate_operand" "i")))]
994 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
995 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
996 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
997 INTVAL (operands[4]));
999 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1001 [(set_attr "type" "neon_ins<q>")]
;; Extract FP sign bits by reinterpreting as integers and doing a
;; logical shift right by (element bits - 1).
1004 (define_expand "signbit<mode>2"
1005 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1006 (use (match_operand:VDQSF 1 "register_operand"))]
1009 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1010 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1012 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1014 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
;; Logical shift right by immediate (USHR).
1019 (define_insn "aarch64_simd_lshr<mode>"
1020 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1021 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1022 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1024 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1025 [(set_attr "type" "neon_shift_imm<q>")]
1028 (define_insn "aarch64_simd_ashr<mode>"
1029 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1030 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1031 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1033 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1034 [(set_attr "type" "neon_shift_imm<q>")]
1037 (define_insn "*aarch64_simd_sra<mode>"
1038 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1041 (match_operand:VDQ_I 1 "register_operand" "w")
1042 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1043 (match_operand:VDQ_I 3 "register_operand" "0")))]
1045 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1046 [(set_attr "type" "neon_shift_acc<q>")]
1049 (define_insn "aarch64_simd_imm_shl<mode>"
1050 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1051 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1052 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1054 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1055 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector shift left by a per-lane register amount -> SSHL.  (For left
;; shifts the signed and unsigned forms behave identically, so SSHL is
;; used for the plain ashift RTX.)
1058 (define_insn "aarch64_simd_reg_sshl<mode>"
1059 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1060 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1061 (match_operand:VDQ_I 2 "register_operand" "w")))]
1063 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1064 [(set_attr "type" "neon_shift_reg<q>")]

;; Unsigned register shift -> USHL; kept as an unspec because USHL shifts
;; right when the per-lane amount is negative, which has no direct RTX.
1067 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1068 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1069 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1070 (match_operand:VDQ_I 2 "register_operand" "w")]
1071 UNSPEC_ASHIFT_UNSIGNED))]
1073 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1074 [(set_attr "type" "neon_shift_reg<q>")]

;; Signed register shift -> SSHL; unspec for the same reason as above.
1077 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1078 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1079 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1080 (match_operand:VDQ_I 2 "register_operand" "w")]
1081 UNSPEC_ASHIFT_SIGNED))]
1083 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1084 [(set_attr "type" "neon_shift_reg<q>")]
;; Expand a vector shift left by an SImode amount.  A constant amount in
;; [0, bit_width) is broadcast into a vector and handled by
;; aarch64_simd_imm_shl (SHL); otherwise the scalar amount is duplicated
;; into every lane and the SSHL register form is used.  Note the valid
;; immediate range for left shifts excludes bit_width itself, unlike the
;; right-shift expanders below.
1087 (define_expand "ashl<mode>3"
1088 [(match_operand:VDQ_I 0 "register_operand")
1089 (match_operand:VDQ_I 1 "register_operand")
1090 (match_operand:SI 2 "general_operand")]
1093 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1096 if (CONST_INT_P (operands[2]))
1098 shift_amount = INTVAL (operands[2]);
1099 if (shift_amount >= 0 && shift_amount < bit_width)
1101 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1103 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1110 operands[2] = force_reg (SImode, operands[2]);
1113 else if (MEM_P (operands[2]))
1115 operands[2] = force_reg (SImode, operands[2]);
1118 if (REG_P (operands[2]))
1120 rtx tmp = gen_reg_rtx (<MODE>mode);
1121 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1122 convert_to_mode (<VEL>mode,
1125 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1134 (define_expand "lshr<mode>3"
1135 [(match_operand:VDQ_I 0 "register_operand")
1136 (match_operand:VDQ_I 1 "register_operand")
1137 (match_operand:SI 2 "general_operand")]
1140 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1143 if (CONST_INT_P (operands[2]))
1145 shift_amount = INTVAL (operands[2]);
1146 if (shift_amount > 0 && shift_amount <= bit_width)
1148 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1150 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1156 operands[2] = force_reg (SImode, operands[2]);
1158 else if (MEM_P (operands[2]))
1160 operands[2] = force_reg (SImode, operands[2]);
1163 if (REG_P (operands[2]))
1165 rtx tmp = gen_reg_rtx (SImode);
1166 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1167 emit_insn (gen_negsi2 (tmp, operands[2]));
1168 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1169 convert_to_mode (<VEL>mode,
1171 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1181 (define_expand "ashr<mode>3"
1182 [(match_operand:VDQ_I 0 "register_operand")
1183 (match_operand:VDQ_I 1 "register_operand")
1184 (match_operand:SI 2 "general_operand")]
1187 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1190 if (CONST_INT_P (operands[2]))
1192 shift_amount = INTVAL (operands[2]);
1193 if (shift_amount > 0 && shift_amount <= bit_width)
1195 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1197 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1203 operands[2] = force_reg (SImode, operands[2]);
1205 else if (MEM_P (operands[2]))
1207 operands[2] = force_reg (SImode, operands[2]);
1210 if (REG_P (operands[2]))
1212 rtx tmp = gen_reg_rtx (SImode);
1213 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1214 emit_insn (gen_negsi2 (tmp, operands[2]));
1215 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1216 convert_to_mode (<VEL>mode,
1218 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Per-lane variable shift left: SSHL handles this directly.
1228 (define_expand "vashl<mode>3"
1229 [(match_operand:VDQ_I 0 "register_operand")
1230 (match_operand:VDQ_I 1 "register_operand")
1231 (match_operand:VDQ_I 2 "register_operand")]
1234 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Per-lane variable shift right: negate the shift amounts and use
;; SSHL/USHL, which shift right for negative per-lane counts.
1239 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1240 ;; Negating individual lanes most certainly offsets the
1241 ;; gain from vectorization.
1242 (define_expand "vashr<mode>3"
1243 [(match_operand:VDQ_BHSI 0 "register_operand")
1244 (match_operand:VDQ_BHSI 1 "register_operand")
1245 (match_operand:VDQ_BHSI 2 "register_operand")]
1248 rtx neg = gen_reg_rtx (<MODE>mode)
1249 emit (gen_neg<mode>2 (neg, operands[2]));
1250 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; DImode arithmetic shift right by up to 64 for the intrinsics: an asr
;; by 64 gives the same result as by 63 (all sign bits), which the
;; standard ashrdi3 pattern cannot express, so clamp before delegating.
1256 (define_expand "aarch64_ashr_simddi"
1257 [(match_operand:DI 0 "register_operand")
1258 (match_operand:DI 1 "register_operand")
1259 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1262 /* An arithmetic shift right by 64 fills the result with copies of the sign
1263 bit, just like asr by 63 - however the standard pattern does not handle
1265 if (INTVAL (operands[2]) == 64)
1266 operands[2] = GEN_INT (63);
1267 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Per-lane variable logical shift right via negated USHL, as for vashr.
1272 (define_expand "vlshr<mode>3"
1273 [(match_operand:VDQ_BHSI 0 "register_operand")
1274 (match_operand:VDQ_BHSI 1 "register_operand")
1275 (match_operand:VDQ_BHSI 2 "register_operand")]
1278 rtx neg = gen_reg_rtx (<MODE>mode)
1279 emit (gen_neg<mode>2 (neg, operands[2]));
1280 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; DImode logical shift right by up to 64: a shift by 64 yields zero, the
;; rest delegate to the standard lshrdi3 pattern.
1285 (define_expand "aarch64_lshr_simddi"
1286 [(match_operand:DI 0 "register_operand")
1287 (match_operand:DI 1 "register_operand")
1288 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1291 if (INTVAL (operands[2]) == 64)
1292 emit_move_insn (operands[0], const0_rtx);
1294 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1299 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element shift, D registers).  On big-endian
;; the architectural bit order is reversed relative to the element order,
;; so SHL implements the element-wise "shift right" there and USHR on
;; little-endian.
1300 (define_insn "vec_shr_<mode>"
1301 [(set (match_operand:VD 0 "register_operand" "=w")
1302 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1303 (match_operand:SI 2 "immediate_operand" "i")]
1307 if (BYTES_BIG_ENDIAN)
1308 return "shl %d0, %d1, %2";
1310 return "ushr %d0, %d1, %2";
1312 [(set_attr "type" "neon_shift_imm")]
;; Set lane operand[2] of vector operand[0] to scalar operand[1]: convert
;; the lane index to the one-hot vec_merge mask the insn pattern expects.
1315 (define_expand "vec_set<mode>"
1316 [(match_operand:VALL_F16 0 "register_operand")
1317 (match_operand:<VEL> 1 "register_operand")
1318 (match_operand:SI 2 "immediate_operand")]
1321 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1322 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1323 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: 0 = 1 + 2 * 3 -> MLA (accumulator tied to
;; the destination).
1329 (define_insn "aarch64_mla<mode>"
1330 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1331 (plus:VDQ_BHSI (mult:VDQ_BHSI
1332 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1333 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1334 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1336 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1337 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLA with one multiplicand broadcast from a lane of a vector of the
;; same mode; lane number fixed up for endianness at output.
1340 (define_insn "*aarch64_mla_elt<mode>"
1341 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1344 (vec_duplicate:VDQHS
1346 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1347 (parallel [(match_operand:SI 2 "immediate_operand")])))
1348 (match_operand:VDQHS 3 "register_operand" "w"))
1349 (match_operand:VDQHS 4 "register_operand" "0")))]
1352 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1353 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1355 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from the swapped-width (D<->Q) mode.
1358 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1359 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1362 (vec_duplicate:VDQHS
1364 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1365 (parallel [(match_operand:SI 2 "immediate_operand")])))
1366 (match_operand:VDQHS 3 "register_operand" "w"))
1367 (match_operand:VDQHS 4 "register_operand" "0")))]
1370 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1371 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1373 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLA where one multiplicand is a scalar duplicated into every lane;
;; emitted using lane 0 of the scalar's register.
1376 (define_insn "*aarch64_mla_elt_merge<mode>"
1377 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1379 (mult:VDQHS (vec_duplicate:VDQHS
1380 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1381 (match_operand:VDQHS 2 "register_operand" "w"))
1382 (match_operand:VDQHS 3 "register_operand" "0")))]
1384 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1385 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: 0 = 1 - 2 * 3 -> MLS (accumulator tied to
;; the destination).  The variants below mirror the MLA family above.
1388 (define_insn "aarch64_mls<mode>"
1389 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1390 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1391 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1392 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1394 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1395 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLS with one multiplicand broadcast from a lane (endian-corrected).
1398 (define_insn "*aarch64_mls_elt<mode>"
1399 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1401 (match_operand:VDQHS 4 "register_operand" "0")
1403 (vec_duplicate:VDQHS
1405 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1406 (parallel [(match_operand:SI 2 "immediate_operand")])))
1407 (match_operand:VDQHS 3 "register_operand" "w"))))]
1410 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1411 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1413 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from the swapped-width (D<->Q) mode.
1416 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1417 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1419 (match_operand:VDQHS 4 "register_operand" "0")
1421 (vec_duplicate:VDQHS
1423 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1424 (parallel [(match_operand:SI 2 "immediate_operand")])))
1425 (match_operand:VDQHS 3 "register_operand" "w"))))]
1428 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1429 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1431 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLS with a duplicated scalar multiplicand; uses lane 0 of its register.
1434 (define_insn "*aarch64_mls_elt_merge<mode>"
1435 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1437 (match_operand:VDQHS 1 "register_operand" "0")
1438 (mult:VDQHS (vec_duplicate:VDQHS
1439 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1440 (match_operand:VDQHS 3 "register_operand" "w"))))]
1442 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1443 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1446 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min -> SMAX/SMIN/UMAX/UMIN.
1447 (define_insn "<su><maxmin><mode>3"
1448 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1449 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1450 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1452 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1453 [(set_attr "type" "neon_minmax<q>")]

;; V2DI has no direct max/min instruction: synthesise it as a compare
;; (operator selection code elided in this extract) followed by VCOND.
1456 (define_expand "<su><maxmin>v2di3"
1457 [(set (match_operand:V2DI 0 "register_operand")
1458 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1459 (match_operand:V2DI 2 "register_operand")))]
1462 enum rtx_code cmp_operator;
1483 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1484 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1485 operands[2], cmp_fmt, operands[1], operands[2]));

1489 ;; Pairwise Integer Max/Min operations.
1490 (define_insn "aarch64_<maxmin_uns>p<mode>"
1491 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1492 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1493 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1496 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1497 [(set_attr "type" "neon_minmax<q>")]

1500 ;; Pairwise FP Max/Min operations.
1501 (define_insn "aarch64_<maxmin_uns>p<mode>"
1502 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1503 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1504 (match_operand:VHSDF 2 "register_operand" "w")]
1507 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1508 [(set_attr "type" "neon_minmax<q>")]
1511 ;; vec_concat gives a new vector with the low elements from operand 1, and
1512 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1513 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1514 ;; What that means, is that the RTL descriptions of the below patterns
1515 ;; need to change depending on endianness.

1517 ;; Move to the low architectural bits of the register.
1518 ;; On little-endian this is { operand, zeroes }
1519 ;; On big-endian this is { zeroes, operand }

;; Little-endian form for modes other than the 2-element Q-register ones;
;; output templates elided in this extract.  Three alternatives select a
;; SIMD dup, a GP->FP move, or a SIMD fmov depending on the source class.
1521 (define_insn "move_lo_quad_internal_<mode>"
1522 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1523 (vec_concat:VQMOV_NO2E
1524 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1525 (vec_duplicate:<VHALF> (const_int 0))))]
1526 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1531 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1532 (set_attr "length" "4")
1533 (set_attr "arch" "simd,fp,simd")]

;; Little-endian form for the 2-element Q-register modes (V2DI/V2DF).
1536 (define_insn "move_lo_quad_internal_<mode>"
1537 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1539 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1541 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1546 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1547 (set_attr "length" "4")
1548 (set_attr "arch" "simd,fp,simd")]

;; Big-endian counterparts: the zero half comes first in the vec_concat.
1551 (define_insn "move_lo_quad_internal_be_<mode>"
1552 [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
1553 (vec_concat:VQMOV_NO2E
1554 (vec_duplicate:<VHALF> (const_int 0))
1555 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1556 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1561 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1562 (set_attr "length" "4")
1563 (set_attr "arch" "simd,fp,simd")]

1566 (define_insn "move_lo_quad_internal_be_<mode>"
1567 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1570 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1571 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1576 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1577 (set_attr "length" "4")
1578 (set_attr "arch" "simd,fp,simd")]

;; Dispatch to the little- or big-endian internal pattern.
1581 (define_expand "move_lo_quad_<mode>"
1582 [(match_operand:VQMOV 0 "register_operand")
1583 (match_operand:VQMOV 1 "register_operand")]
1586 if (BYTES_BIG_ENDIAN)
1587 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1589 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1594 ;; Move operand1 to the high architectural bits of the register, keeping
1595 ;; the low architectural bits of operand2.
1596 ;; For little-endian this is { operand2, operand1 }
1597 ;; For big-endian this is { operand1, operand2 }

;; Little-endian form; emitted as INS %0.d[1], %1.d[0] (first alternative
;; visible here; remaining template lines elided in this extract).
1599 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1600 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1604 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1605 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1606 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1608 ins\\t%0.d[1], %1.d[0]
1610 [(set_attr "type" "neon_ins")]

;; Big-endian counterpart: the new half comes first in the vec_concat.
1613 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1614 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1616 (match_operand:<VHALF> 1 "register_operand" "w,r")
1619 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1620 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1622 ins\\t%0.d[1], %1.d[0]
1624 [(set_attr "type" "neon_ins")]

;; Build the low-half lane-selector PARALLEL and dispatch on endianness.
1627 (define_expand "move_hi_quad_<mode>"
1628 [(match_operand:VQMOV 0 "register_operand")
1629 (match_operand:<VHALF> 1 "register_operand")]
1632 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1633 if (BYTES_BIG_ENDIAN)
1634 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1637 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1642 ;; Narrowing operations.

;; Truncate each element of a Q-register vector to half width -> XTN.
1645 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1646 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1647 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1649 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1650 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Pack two D-register vectors into one narrowed D-register result:
;; concatenate them into a temporary Q value (halves swapped on
;; big-endian) and narrow it with a single XTN.
1653 (define_expand "vec_pack_trunc_<mode>"
1654 [(match_operand:<VNARROWD> 0 "register_operand")
1655 (match_operand:VDN 1 "register_operand")
1656 (match_operand:VDN 2 "register_operand")]
1659 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1660 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1661 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1663 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1664 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1665 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));

;; Pack two Q-register vectors into one narrowed Q-register result with
;; an XTN/XTN2 pair; operand order swaps on big-endian.  Earlyclobber
;; ("=&w") because the destination is written before both inputs are read.
1671 (define_insn "vec_pack_trunc_<mode>"
1672 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1673 (vec_concat:<VNARROWQ2>
1674 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1675 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1678 if (BYTES_BIG_ENDIAN)
1679 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1681 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1683 [(set_attr "type" "multiple")
1684 (set_attr "length" "8")]
1687 ;; Widening operations.

;; Sign/zero-extend the low half of a Q-register vector -> SXTL/UXTL.
1689 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1690 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1692 (match_operand:VQW 1 "register_operand" "w")
1693 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1696 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1697 [(set_attr "type" "neon_shift_imm_long")]

;; Sign/zero-extend the high half -> SXTL2/UXTL2.
1700 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1701 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1702 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1703 (match_operand:VQW 1 "register_operand" "w")
1704 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1707 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1708 [(set_attr "type" "neon_shift_imm_long")]

;; Expanders building the hi/lo lane-selector PARALLEL for the insns above.
1711 (define_expand "vec_unpack<su>_hi_<mode>"
1712 [(match_operand:<VWIDE> 0 "register_operand")
1713 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1716 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1717 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],

1723 (define_expand "vec_unpack<su>_lo_<mode>"
1724 [(match_operand:<VWIDE> 0 "register_operand")
1725 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1728 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1729 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1735 ;; Widening arithmetic.

;; Widening multiply-accumulate on the low halves of two Q-register
;; vectors -> SMLAL/UMLAL.
1737 (define_insn "*aarch64_<su>mlal_lo<mode>"
1738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1741 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742 (match_operand:VQW 2 "register_operand" "w")
1743 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1744 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1745 (match_operand:VQW 4 "register_operand" "w")
1747 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1749 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1750 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Same on the high halves -> SMLAL2/UMLAL2.
1753 (define_insn "*aarch64_<su>mlal_hi<mode>"
1754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1757 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1758 (match_operand:VQW 2 "register_operand" "w")
1759 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1760 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1761 (match_operand:VQW 4 "register_operand" "w")
1763 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1765 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1766 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract, low halves -> SMLSL/UMLSL.
1769 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1772 (match_operand:<VWIDE> 1 "register_operand" "0")
1774 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775 (match_operand:VQW 2 "register_operand" "w")
1776 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1777 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1778 (match_operand:VQW 4 "register_operand" "w")
1781 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1782 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract, high halves -> SMLSL2/UMLSL2.
1785 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1786 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1788 (match_operand:<VWIDE> 1 "register_operand" "0")
1790 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1791 (match_operand:VQW 2 "register_operand" "w")
1792 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1793 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1794 (match_operand:VQW 4 "register_operand" "w")
1797 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1798 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; D-register (64-bit) widening multiply-accumulate -> SMLAL/UMLAL.
1801 (define_insn "*aarch64_<su>mlal<mode>"
1802 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1806 (match_operand:VD_BHSI 1 "register_operand" "w"))
1808 (match_operand:VD_BHSI 2 "register_operand" "w")))
1809 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1811 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1812 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; D-register widening multiply-subtract -> SMLSL/UMLSL.
1815 (define_insn "*aarch64_<su>mlsl<mode>"
1816 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1818 (match_operand:<VWIDE> 1 "register_operand" "0")
1821 (match_operand:VD_BHSI 2 "register_operand" "w"))
1823 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1825 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1826 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves of two Q-register vectors
;; -> SMULL/UMULL.
1829 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1830 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1831 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1832 (match_operand:VQW 1 "register_operand" "w")
1833 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1834 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1835 (match_operand:VQW 2 "register_operand" "w")
1838 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1839 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Widening multiply of whole D-register vectors for the intrinsics
;; -> SMULL/UMULL.
1842 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1843 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1844 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1845 (match_operand:VD_BHSI 1 "register_operand" "w"))
1847 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1849 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1850 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Build the low-half lane-selector PARALLEL and use the insn above.
1853 (define_expand "vec_widen_<su>mult_lo_<mode>"
1854 [(match_operand:<VWIDE> 0 "register_operand")
1855 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1856 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1859 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1860 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],

;; Widening multiply of the high halves -> SMULL2/UMULL2.
1867 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1868 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1869 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1870 (match_operand:VQW 1 "register_operand" "w")
1871 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1872 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1873 (match_operand:VQW 2 "register_operand" "w")
1876 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1877 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Build the high-half lane-selector PARALLEL and use the insn above.
1880 (define_expand "vec_widen_<su>mult_hi_<mode>"
1881 [(match_operand:<VWIDE> 0 "register_operand")
1882 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1883 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1886 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1887 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1895 ;; vmull_lane_s16 intrinsics
;; Widening multiply by one lane of a vector (lane endian-corrected)
;; -> SMULL/UMULL with a scalar-element operand.
1896 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1897 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1900 (match_operand:<VCOND> 1 "register_operand" "w"))
1902 (vec_duplicate:<VCOND>
1904 (match_operand:VDQHS 2 "register_operand" "<vwx>")
1905 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1908 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1909 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1911 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]

1914 ;; vmlal_lane_s16 intrinsics
;; Widening multiply-accumulate by one lane (accumulator tied to the
;; destination) -> SMLAL/UMLAL with a scalar-element operand.
1915 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1916 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1920 (match_operand:<VCOND> 2 "register_operand" "w"))
1922 (vec_duplicate:<VCOND>
1924 (match_operand:VDQHS 3 "register_operand" "<vwx>")
1925 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1926 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1929 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1930 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1932 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1935 ;; FP vector operations.
1936 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1937 ;; double-precision (64-bit) floating-point data types and arithmetic as
1938 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1939 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1941 ;; Floating-point operations can raise an exception. Vectorizing such
1942 ;; operations is safe because of reasons explained below.
1944 ;; ARMv8 permits an extension to enable trapped floating-point
1945 ;; exception handling, however this is an optional feature. In the
1946 ;; event of a floating-point exception being raised by vectorised
1948 ;; 1. If trapped floating-point exceptions are available, then a trap
1949 ;; will be taken when any lane raises an enabled exception. A trap
1950 ;; handler may determine which lane raised the exception.
1951 ;; 2. Alternatively a sticky exception flag is set in the
1952 ;; floating-point status register (FPSR). Software may explicitly
1953 ;; test the exception flags, in which case the tests will either
1954 ;; prevent vectorisation, allowing precise identification of the
1955 ;; failing operation, or if tested outside of vectorisable regions
1956 ;; then the specific operation and lane are not of interest.
1958 ;; FP arithmetic operations.
;; Element-wise FP add -> FADD.
1960 (define_insn "add<mode>3"
1961 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1962 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1963 (match_operand:VHSDF 2 "register_operand" "w")))]
1965 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1966 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Element-wise FP subtract -> FSUB.
1969 (define_insn "sub<mode>3"
1970 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1971 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1972 (match_operand:VHSDF 2 "register_operand" "w")))]
1974 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1975 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Element-wise FP multiply -> FMUL.
1978 (define_insn "mul<mode>3"
1979 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1980 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1981 (match_operand:VHSDF 2 "register_operand" "w")))]
1983 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1984 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP division: first try the approximate reciprocal sequence (enabled by
;; the relevant -mlow-precision / tuning options inside
;; aarch64_emit_approx_div); otherwise force the operands into registers
;; and fall through to the FDIV insn below.
1987 (define_expand "div<mode>3"
1988 [(set (match_operand:VHSDF 0 "register_operand")
1989 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1990 (match_operand:VHSDF 2 "register_operand")))]
1993 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1996 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Element-wise FP divide -> FDIV.
1999 (define_insn "*div<mode>3"
2000 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2001 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2002 (match_operand:VHSDF 2 "register_operand" "w")))]
2004 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FP negate -> FNEG.
2008 (define_insn "neg<mode>2"
2009 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2010 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2012 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2013 [(set_attr "type" "neon_fp_neg_<stype><q>")]

;; Element-wise FP absolute value -> FABS.
2016 (define_insn "abs<mode>2"
2017 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2018 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2020 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2021 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add, 0 = 1 * 2 + 3 -> FMLA (accumulator tied to the
;; destination).
2024 (define_insn "fma<mode>4"
2025 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2026 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2027 (match_operand:VHSDF 2 "register_operand" "w")
2028 (match_operand:VHSDF 3 "register_operand" "0")))]
2030 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2031 [(set_attr "type" "neon_fp_mla_<stype><q>")]

;; FMLA with one multiplicand broadcast from a lane of a same-mode vector
;; (lane endian-corrected at output).
2034 (define_insn "*aarch64_fma4_elt<mode>"
2035 [(set (match_operand:VDQF 0 "register_operand" "=w")
2039 (match_operand:VDQF 1 "register_operand" "<h_con>")
2040 (parallel [(match_operand:SI 2 "immediate_operand")])))
2041 (match_operand:VDQF 3 "register_operand" "w")
2042 (match_operand:VDQF 4 "register_operand" "0")))]
2045 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2046 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2048 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from the swapped-width (D<->Q) mode.
2051 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2052 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2054 (vec_duplicate:VDQSF
2056 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2057 (parallel [(match_operand:SI 2 "immediate_operand")])))
2058 (match_operand:VDQSF 3 "register_operand" "w")
2059 (match_operand:VDQSF 4 "register_operand" "0")))]
2062 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2063 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2065 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLA with a duplicated scalar multiplicand; uses lane 0 of its register.
2068 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
2069 [(set (match_operand:VMUL 0 "register_operand" "=w")
2072 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2073 (match_operand:VMUL 2 "register_operand" "w")
2074 (match_operand:VMUL 3 "register_operand" "0")))]
2076 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2077 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF fma where the multiplicand is one lane of a V2DF vector;
;; emitted as the .2d indexed form of FMLA.
2080 (define_insn "*aarch64_fma4_elt_to_64v2df"
2081 [(set (match_operand:DF 0 "register_operand" "=w")
2084 (match_operand:V2DF 1 "register_operand" "w")
2085 (parallel [(match_operand:SI 2 "immediate_operand")]))
2086 (match_operand:DF 3 "register_operand" "w")
2087 (match_operand:DF 4 "register_operand" "0")))]
2090 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2091 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2093 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract, 0 = -(1) * 2 + 3 -> FMLS (accumulator tied to
;; the destination).  The variants below mirror the FMLA family above.
2096 (define_insn "fnma<mode>4"
2097 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2099 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2100 (match_operand:VHSDF 2 "register_operand" "w")
2101 (match_operand:VHSDF 3 "register_operand" "0")))]
2103 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2104 [(set_attr "type" "neon_fp_mla_<stype><q>")]

;; FMLS with one multiplicand broadcast from a lane (endian-corrected).
2107 (define_insn "*aarch64_fnma4_elt<mode>"
2108 [(set (match_operand:VDQF 0 "register_operand" "=w")
2111 (match_operand:VDQF 3 "register_operand" "w"))
2114 (match_operand:VDQF 1 "register_operand" "<h_con>")
2115 (parallel [(match_operand:SI 2 "immediate_operand")])))
2116 (match_operand:VDQF 4 "register_operand" "0")))]
2119 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2120 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2122 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from the swapped-width (D<->Q) mode.
2125 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2126 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2129 (match_operand:VDQSF 3 "register_operand" "w"))
2130 (vec_duplicate:VDQSF
2132 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2133 (parallel [(match_operand:SI 2 "immediate_operand")])))
2134 (match_operand:VDQSF 4 "register_operand" "0")))]
2137 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2138 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2140 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLS with a duplicated scalar multiplicand; uses lane 0 of its register.
2143 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2144 [(set (match_operand:VMUL 0 "register_operand" "=w")
2147 (match_operand:VMUL 2 "register_operand" "w"))
2149 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2150 (match_operand:VMUL 3 "register_operand" "0")))]
2152 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2153 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF fnma where the multiplicand is one lane of a V2DF vector;
;; emitted as the .2d indexed form of FMLS.
2156 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2157 [(set (match_operand:DF 0 "register_operand" "=w")
2160 (match_operand:V2DF 1 "register_operand" "w")
2161 (parallel [(match_operand:SI 2 "immediate_operand")]))
2163 (match_operand:DF 3 "register_operand" "w"))
2164 (match_operand:DF 4 "register_operand" "0")))]
2167 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2168 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2170 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2173 ;; Vector versions of the floating-point frint patterns.
2174 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2175 (define_insn "<frint_pattern><mode>2"
2176 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2177 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2180 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2181 [(set_attr "type" "neon_fp_round_<stype><q>")]
2184 ;; Vector versions of the fcvt standard patterns.
2185 ;; Expands to lbtrunc, lround, lceil, lfloor
2186 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2187 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2188 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2189 [(match_operand:VHSDF 1 "register_operand" "w")]
2192 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2193 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2196 ;; HF Scalar variants of related SIMD instructions.
;; Round-then-convert HF to HI, gated on the half-precision extension.
2197 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2198 [(set (match_operand:HI 0 "register_operand" "=w")
2199 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2201 "TARGET_SIMD_F16INST"
2202 "fcvt<frint_suffix><su>\t%h0, %h1"
2203 [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion (FCVTZS/FCVTZU on the scalar H regs).
2206 (define_insn "<optab>_trunchfhi2"
2207 [(set (match_operand:HI 0 "register_operand" "=w")
2208 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2209 "TARGET_SIMD_F16INST"
2210 "fcvtz<su>\t%h0, %h1"
2211 [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF conversion (SCVTF/UCVTF on the scalar H regs).
2214 (define_insn "<optab>hihf2"
2215 [(set (match_operand:HF 0 "register_operand" "=w")
2216 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2217 "TARGET_SIMD_F16INST"
2218 "<su_optab>cvtf\t%h0, %h1"
2219 [(set_attr "type" "neon_int_to_fp_s")]
;; Combined multiply-by-power-of-2 and convert, matched to the fixed-point
;; form of FCVTZS/FCVTZU; the #fbits immediate is computed at output time.
2222 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2223 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2224 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2226 (match_operand:VDQF 1 "register_operand" "w")
2227 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2230 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2231 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2233 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2235 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2236 output_asm_insn (buf, operands);
2239 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders funnelling into the unspec conversion insns.
2242 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2243 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2244 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2245 [(match_operand:VHSDF 1 "register_operand")]
2250 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2251 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2252 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2253 [(match_operand:VHSDF 1 "register_operand")]
2258 (define_expand "ftrunc<VHSDF:mode>2"
2259 [(set (match_operand:VHSDF 0 "register_operand")
2260 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer -> FP vector conversion (SCVTF/UCVTF).
2265 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2266 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2268 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2270 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2271 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2274 ;; Conversions between vectors of floats and doubles.
2275 ;; Contains a mix of patterns to match standard pattern names
2276 ;; and those for intrinsics.
2278 ;; Float widening operations.
;; FCVTL: float-extend the low half of operand 1 into a full-width vector.
2280 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2282 (float_extend:<VWIDE> (vec_select:<VHALF>
2283 (match_operand:VQ_HSF 1 "register_operand" "w")
2284 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2287 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2288 [(set_attr "type" "neon_fp_cvt_widen_s")]
2291 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with an immediate #fbits scale (operand 2).
2293 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2294 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2295 (unspec:<VHSDF:FCVT_TARGET>
2296 [(match_operand:VHSDF 1 "register_operand" "w")
2297 (match_operand:SI 2 "immediate_operand" "i")]
2300 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2301 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP, same immediate-scale shape as above.
2304 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2305 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2306 (unspec:<VDQ_HSDI:FCVT_TARGET>
2307 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2308 (match_operand:SI 2 "immediate_operand" "i")]
2311 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2312 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2315 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2316 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2317 ;; the meaning of HI and LO changes depending on the target endianness.
2318 ;; While elsewhere we map the higher numbered elements of a vector to
2319 ;; the lower architectural lanes of the vector, for these patterns we want
2320 ;; to always treat "hi" as referring to the higher architectural lanes.
2321 ;; Consequently, while the patterns below look inconsistent with our
2322 ;; other big-endian patterns their behavior is as required.
;; Expander: selects the LOW half ("false") and emits the _lo_ insn (FCVTL).
2324 (define_expand "vec_unpacks_lo_<mode>"
2325 [(match_operand:<VWIDE> 0 "register_operand")
2326 (match_operand:VQ_HSF 1 "register_operand")]
2329 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2330 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; FCVTL2: float-extend the high half of operand 1.
2336 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2338 (float_extend:<VWIDE> (vec_select:<VHALF>
2339 (match_operand:VQ_HSF 1 "register_operand" "w")
2340 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2343 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2344 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander for the "vec_unpacks_hi" standard pattern: float-extend the
;; HIGH architectural half of operand 1.  The lane-selector parallel is
;; built with "true" (high half), so this must emit the _hi_ insn (FCVTL2)
;; defined above — not the _lo_ one, which pairs with the "false" selector.
2347 (define_expand "vec_unpacks_hi_<mode>"
2348 [(match_operand:<VWIDE> 0 "register_operand")
2349 (match_operand:VQ_HSF 1 "register_operand")]
2352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2353 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; FCVTL on 64-bit inputs: widen every element (no half selection needed).
2358 (define_insn "aarch64_float_extend_lo_<Vwide>"
2359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2360 (float_extend:<VWIDE>
2361 (match_operand:VDF 1 "register_operand" "w")))]
2363 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2364 [(set_attr "type" "neon_fp_cvt_widen_s")]
2367 ;; Float narrowing operations.
;; FCVTN: narrow a wide vector into a 64-bit result.
2369 (define_insn "aarch64_float_truncate_lo_<mode>"
2370 [(set (match_operand:VDF 0 "register_operand" "=w")
2372 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2374 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2375 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, little-endian operand order: operand 1 supplies the existing
;; low half (tied to the destination), operand 2 is narrowed into the top.
2378 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2379 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2381 (match_operand:VDF 1 "register_operand" "0")
2383 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2384 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2385 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2386 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, big-endian operand order (concat operands swapped in the RTL).
2389 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2390 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2393 (match_operand:<VWIDE> 2 "register_operand" "w"))
2394 (match_operand:VDF 1 "register_operand" "0")))]
2395 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2396 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2397 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le/_be variant at expand time based on endianness.
2400 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2401 [(match_operand:<VDBL> 0 "register_operand")
2402 (match_operand:VDF 1 "register_operand")
2403 (match_operand:<VWIDE> 2 "register_operand")]
2406 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2407 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2408 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2409 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN for the low half into a V2SF
;; temporary, then FCVTN2 to fill the high half.  lo/hi operand choice is
;; swapped on big-endian (see the vec_unpacks note above).
2414 (define_expand "vec_pack_trunc_v2df"
2415 [(set (match_operand:V4SF 0 "register_operand")
2417 (float_truncate:V2SF
2418 (match_operand:V2DF 1 "register_operand"))
2419 (float_truncate:V2SF
2420 (match_operand:V2DF 2 "register_operand"))
2424 rtx tmp = gen_reg_rtx (V2SFmode);
2425 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2426 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2428 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2429 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2430 tmp, operands[hi]));
;; Pack two DF scalars and narrow to V2SF.  The temporary must be V2DF:
;; the two DF inputs are first assembled into its low and high halves via
;; move_lo/hi_quad_v2df, and only then is the whole V2DF vector narrowed
;; with FCVTN (truncate_lo_v2sf takes a V2DF input, see above).  A V2SF
;; temporary would be the wrong mode for all three emitted insns.
;; lo/hi operand choice is swapped on big-endian, as elsewhere.
2435 (define_expand "vec_pack_trunc_df"
2436 [(set (match_operand:V2SF 0 "register_operand")
2439 (match_operand:DF 1 "register_operand"))
2441 (match_operand:DF 2 "register_operand"))
2445 rtx tmp = gen_reg_rtx (V2DFmode);
2446 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2447 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2449 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2450 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2451 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2457 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2459 ;; a = (b < c) ? b : c;
2460 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2461 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2464 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2465 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2466 ;; operand will be returned when both operands are zero (i.e. they may not
2467 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2468 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard patterns; FMAXNM/FMINNM are safe here given the
;; fast-math preconditions described above.
2471 (define_insn "<su><maxmin><mode>3"
2472 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2473 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2474 (match_operand:VHSDF 2 "register_operand" "w")))]
2476 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2477 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2480 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2481 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2482 ;; which implement the IEEE fmax ()/fmin () functions.
2483 (define_insn "<maxmin_uns><mode>3"
2484 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2485 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2486 (match_operand:VHSDF 2 "register_operand" "w")]
2489 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2490 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2493 ;; 'across lanes' add.
;; reduc_plus standard pattern for integers: reduce into a scratch vector
;; with ADDV, then extract the (endian-canonical) lane 0 scalar.
2495 (define_expand "reduc_plus_scal_<mode>"
2496 [(match_operand:<VEL> 0 "register_operand")
2497 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2501 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2502 rtx scratch = gen_reg_rtx (<MODE>mode);
2503 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2504 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add (FADDP), used by the v4sf reduction below.
2509 (define_insn "aarch64_faddp<mode>"
2510 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2511 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2512 (match_operand:VHSDF 2 "register_operand" "w")]
2515 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2516 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV/ADDP across-lane integer sum.
2519 (define_insn "aarch64_reduc_plus_internal<mode>"
2520 [(set (match_operand:VDQV 0 "register_operand" "=w")
2521 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2524 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2525 [(set_attr "type" "neon_reduc_add<q>")]
2528 ;; ADDV with result zero-extended to SI/DImode (for popcount).
2529 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2530 [(set (match_operand:GPI 0 "register_operand" "=w")
2532 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2535 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2536 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
;; V2SI has no ADDV form; a single pairwise ADDP gives the sum in each lane.
2539 (define_insn "aarch64_reduc_plus_internalv2si"
2540 [(set (match_operand:V2SI 0 "register_operand" "=w")
2541 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2544 "addp\\t%0.2s, %1.2s, %1.2s"
2545 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar FADDP.
2548 (define_insn "reduc_plus_scal_<mode>"
2549 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2550 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2553 "faddp\\t%<Vetype>0, %1.<Vtype>"
2554 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two rounds of pairwise FADDP, then extract lane 0.
2557 (define_expand "reduc_plus_scal_v4sf"
2558 [(set (match_operand:SF 0 "register_operand")
2559 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2563 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2564 rtx scratch = gen_reg_rtx (V4SFmode);
2565 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2566 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2567 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits (CLS).
2571 (define_insn "clrsb<mode>2"
2572 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2573 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2575 "cls\\t%0.<Vtype>, %1.<Vtype>"
2576 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros (CLZ).
2579 (define_insn "clz<mode>2"
2580 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2581 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2583 "clz\\t%0.<Vtype>, %1.<Vtype>"
2584 [(set_attr "type" "neon_cls<q>")]
;; Per-byte population count (CNT).
2587 (define_insn "popcount<mode>2"
2588 [(set (match_operand:VB 0 "register_operand" "=w")
2589 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2591 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2592 [(set_attr "type" "neon_cnt<q>")]
2595 ;; 'across lanes' max and min ops.
2597 ;; Template for outputting a scalar, so we can create __builtins which can be
2598 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP across-lane max/min: reduce into a scratch vector, extract lane 0.
2599 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2600 [(match_operand:<VEL> 0 "register_operand")
2601 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2605 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2606 rtx scratch = gen_reg_rtx (<MODE>mode);
2607 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2609 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2614 ;; Likewise for integer cases, signed and unsigned.
2615 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2616 [(match_operand:<VEL> 0 "register_operand")
2617 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2621 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2622 rtx scratch = gen_reg_rtx (<MODE>mode);
2623 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2625 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lane max/min (SMAXV/SMINV/UMAXV/UMINV).
2630 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2631 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2632 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2635 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2636 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI uses the pairwise form, since there is no across-lane V2SI variant.
2639 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2640 [(set (match_operand:V2SI 0 "register_operand" "=w")
2641 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2644 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2645 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lane max/min (FMAXV/FMINV/FMAXNMV/FMINNMV, or pairwise for DF).
2648 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2649 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2650 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2653 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2654 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2657 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2659 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2662 ;; Thus our BSL is of the form:
2663 ;; op0 = bsl (mask, op2, op3)
2664 ;; We can use any of:
2667 ;; bsl mask, op1, op2
2668 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2669 ;; bit op0, op2, mask
2670 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2671 ;; bif op0, op1, mask
2673 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2674 ;; Some forms of straight-line code may generate the equivalent form
2675 ;; in *aarch64_simd_bsl<mode>_alt.
;; BSL as xor/and/xor: op0 = ((op2 ^ op3) & mask) ^ op3.  The three
;; constraint alternatives tie op1/op3/op2 respectively to the destination,
;; selecting BSL, BIT or BIF.
2677 (define_insn "aarch64_simd_bsl<mode>_internal"
2678 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2682 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2683 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2684 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2685 (match_dup:<V_INT_EQUIV> 3)
2689 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2690 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2691 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2692 [(set_attr "type" "neon_bsl<q>")]
2695 ;; We need this form in addition to the above pattern to match the case
2696 ;; when combine tries merging three insns such that the second operand of
2697 ;; the outer XOR matches the second operand of the inner XOR rather than
2698 ;; the first. The two are equivalent but since recog doesn't try all
2699 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same computation with the outer xor taking operand 2 instead of 3:
;; op0 = ((op3 ^ op2) & mask) ^ op2.
2701 (define_insn "*aarch64_simd_bsl<mode>_alt"
2702 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2706 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2707 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2708 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2709 (match_dup:<V_INT_EQUIV> 2)))]
2712 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2713 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2714 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2715 [(set_attr "type" "neon_bsl<q>")]
2718 ;; DImode is special, we want to avoid computing operations which are
2719 ;; more naturally computed in general purpose registers in the vector
2720 ;; registers. If we do that, we need to move all three operands from general
2721 ;; purpose registers to vector registers, then back again. However, we
2722 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2723 ;; optimizations based on the component operations of a BSL.
2725 ;; That means we need a splitter back to the individual operations, if they
2726 ;; would be better calculated on the integer side.
;; DImode BSL: vector alternatives emit bsl/bit/bif; the GP-register
;; alternative is split back into xor/and/xor (final xor with operand 3,
;; matching the _internal RTL shape above).
2728 (define_insn_and_split "aarch64_simd_bsldi_internal"
2729 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2733 (match_operand:DI 3 "register_operand" "w,0,w,r")
2734 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2735 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2740 bsl\\t%0.8b, %2.8b, %3.8b
2741 bit\\t%0.8b, %2.8b, %1.8b
2742 bif\\t%0.8b, %3.8b, %1.8b
2744 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2745 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2747 /* Split back to individual operations. If we're before reload, and
2748 able to create a temporary register, do so. If we're after reload,
2749 we've got an early-clobber destination register, so use that.
2750 Otherwise, we can't create pseudos and we can't yet guarantee that
2751 operands[0] is safe to write, so FAIL to split. */
2754 if (reload_completed)
2755 scratch = operands[0];
2756 else if (can_create_pseudo_p ())
2757 scratch = gen_reg_rtx (DImode);
2761 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2762 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2763 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2766 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2767 (set_attr "length" "4,4,4,12")]
;; Commuted form of the DImode BSL (final xor with operand 2), mirroring
;; *aarch64_simd_bsl<mode>_alt.
2770 (define_insn_and_split "aarch64_simd_bsldi_alt"
2771 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2775 (match_operand:DI 3 "register_operand" "w,w,0,r")
2776 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2777 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2782 bsl\\t%0.8b, %3.8b, %2.8b
2783 bit\\t%0.8b, %3.8b, %1.8b
2784 bif\\t%0.8b, %2.8b, %1.8b
2786 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2787 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2789 /* Split back to individual operations. If we're before reload, and
2790 able to create a temporary register, do so. If we're after reload,
2791 we've got an early-clobber destination register, so use that.
2792 Otherwise, we can't create pseudos and we can't yet guarantee that
2793 operands[0] is safe to write, so FAIL to split. */
2796 if (reload_completed)
2797 scratch = operands[0];
2798 else if (can_create_pseudo_p ())
2799 scratch = gen_reg_rtx (DImode);
2803 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2804 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2805 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2808 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2809 (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  For FP modes the operands are viewed through their
;; integer-equivalent mode (the insn works on integer RTL), with the result
;; moved back through a lowpart if a temporary was needed.
2812 (define_expand "aarch64_simd_bsl<mode>"
2813 [(match_operand:VALLDIF 0 "register_operand")
2814 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2815 (match_operand:VALLDIF 2 "register_operand")
2816 (match_operand:VALLDIF 3 "register_operand")]
2819 /* We can't alias operands together if they have different modes. */
2820 rtx tmp = operands[0];
2821 if (FLOAT_MODE_P (<MODE>mode))
2823 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2824 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2825 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2827 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2828 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2832 if (tmp != operands[0])
2833 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 where mask operand 3 is set, else operand 2.
;; Special-cases mask-is-result (?-1:0 and ?0:-1) before falling back to BSL.
2838 (define_expand "vcond_mask_<mode><v_int_equiv>"
2839 [(match_operand:VALLDI 0 "register_operand")
2840 (match_operand:VALLDI 1 "nonmemory_operand")
2841 (match_operand:VALLDI 2 "nonmemory_operand")
2842 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2845 /* If we have (a = (P) ? -1 : 0);
2846 Then we can simply move the generated mask (result must be int). */
2847 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2848 && operands[2] == CONST0_RTX (<MODE>mode))
2849 emit_move_insn (operands[0], operands[3]);
2850 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2851 else if (operands[1] == CONST0_RTX (<MODE>mode)
2852 && operands[2] == CONSTM1_RTX (<MODE>mode))
2853 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2856 if (!REG_P (operands[1]))
2857 operands[1] = force_reg (<MODE>mode, operands[1]);
2858 if (!REG_P (operands[2]))
2859 operands[2] = force_reg (<MODE>mode, operands[2]);
2860 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2861 operands[1], operands[2]));
2867 ;; Patterns comparing two vectors to produce a mask.
;; Integer vec_cmp: dispatch on the comparison code to the matching CM*
;; insn.  LTU/LEU are handled by swapping operands of CMHI/CMHS, and NE
;; is implemented as the complement of CMEQ.
2869 (define_expand "vec_cmp<mode><mode>"
2870 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2871 (match_operator 1 "comparison_operator"
2872 [(match_operand:VSDQ_I_DI 2 "register_operand")
2873 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2876 rtx mask = operands[0];
2877 enum rtx_code code = GET_CODE (operands[1]);
2887 if (operands[3] == CONST0_RTX (<MODE>mode))
2892 if (!REG_P (operands[3]))
2893 operands[3] = force_reg (<MODE>mode, operands[3]);
2901 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2905 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2909 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2913 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2917 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2921 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]))
2925 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2929 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2933 /* Handle NE as !EQ. */
2934 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2935 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2939 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; Floating-point vec_cmp producing an integer mask.  Ordered comparisons
;; map directly to FCM* (swapping operands where needed); unordered ones
;; first squash NaN elements to zero so no FP exception is raised.
2949 (define_expand "vec_cmp<mode><v_int_equiv>"
2950 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2951 (match_operator 1 "comparison_operator"
2952 [(match_operand:VDQF 2 "register_operand")
2953 (match_operand:VDQF 3 "nonmemory_operand")]))]
2956 int use_zero_form = 0;
2957 enum rtx_code code = GET_CODE (operands[1]);
2958 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2960 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2969 if (operands[3] == CONST0_RTX (<MODE>mode))
2976 if (!REG_P (operands[3]))
2977 operands[3] = force_reg (<MODE>mode, operands[3]);
2987 comparison = gen_aarch64_cmlt<mode>;
2992 std::swap (operands[2], operands[3]);
2996 comparison = gen_aarch64_cmgt<mode>;
3001 comparison = gen_aarch64_cmle<mode>;
3006 std::swap (operands[2], operands[3]);
3010 comparison = gen_aarch64_cmge<mode>;
3014 comparison = gen_aarch64_cmeq<mode>;
3032 /* All of the above must not raise any FP exceptions. Thus we first
3033 check each operand for NaNs and force any elements containing NaN to
3034 zero before using them in the compare.
3035 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
3036 (cm<cc> (isnan (a) ? 0.0 : a,
3037 isnan (b) ? 0.0 : b))
3038 We use the following transformations for doing the comparisons:
3042 a UNLT b -> b GT a. */
3044 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
3045 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
3046 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
3047 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
3048 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
3049 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
3050 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
3051 lowpart_subreg (<V_INT_EQUIV>mode,
3054 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
3055 lowpart_subreg (<V_INT_EQUIV>mode,
3058 gcc_assert (comparison != NULL);
3059 emit_insn (comparison (operands[0],
3060 lowpart_subreg (<MODE>mode,
3061 tmp0, <V_INT_EQUIV>mode),
3062 lowpart_subreg (<MODE>mode,
3063 tmp1, <V_INT_EQUIV>mode)));
3064 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3074 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
3075 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
3081 a NE b -> ~(a EQ b) */
3082 gcc_assert (comparison != NULL);
3083 emit_insn (comparison (operands[0], operands[2], operands[3]));
3085 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3089 /* LTGT is not guaranteed to not generate a FP exception. So let's
3090 go the faster way : ((a > b) || (b > a)). */
3091 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3092 operands[2], operands[3]));
3093 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3094 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3100 /* cmeq (a, a) & cmeq (b, b). */
3101 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3102 operands[2], operands[2]));
3103 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3104 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3106 if (code == UNORDERED)
3107 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3108 else if (code == UNEQ)
3110 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3111 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3122 (define_expand "vec_cmpu<mode><mode>"
3123 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3124 (match_operator 1 "comparison_operator"
3125 [(match_operand:VSDQ_I_DI 2 "register_operand")
3126 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3129 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3130 operands[2], operands[3]));
3134 (define_expand "vcond<mode><mode>"
3135 [(set (match_operand:VALLDI 0 "register_operand")
3136 (if_then_else:VALLDI
3137 (match_operator 3 "comparison_operator"
3138 [(match_operand:VALLDI 4 "register_operand")
3139 (match_operand:VALLDI 5 "nonmemory_operand")])
3140 (match_operand:VALLDI 1 "nonmemory_operand")
3141 (match_operand:VALLDI 2 "nonmemory_operand")))]
3144 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3145 enum rtx_code code = GET_CODE (operands[3]);
3147 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3148 it as well as switch operands 1/2 in order to avoid the additional
3152 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3153 operands[4], operands[5]);
3154 std::swap (operands[1], operands[2]);
3156 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3157 operands[4], operands[5]));
3158 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3159 operands[2], mask));
3164 (define_expand "vcond<v_cmp_mixed><mode>"
3165 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3166 (if_then_else:<V_cmp_mixed>
3167 (match_operator 3 "comparison_operator"
3168 [(match_operand:VDQF_COND 4 "register_operand")
3169 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3170 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3171 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3174 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3175 enum rtx_code code = GET_CODE (operands[3]);
3177 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3178 it as well as switch operands 1/2 in order to avoid the additional
3182 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3183 operands[4], operands[5]);
3184 std::swap (operands[1], operands[2]);
3186 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3187 operands[4], operands[5]));
3188 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3189 operands[0], operands[1],
3190 operands[2], mask));
;; Unsigned vcond on integer vectors (VSDQ_I_DI): compare operands 4/5,
;; build a mask in the same mode, then select operands 1/2 via vcond_mask.
;; NE is rewritten to EQ with operands 1/2 swapped (see comment below).
3195 (define_expand "vcondu<mode><mode>"
3196 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3197 (if_then_else:VSDQ_I_DI
3198 (match_operator 3 "comparison_operator"
3199 [(match_operand:VSDQ_I_DI 4 "register_operand")
3200 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3201 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3202 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3205 rtx mask = gen_reg_rtx (<MODE>mode);
3206 enum rtx_code code = GET_CODE (operands[3]);
3208 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3209 it as well as switch operands 1/2 in order to avoid the additional
   NOT instruction.  */
3213 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3214 operands[4], operands[5]);
3215 std::swap (operands[1], operands[2]);
3217 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3218 operands[4], operands[5]));
3219 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3220 operands[2], mask));
;; Mixed-mode unsigned vcond: integer comparison (<V_cmp_mixed>) selecting
;; between float vectors (VDQF).  Mask is built in <V_INT_EQUIV>mode.
3224 (define_expand "vcondu<mode><v_cmp_mixed>"
3225 [(set (match_operand:VDQF 0 "register_operand")
3227 (match_operator 3 "comparison_operator"
3228 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3229 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3230 (match_operand:VDQF 1 "nonmemory_operand")
3231 (match_operand:VDQF 2 "nonmemory_operand")))]
3234 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3235 enum rtx_code code = GET_CODE (operands[3]);
3237 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3238 it as well as switch operands 1/2 in order to avoid the additional
   NOT instruction.  */
3242 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3243 operands[4], operands[5]);
3244 std::swap (operands[1], operands[2]);
3246 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3248 operands[4], operands[5]));
3249 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3250 operands[2], mask));
3254 ;; Patterns for AArch64 SIMD Intrinsics.
3256 ;; Lane extraction with sign extension to general purpose register.
;; Sign-extending lane extract to a general-purpose register: SMOV.
;; Operand 2 is remapped for big-endian lane numbering before output.
3257 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3258 [(set (match_operand:GPI 0 "register_operand" "=r")
3260 (vec_select:<VDQQH:VEL>
3261 (match_operand:VDQQH 1 "register_operand" "w")
3262 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3265 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3266 INTVAL (operands[2]));
3267 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3269 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; Zero-extending lane extract to a general-purpose register: UMOV.
;; Always writes %w0: a 32-bit write zero-extends to the full X register.
3272 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3273 [(set (match_operand:GPI 0 "register_operand" "=r")
3275 (vec_select:<VDQQH:VEL>
3276 (match_operand:VDQQH 1 "register_operand" "w")
3277 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3280 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3281 INTVAL (operands[2]));
3282 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3284 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3287 ;; Lane extraction of a value, neither sign nor zero extension
3288 ;; is guaranteed so upper bits should be considered undefined.
3289 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Plain lane extract (upper bits of the result are undefined — see the
;; comment preceding this pattern).  Three alternatives:
;;   0: to GP register (umov), 1: to FP/SIMD register (dup),
;;   2: store one lane straight to memory (st1).
3290 (define_insn "aarch64_get_lane<mode>"
3291 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3293 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3294 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3297 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3298 switch (which_alternative)
3301 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3303 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3305 return "st1\\t{%1.<Vetype>}[%2], %0";
3310 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Combine two adjacent VDC memory operands into one <VDBL> register load.
;; The condition requires operand 2's address to be exactly operand 1's
;; address plus the mode size, i.e. the two halves are contiguous.
3313 (define_insn "load_pair_lanes<mode>"
3314 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3316 (match_operand:VDC 1 "memory_operand" "Utq")
3317 (match_operand:VDC 2 "memory_operand" "m")))]
3318 "TARGET_SIMD && !STRICT_ALIGNMENT
3319 && rtx_equal_p (XEXP (operands[2], 0),
3320 plus_constant (Pmode,
3321 XEXP (operands[1], 0),
3322 GET_MODE_SIZE (<MODE>mode)))"
3324 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a register pair (SIMD "w" or general "r" alternatives) to a
;; pair-capable memory operand; the GP alternative emits STP.
3327 (define_insn "store_pair_lanes<mode>"
3328 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3330 (match_operand:VDC 1 "register_operand" "w, r")
3331 (match_operand:VDC 2 "register_operand" "w, r")))]
3335 stp\\t%x1, %x2, %y0"
3336 [(set_attr "type" "neon_stp, store_16")]
3339 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a VDC value with zero into a <VDBL> register.  Little-endian
;; form: the zero (operand 2) is the high half.
3342 (define_insn "@aarch64_combinez<mode>"
3343 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3345 (match_operand:VDC 1 "general_operand" "w,?r,m")
3346 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3347 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3352 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3353 (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterpart: the concat order is reversed (zero first) so
;; that the lane layout matches the little-endian pattern's semantics.
3356 (define_insn "@aarch64_combinez_be<mode>"
3357 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3359 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3360 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3361 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3366 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3367 (set_attr "arch" "simd,fp,simd")]
;; Combine two VDC registers into one <VDBL>; delegates to the helper
;; aarch64_split_simd_combine, which handles endianness.
3370 (define_expand "aarch64_combine<mode>"
3371 [(match_operand:<VDBL> 0 "register_operand")
3372 (match_operand:VDC 1 "register_operand")
3373 (match_operand:VDC 2 "register_operand")]
3376 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Combine via explicit low/high quad moves (operand 1 -> low half,
;; operand 2 -> high half of the destination).
3382 (define_expand "@aarch64_simd_combine<mode>"
3383 [(match_operand:<VDBL> 0 "register_operand")
3384 (match_operand:VDC 1 "register_operand")
3385 (match_operand:VDC 2 "register_operand")]
3388 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3389 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3392 [(set_attr "type" "multiple")]
3395 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two VQW vectors: [su]addl2/[su]subl2.
;; Operand 3 is a parallel selecting the high-half lanes.
3397 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3398 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3399 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3400 (match_operand:VQW 1 "register_operand" "w")
3401 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3402 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3403 (match_operand:VQW 2 "register_operand" "w")
3406 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3407 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves: [su]addl/[su]subl.
3410 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3411 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3412 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3413 (match_operand:VQW 1 "register_operand" "w")
3414 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3415 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3416 (match_operand:VQW 2 "register_operand" "w")
3419 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3420 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Expanders for the "2" (high-half) widening forms: each builds a
;; high-half lane-selector parallel `p' and forwards to the _hi_internal
;; pattern above.
3424 (define_expand "aarch64_saddl2<mode>"
3425 [(match_operand:<VWIDE> 0 "register_operand")
3426 (match_operand:VQW 1 "register_operand")
3427 (match_operand:VQW 2 "register_operand")]
3430 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3431 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned variant of the above.
3436 (define_expand "aarch64_uaddl2<mode>"
3437 [(match_operand:<VWIDE> 0 "register_operand")
3438 (match_operand:VQW 1 "register_operand")
3439 (match_operand:VQW 2 "register_operand")]
3442 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3443 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Signed widening subtract, high halves.
3448 (define_expand "aarch64_ssubl2<mode>"
3449 [(match_operand:<VWIDE> 0 "register_operand")
3450 (match_operand:VQW 1 "register_operand")
3451 (match_operand:VQW 2 "register_operand")]
3454 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3455 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned widening subtract, high halves.
3460 (define_expand "aarch64_usubl2<mode>"
3461 [(match_operand:<VWIDE> 0 "register_operand")
3462 (match_operand:VQW 1 "register_operand")
3463 (match_operand:VQW 2 "register_operand")]
3466 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3467 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of full 64-bit vectors (VD_BHSI): [su]addl/[su]subl.
3472 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3473 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3474 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3475 (match_operand:VD_BHSI 1 "register_operand" "w"))
3477 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3479 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3480 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3483 ;; <su><addsub>w<q>.
;; widen_ssum3 for 128-bit inputs (VQW): done as saddw on the low half
;; followed by saddw2 on the high half, via a temporary.
3485 (define_expand "widen_ssum<mode>3"
3486 [(set (match_operand:<VDBLW> 0 "register_operand")
3487 (plus:<VDBLW> (sign_extend:<VDBLW>
3488 (match_operand:VQW 1 "register_operand"))
3489 (match_operand:<VDBLW> 2 "register_operand")))]
3492 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3493 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3495 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3497 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; widen_ssum3 for 64-bit inputs (VD_BHSI): a single saddw suffices.
3502 (define_expand "widen_ssum<mode>3"
3503 [(set (match_operand:<VWIDE> 0 "register_operand")
3504 (plus:<VWIDE> (sign_extend:<VWIDE>
3505 (match_operand:VD_BHSI 1 "register_operand"))
3506 (match_operand:<VWIDE> 2 "register_operand")))]
3509 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterpart for 128-bit inputs: uaddw + uaddw2.
3513 (define_expand "widen_usum<mode>3"
3514 [(set (match_operand:<VDBLW> 0 "register_operand")
3515 (plus:<VDBLW> (zero_extend:<VDBLW>
3516 (match_operand:VQW 1 "register_operand"))
3517 (match_operand:<VDBLW> 2 "register_operand")))]
3520 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3521 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3523 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3525 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned counterpart for 64-bit inputs: single uaddw.
3530 (define_expand "widen_usum<mode>3"
3531 [(set (match_operand:<VWIDE> 0 "register_operand")
3532 (plus:<VWIDE> (zero_extend:<VWIDE>
3533 (match_operand:VD_BHSI 1 "register_operand"))
3534 (match_operand:<VWIDE> 2 "register_operand")))]
3537 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening subtract: wide op1 minus extended narrow op2 ([su]subw).
3541 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3542 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3545 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3547 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3548 [(set_attr "type" "neon_sub_widen")]
;; Same, but op2 is the LOW half of a 128-bit vector.
3551 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3552 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3553 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3556 (match_operand:VQW 2 "register_operand" "w")
3557 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3559 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3560 [(set_attr "type" "neon_sub_widen")]
;; HIGH-half variant: [su]subw2.
3563 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3565 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3568 (match_operand:VQW 2 "register_operand" "w")
3569 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3571 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3572 [(set_attr "type" "neon_sub_widen")]
;; Widening add: wide op1 plus extended narrow op2 ([su]addw).
3575 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3576 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3578 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3579 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3581 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3582 [(set_attr "type" "neon_add_widen")]
;; Low-half add variant.
3585 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3586 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3590 (match_operand:VQW 2 "register_operand" "w")
3591 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3592 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3594 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3595 [(set_attr "type" "neon_add_widen")]
;; High-half add variant: [su]addw2.
3598 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3599 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3603 (match_operand:VQW 2 "register_operand" "w")
3604 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3605 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3607 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3608 [(set_attr "type" "neon_add_widen")]
;; Expanders for the "2" (high-half) widening add/sub forms: build the
;; high-half selector `p' and forward to the matching *_internal pattern.
3611 (define_expand "aarch64_saddw2<mode>"
3612 [(match_operand:<VWIDE> 0 "register_operand")
3613 (match_operand:<VWIDE> 1 "register_operand")
3614 (match_operand:VQW 2 "register_operand")]
3617 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3618 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Unsigned add, high halves.
3623 (define_expand "aarch64_uaddw2<mode>"
3624 [(match_operand:<VWIDE> 0 "register_operand")
3625 (match_operand:<VWIDE> 1 "register_operand")
3626 (match_operand:VQW 2 "register_operand")]
3629 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3630 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Signed subtract, high halves.
3636 (define_expand "aarch64_ssubw2<mode>"
3637 [(match_operand:<VWIDE> 0 "register_operand")
3638 (match_operand:<VWIDE> 1 "register_operand")
3639 (match_operand:VQW 2 "register_operand")]
3642 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3643 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Unsigned subtract, high halves.
3648 (define_expand "aarch64_usubw2<mode>"
3649 [(match_operand:<VWIDE> 0 "register_operand")
3650 (match_operand:<VWIDE> 1 "register_operand")
3651 (match_operand:VQW 2 "register_operand")]
3654 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3655 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3660 ;; <su><r>h<addsub>.
;; Truncating (floor) halving average: maps to [su]hadd.
3662 (define_expand "<u>avg<mode>3_floor"
3663 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3664 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3665 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Rounding (ceil) halving average: maps to [su]rhadd.
3670 (define_expand "<u>avg<mode>3_ceil"
3671 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3672 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3673 (match_operand:VDQ_BHSI 2 "register_operand")]
;; The underlying insn: [su][r]hadd / [su][r]hsub.
3678 (define_insn "aarch64_<sur>h<addsub><mode>"
3679 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3680 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3681 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3684 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3685 [(set_attr "type" "neon_<addsub>_halve<q>")]
3688 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: [r]addhn/[r]subhn.
3690 (define_insn "aarch64_<sur><addsub>hn<mode>"
3691 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3692 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3693 (match_operand:VQN 2 "register_operand" "w")]
3696 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3697 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" form: narrows into the high half of the destination; operand 1
;; (constraint "0") supplies the existing low half.
3700 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3701 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3702 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3703 (match_operand:VQN 2 "register_operand" "w")
3704 (match_operand:VQN 3 "register_operand" "w")]
3707 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3708 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply on byte vectors: PMUL.
3713 (define_insn "aarch64_pmul<mode>"
3714 [(set (match_operand:VB 0 "register_operand" "=w")
3715 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3716 (match_operand:VB 2 "register_operand" "w")]
3719 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3720 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: fp multiply-extended (0 * inf = 2.0 instead of NaN), vector and
;; scalar forms via VHSDF_HSDF.
3725 (define_insn "aarch64_fmulx<mode>"
3726 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3728 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3729 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3732 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3733 [(set_attr "type" "neon_fp_mul_<stype>")]
3736 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane taken from the opposite-width vector mode.
3738 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3739 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3741 [(match_operand:VDQSF 1 "register_operand" "w")
3742 (vec_duplicate:VDQSF
3744 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3745 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3749 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3750 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3752 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3755 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-mode vector.
3757 (define_insn "*aarch64_mulx_elt<mode>"
3758 [(set (match_operand:VDQF 0 "register_operand" "=w")
3760 [(match_operand:VDQF 1 "register_operand" "w")
3763 (match_operand:VDQF 2 "register_operand" "w")
3764 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3768 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3769 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3771 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX by a scalar broadcast (vec_duplicate of a scalar register);
;; emitted as a multiply by lane [0].
3776 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3777 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3779 [(match_operand:VHSDF 1 "register_operand" "w")
3780 (vec_duplicate:VHSDF
3781 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3784 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3785 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3788 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3789 ;; vmulxd_lane_f64 == vmulx_lane_f64
3790 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar by one lane of a vector (vmulx[sd]_lane*).
3792 (define_insn "*aarch64_vgetfmulx<mode>"
3793 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3795 [(match_operand:<VEL> 1 "register_operand" "w")
3797 (match_operand:VDQF 2 "register_operand" "w")
3798 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3802 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3803 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3805 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [su]qadd / [su]qsub.
3809 (define_insn "aarch64_<su_optab>q<addsub><mode>"
3810 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3811 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3812 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3814 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3815 [(set_attr "type" "neon_q<addsub><q>")]
3818 ;; suqadd and usqadd
;; SUQADD/USQADD: accumulating saturating add into operand 0 (tied to
;; operand 1 via constraint "0"); only the addend appears in the asm.
3820 (define_insn "aarch64_<sur>qadd<mode>"
3821 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3822 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3823 (match_operand:VSDQ_I 2 "register_operand" "w")]
3826 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3827 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: saturating extract-narrow, signed to unsigned.
3832 (define_insn "aarch64_sqmovun<mode>"
3833 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3834 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3837 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3838 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; SQXTN/UQXTN: saturating extract-narrow.
3843 (define_insn "aarch64_<sur>qmovn<mode>"
3844 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3845 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3848 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3849 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Signed saturating unary op (s<optab>, e.g. sqabs/sqneg).
3854 (define_insn "aarch64_s<optab><mode>"
3855 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3857 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3859 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3860 [(set_attr "type" "neon_<optab><q>")]
;; Saturating doubling multiply returning high half: sqdmulh/sqrdmulh.
3865 (define_insn "aarch64_sq<r>dmulh<mode>"
3866 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3868 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3869 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3872 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3873 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; sq[r]dmulh by one lane of a 64-bit-vector operand (VCOND), vector form.
3878 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3879 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3881 [(match_operand:VDQHS 1 "register_operand" "w")
3883 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3884 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3888 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3889 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3890 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Same with a lane from a 128-bit-vector operand (VCONQ).
3893 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3894 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3896 [(match_operand:VDQHS 1 "register_operand" "w")
3898 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3899 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3903 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3904 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3905 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) forms of the two lane patterns above.
3908 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3909 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3911 [(match_operand:SD_HSI 1 "register_operand" "w")
3913 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3914 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3918 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3919 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3920 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar, lane from a 128-bit vector.
3923 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3924 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3926 [(match_operand:SD_HSI 1 "register_operand" "w")
3928 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3929 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3933 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3934 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3935 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH/SQRDMLSH: rounding doubling multiply-accumulate/subtract high.
;; Operand 1 is the accumulator, tied to the destination ("0").
3940 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3941 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3943 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3944 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3945 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3948 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3949 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3952 ;; sqrdml[as]h_lane.
;; Vector form, multiplicand lane from a 64-bit vector (VCOND).
3954 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3955 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3957 [(match_operand:VDQHS 1 "register_operand" "0")
3958 (match_operand:VDQHS 2 "register_operand" "w")
3960 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3961 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3965 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3967 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3969 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar form of the above.
3972 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3973 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3975 [(match_operand:SD_HSI 1 "register_operand" "0")
3976 (match_operand:SD_HSI 2 "register_operand" "w")
3978 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3979 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3983 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3985 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3987 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3990 ;; sqrdml[as]h_laneq.
;; Vector form, lane from a 128-bit vector (VCONQ).
3992 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3993 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3995 [(match_operand:VDQHS 1 "register_operand" "0")
3996 (match_operand:VDQHS 2 "register_operand" "w")
3998 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3999 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4003 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4005 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
4007 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar form, lane from a 128-bit vector.
4010 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
4011 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
4013 [(match_operand:SD_HSI 1 "register_operand" "0")
4014 (match_operand:SD_HSI 2 "register_operand" "w")
4016 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4017 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
4021 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4023 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
4025 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL/SQDMLSL: signed saturating doubling multiply-accumulate long.
;; Operand 1 is the wide accumulator (tied "0"); operands 2/3 are the
;; narrow multiplicands, sign-extended before the doubling multiply.
4030 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
4031 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4033 (match_operand:<VWIDE> 1 "register_operand" "0")
4036 (sign_extend:<VWIDE>
4037 (match_operand:VSD_HSI 2 "register_operand" "w"))
4038 (sign_extend:<VWIDE>
4039 (match_operand:VSD_HSI 3 "register_operand" "w")))
4042 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4043 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Vector-by-lane variant; lane from a 64-bit vector (VCOND).
4048 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4049 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4051 (match_operand:<VWIDE> 1 "register_operand" "0")
4054 (sign_extend:<VWIDE>
4055 (match_operand:VD_HSI 2 "register_operand" "w"))
4056 (sign_extend:<VWIDE>
4057 (vec_duplicate:VD_HSI
4059 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4060 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4065 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4067 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4069 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector-by-lane variant; lane from a 128-bit vector (VCONQ).
4072 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4073 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4075 (match_operand:<VWIDE> 1 "register_operand" "0")
4078 (sign_extend:<VWIDE>
4079 (match_operand:VD_HSI 2 "register_operand" "w"))
4080 (sign_extend:<VWIDE>
4081 (vec_duplicate:VD_HSI
4083 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4084 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4089 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4091 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4093 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane variants (SD_HSI multiplicand).
4096 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4097 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4099 (match_operand:<VWIDE> 1 "register_operand" "0")
4102 (sign_extend:<VWIDE>
4103 (match_operand:SD_HSI 2 "register_operand" "w"))
4104 (sign_extend:<VWIDE>
4106 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4107 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4112 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4114 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4116 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane, lane from a 128-bit vector.
4119 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4120 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4122 (match_operand:<VWIDE> 1 "register_operand" "0")
4125 (sign_extend:<VWIDE>
4126 (match_operand:SD_HSI 2 "register_operand" "w"))
4127 (sign_extend:<VWIDE>
4129 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4130 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4135 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4137 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4139 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "_n" variant: multiply every element by one broadcast scalar (operand 3),
;; emitted as a multiply by lane [0].
4144 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4145 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4147 (match_operand:<VWIDE> 1 "register_operand" "0")
4150 (sign_extend:<VWIDE>
4151 (match_operand:VD_HSI 2 "register_operand" "w"))
4152 (sign_extend:<VWIDE>
4153 (vec_duplicate:VD_HSI
4154 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4157 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4158 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2/SQDMLSL2: high-half form; operands 4 selects the high lanes
;; of both multiplicands.
4163 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4164 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4166 (match_operand:<VWIDE> 1 "register_operand" "0")
4169 (sign_extend:<VWIDE>
4171 (match_operand:VQ_HSI 2 "register_operand" "w")
4172 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4173 (sign_extend:<VWIDE>
4175 (match_operand:VQ_HSI 3 "register_operand" "w")
4179 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4180 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the high-half selector and forwards to _internal.
4183 (define_expand "aarch64_sqdmlal2<mode>"
4184 [(match_operand:<VWIDE> 0 "register_operand")
4185 (match_operand:<VWIDE> 1 "register_operand")
4186 (match_operand:VQ_HSI 2 "register_operand")
4187 (match_operand:VQ_HSI 3 "register_operand")]
4190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4191 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4192 operands[2], operands[3], p));
;; Subtracting counterpart.
4196 (define_expand "aarch64_sqdmlsl2<mode>"
4197 [(match_operand:<VWIDE> 0 "register_operand")
4198 (match_operand:<VWIDE> 1 "register_operand")
4199 (match_operand:VQ_HSI 2 "register_operand")
4200 (match_operand:VQ_HSI 3 "register_operand")]
4203 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4204 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4205 operands[2], operands[3], p));
;; High-half SQDML[AS]L2 by lane; operand 5 selects the high half of the
;; vector multiplicand, operand 4 is the lane index into operand 3 (VCOND).
4211 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4212 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4214 (match_operand:<VWIDE> 1 "register_operand" "0")
4217 (sign_extend:<VWIDE>
4219 (match_operand:VQ_HSI 2 "register_operand" "w")
4220 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4221 (sign_extend:<VWIDE>
4222 (vec_duplicate:<VHALF>
4224 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4225 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4230 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4232 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4234 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Same with the lane taken from a 128-bit vector (VCONQ).
4237 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4240 (match_operand:<VWIDE> 1 "register_operand" "0")
4243 (sign_extend:<VWIDE>
4245 (match_operand:VQ_HSI 2 "register_operand" "w")
4246 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4247 (sign_extend:<VWIDE>
4248 (vec_duplicate:<VHALF>
4250 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4251 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4256 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4258 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4260 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders: build the high-half selector and forward to the matching
;; *_internal pattern above.
4263 (define_expand "aarch64_sqdmlal2_lane<mode>"
4264 [(match_operand:<VWIDE> 0 "register_operand")
4265 (match_operand:<VWIDE> 1 "register_operand")
4266 (match_operand:VQ_HSI 2 "register_operand")
4267 (match_operand:<VCOND> 3 "register_operand")
4268 (match_operand:SI 4 "immediate_operand")]
4271 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4272 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4273 operands[2], operands[3],
;; laneq expander (128-bit lane source).
4278 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4279 [(match_operand:<VWIDE> 0 "register_operand")
4280 (match_operand:<VWIDE> 1 "register_operand")
4281 (match_operand:VQ_HSI 2 "register_operand")
4282 (match_operand:<VCONQ> 3 "register_operand")
4283 (match_operand:SI 4 "immediate_operand")]
4286 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4287 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4288 operands[2], operands[3],
;; Subtracting (sqdmlsl2) lane expander.
4293 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4294 [(match_operand:<VWIDE> 0 "register_operand")
4295 (match_operand:<VWIDE> 1 "register_operand")
4296 (match_operand:VQ_HSI 2 "register_operand")
4297 (match_operand:<VCOND> 3 "register_operand")
4298 (match_operand:SI 4 "immediate_operand")]
4301 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4303 operands[2], operands[3],
;; Subtracting laneq expander.
4308 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4309 [(match_operand:<VWIDE> 0 "register_operand")
4310 (match_operand:<VWIDE> 1 "register_operand")
4311 (match_operand:VQ_HSI 2 "register_operand")
4312 (match_operand:<VCONQ> 3 "register_operand")
4313 (match_operand:SI 4 "immediate_operand")]
4316 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4317 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4318 operands[2], operands[3],
4323 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4326 (match_operand:<VWIDE> 1 "register_operand" "0")
4329 (sign_extend:<VWIDE>
4331 (match_operand:VQ_HSI 2 "register_operand" "w")
4332 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4333 (sign_extend:<VWIDE>
4334 (vec_duplicate:<VHALF>
4335 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4338 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4339 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4342 (define_expand "aarch64_sqdmlal2_n<mode>"
4343 [(match_operand:<VWIDE> 0 "register_operand")
4344 (match_operand:<VWIDE> 1 "register_operand")
4345 (match_operand:VQ_HSI 2 "register_operand")
4346 (match_operand:<VEL> 3 "register_operand")]
4349 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4350 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4351 operands[2], operands[3],
4356 (define_expand "aarch64_sqdmlsl2_n<mode>"
4357 [(match_operand:<VWIDE> 0 "register_operand")
4358 (match_operand:<VWIDE> 1 "register_operand")
4359 (match_operand:VQ_HSI 2 "register_operand")
4360 (match_operand:<VEL> 3 "register_operand")]
4363 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4364 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4365 operands[2], operands[3],
4372 (define_insn "aarch64_sqdmull<mode>"
4373 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4376 (sign_extend:<VWIDE>
4377 (match_operand:VSD_HSI 1 "register_operand" "w"))
4378 (sign_extend:<VWIDE>
4379 (match_operand:VSD_HSI 2 "register_operand" "w")))
4382 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4383 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4388 (define_insn "aarch64_sqdmull_lane<mode>"
4389 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4392 (sign_extend:<VWIDE>
4393 (match_operand:VD_HSI 1 "register_operand" "w"))
4394 (sign_extend:<VWIDE>
4395 (vec_duplicate:VD_HSI
4397 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4398 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4403 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4404 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4406 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4409 (define_insn "aarch64_sqdmull_laneq<mode>"
4410 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4413 (sign_extend:<VWIDE>
4414 (match_operand:VD_HSI 1 "register_operand" "w"))
4415 (sign_extend:<VWIDE>
4416 (vec_duplicate:VD_HSI
4418 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4419 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4424 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4425 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4427 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4430 (define_insn "aarch64_sqdmull_lane<mode>"
4431 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4434 (sign_extend:<VWIDE>
4435 (match_operand:SD_HSI 1 "register_operand" "w"))
4436 (sign_extend:<VWIDE>
4438 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4439 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4444 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4445 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4447 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4450 (define_insn "aarch64_sqdmull_laneq<mode>"
4451 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4454 (sign_extend:<VWIDE>
4455 (match_operand:SD_HSI 1 "register_operand" "w"))
4456 (sign_extend:<VWIDE>
4458 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4459 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4464 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4465 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4467 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4472 (define_insn "aarch64_sqdmull_n<mode>"
4473 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4476 (sign_extend:<VWIDE>
4477 (match_operand:VD_HSI 1 "register_operand" "w"))
4478 (sign_extend:<VWIDE>
4479 (vec_duplicate:VD_HSI
4480 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4484 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4485 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4492 (define_insn "aarch64_sqdmull2<mode>_internal"
4493 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4496 (sign_extend:<VWIDE>
4498 (match_operand:VQ_HSI 1 "register_operand" "w")
4499 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4500 (sign_extend:<VWIDE>
4502 (match_operand:VQ_HSI 2 "register_operand" "w")
4507 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4508 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4511 (define_expand "aarch64_sqdmull2<mode>"
4512 [(match_operand:<VWIDE> 0 "register_operand")
4513 (match_operand:VQ_HSI 1 "register_operand")
4514 (match_operand:VQ_HSI 2 "register_operand")]
4517 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4525 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4526 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4529 (sign_extend:<VWIDE>
4531 (match_operand:VQ_HSI 1 "register_operand" "w")
4532 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4533 (sign_extend:<VWIDE>
4534 (vec_duplicate:<VHALF>
4536 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4537 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4542 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4543 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4545 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4548 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4549 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4552 (sign_extend:<VWIDE>
4554 (match_operand:VQ_HSI 1 "register_operand" "w")
4555 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4556 (sign_extend:<VWIDE>
4557 (vec_duplicate:<VHALF>
4559 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4560 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4565 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4566 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4568 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4571 (define_expand "aarch64_sqdmull2_lane<mode>"
4572 [(match_operand:<VWIDE> 0 "register_operand")
4573 (match_operand:VQ_HSI 1 "register_operand")
4574 (match_operand:<VCOND> 2 "register_operand")
4575 (match_operand:SI 3 "immediate_operand")]
4578 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4579 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4580 operands[2], operands[3],
4585 (define_expand "aarch64_sqdmull2_laneq<mode>"
4586 [(match_operand:<VWIDE> 0 "register_operand")
4587 (match_operand:VQ_HSI 1 "register_operand")
4588 (match_operand:<VCONQ> 2 "register_operand")
4589 (match_operand:SI 3 "immediate_operand")]
4592 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4593 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4594 operands[2], operands[3],
4601 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4602 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4605 (sign_extend:<VWIDE>
4607 (match_operand:VQ_HSI 1 "register_operand" "w")
4608 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4609 (sign_extend:<VWIDE>
4610 (vec_duplicate:<VHALF>
4611 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4615 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4616 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4619 (define_expand "aarch64_sqdmull2_n<mode>"
4620 [(match_operand:<VWIDE> 0 "register_operand")
4621 (match_operand:VQ_HSI 1 "register_operand")
4622 (match_operand:<VEL> 2 "register_operand")]
4625 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4626 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4633 (define_insn "aarch64_<sur>shl<mode>"
4634 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4636 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4637 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4640 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4641 [(set_attr "type" "neon_shift_reg<q>")]
4647 (define_insn "aarch64_<sur>q<r>shl<mode>"
4648 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4650 [(match_operand:VSDQ_I 1 "register_operand" "w")
4651 (match_operand:VSDQ_I 2 "register_operand" "w")]
4654 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4655 [(set_attr "type" "neon_sat_shift_reg<q>")]
4660 (define_insn "aarch64_<sur>shll_n<mode>"
4661 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4662 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4664 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4668 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4669 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4671 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4673 [(set_attr "type" "neon_shift_imm_long")]
4678 (define_insn "aarch64_<sur>shll2_n<mode>"
4679 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4680 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4681 (match_operand:SI 2 "immediate_operand" "i")]
4685 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4686 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4688 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4690 [(set_attr "type" "neon_shift_imm_long")]
4695 (define_insn "aarch64_<sur>shr_n<mode>"
4696 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4697 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4699 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4702 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4703 [(set_attr "type" "neon_sat_shift_imm<q>")]
4708 (define_insn "aarch64_<sur>sra_n<mode>"
4709 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4710 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4711 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4713 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4716 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4717 [(set_attr "type" "neon_shift_acc<q>")]
4722 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4723 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4724 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4725 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4727 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4730 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4731 [(set_attr "type" "neon_shift_imm<q>")]
4736 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4737 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4738 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4740 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4743 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4744 [(set_attr "type" "neon_sat_shift_imm<q>")]
4750 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4751 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4752 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4754 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4757 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4758 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4762 ;; cm(eq|ge|gt|lt|le)
4763 ;; Note, we have constraints for Dz and Z as different expanders
4764 ;; have different ideas of what should be passed to this pattern.
4766 (define_insn "aarch64_cm<optab><mode>"
4767 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4769 (COMPARISONS:<V_INT_EQUIV>
4770 (match_operand:VDQ_I 1 "register_operand" "w,w")
4771 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4775 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4776 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4777 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4780 (define_insn_and_split "aarch64_cm<optab>di"
4781 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4784 (match_operand:DI 1 "register_operand" "w,w,r")
4785 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4787 (clobber (reg:CC CC_REGNUM))]
4790 "&& reload_completed"
4791 [(set (match_operand:DI 0 "register_operand")
4794 (match_operand:DI 1 "register_operand")
4795 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4798 /* If we are in the general purpose register file,
4799 we split to a sequence of comparison and store. */
4800 if (GP_REGNUM_P (REGNO (operands[0]))
4801 && GP_REGNUM_P (REGNO (operands[1])))
4803 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4804 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4805 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4806 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4809 /* Otherwise, we expand to a similar pattern which does not
4810 clobber CC_REGNUM. */
4812 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4815 (define_insn "*aarch64_cm<optab>di"
4816 [(set (match_operand:DI 0 "register_operand" "=w,w")
4819 (match_operand:DI 1 "register_operand" "w,w")
4820 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4822 "TARGET_SIMD && reload_completed"
4824 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4825 cm<optab>\t%d0, %d1, #0"
4826 [(set_attr "type" "neon_compare, neon_compare_zero")]
4831 (define_insn "aarch64_cm<optab><mode>"
4832 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4834 (UCOMPARISONS:<V_INT_EQUIV>
4835 (match_operand:VDQ_I 1 "register_operand" "w")
4836 (match_operand:VDQ_I 2 "register_operand" "w")
4839 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4840 [(set_attr "type" "neon_compare<q>")]
4843 (define_insn_and_split "aarch64_cm<optab>di"
4844 [(set (match_operand:DI 0 "register_operand" "=w,r")
4847 (match_operand:DI 1 "register_operand" "w,r")
4848 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4850 (clobber (reg:CC CC_REGNUM))]
4853 "&& reload_completed"
4854 [(set (match_operand:DI 0 "register_operand")
4857 (match_operand:DI 1 "register_operand")
4858 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4861 /* If we are in the general purpose register file,
4862 we split to a sequence of comparison and store. */
4863 if (GP_REGNUM_P (REGNO (operands[0]))
4864 && GP_REGNUM_P (REGNO (operands[1])))
4866 machine_mode mode = CCmode;
4867 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4868 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4869 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4872 /* Otherwise, we expand to a similar pattern which does not
4873 clobber CC_REGNUM. */
4875 [(set_attr "type" "neon_compare,multiple")]
4878 (define_insn "*aarch64_cm<optab>di"
4879 [(set (match_operand:DI 0 "register_operand" "=w")
4882 (match_operand:DI 1 "register_operand" "w")
4883 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4885 "TARGET_SIMD && reload_completed"
4886 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4887 [(set_attr "type" "neon_compare")]
4892 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4893 ;; we don't have any insns using ne, and aarch64_vcond outputs
4894 ;; not (neg (eq (and x y) 0))
4895 ;; which is rewritten by simplify_rtx as
4896 ;; plus (eq (and x y) 0) -1.
4898 (define_insn "aarch64_cmtst<mode>"
4899 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4903 (match_operand:VDQ_I 1 "register_operand" "w")
4904 (match_operand:VDQ_I 2 "register_operand" "w"))
4905 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4906 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4909 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4910 [(set_attr "type" "neon_tst<q>")]
4913 (define_insn_and_split "aarch64_cmtstdi"
4914 [(set (match_operand:DI 0 "register_operand" "=w,r")
4918 (match_operand:DI 1 "register_operand" "w,r")
4919 (match_operand:DI 2 "register_operand" "w,r"))
4921 (clobber (reg:CC CC_REGNUM))]
4924 "&& reload_completed"
4925 [(set (match_operand:DI 0 "register_operand")
4929 (match_operand:DI 1 "register_operand")
4930 (match_operand:DI 2 "register_operand"))
4933 /* If we are in the general purpose register file,
4934 we split to a sequence of comparison and store. */
4935 if (GP_REGNUM_P (REGNO (operands[0]))
4936 && GP_REGNUM_P (REGNO (operands[1])))
4938 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4939 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4940 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4941 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4942 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4945 /* Otherwise, we expand to a similar pattern which does not
4946 clobber CC_REGNUM. */
4948 [(set_attr "type" "neon_tst,multiple")]
4951 (define_insn "*aarch64_cmtstdi"
4952 [(set (match_operand:DI 0 "register_operand" "=w")
4956 (match_operand:DI 1 "register_operand" "w")
4957 (match_operand:DI 2 "register_operand" "w"))
4960 "cmtst\t%d0, %d1, %d2"
4961 [(set_attr "type" "neon_tst")]
4964 ;; fcm(eq|ge|gt|le|lt)
4966 (define_insn "aarch64_cm<optab><mode>"
4967 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4969 (COMPARISONS:<V_INT_EQUIV>
4970 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4971 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4975 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4976 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4977 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4981 ;; Note we can also handle what would be fac(le|lt) by
4982 ;; generating fac(ge|gt).
4984 (define_insn "aarch64_fac<optab><mode>"
4985 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4987 (FAC_COMPARISONS:<V_INT_EQUIV>
4989 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4991 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4994 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4995 [(set_attr "type" "neon_fp_compare_<stype><q>")]
5000 (define_insn "aarch64_addp<mode>"
5001 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
5003 [(match_operand:VD_BHSI 1 "register_operand" "w")
5004 (match_operand:VD_BHSI 2 "register_operand" "w")]
5007 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5008 [(set_attr "type" "neon_reduc_add<q>")]
5011 (define_insn "aarch64_addpdi"
5012 [(set (match_operand:DI 0 "register_operand" "=w")
5014 [(match_operand:V2DI 1 "register_operand" "w")]
5018 [(set_attr "type" "neon_reduc_add")]
5023 (define_expand "sqrt<mode>2"
5024 [(set (match_operand:VHSDF 0 "register_operand")
5025 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
5028 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
5032 (define_insn "*sqrt<mode>2"
5033 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5034 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
5036 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
5037 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
5040 ;; Patterns for vector struct loads and stores.
5042 (define_insn "aarch64_simd_ld2<mode>"
5043 [(set (match_operand:OI 0 "register_operand" "=w")
5044 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5045 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5048 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5049 [(set_attr "type" "neon_load2_2reg<q>")]
5052 (define_insn "aarch64_simd_ld2r<mode>"
5053 [(set (match_operand:OI 0 "register_operand" "=w")
5054 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5055 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5058 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5059 [(set_attr "type" "neon_load2_all_lanes<q>")]
5062 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
5063 [(set (match_operand:OI 0 "register_operand" "=w")
5064 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5065 (match_operand:OI 2 "register_operand" "0")
5066 (match_operand:SI 3 "immediate_operand" "i")
5067 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5071 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5072 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
5074 [(set_attr "type" "neon_load2_one_lane")]
5077 (define_expand "vec_load_lanesoi<mode>"
5078 [(set (match_operand:OI 0 "register_operand")
5079 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
5080 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5084 if (BYTES_BIG_ENDIAN)
5086 rtx tmp = gen_reg_rtx (OImode);
5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
5089 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
5092 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
5096 (define_insn "aarch64_simd_st2<mode>"
5097 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5098 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5099 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5102 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5103 [(set_attr "type" "neon_store2_2reg<q>")]
5106 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5107 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
5108 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5109 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5110 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5111 (match_operand:SI 2 "immediate_operand" "i")]
5115 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5116 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5118 [(set_attr "type" "neon_store2_one_lane<q>")]
5121 (define_expand "vec_store_lanesoi<mode>"
5122 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5123 (unspec:OI [(match_operand:OI 1 "register_operand")
5124 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5128 if (BYTES_BIG_ENDIAN)
5130 rtx tmp = gen_reg_rtx (OImode);
5131 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5132 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5133 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5136 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5140 (define_insn "aarch64_simd_ld3<mode>"
5141 [(set (match_operand:CI 0 "register_operand" "=w")
5142 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5143 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5146 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5147 [(set_attr "type" "neon_load3_3reg<q>")]
5150 (define_insn "aarch64_simd_ld3r<mode>"
5151 [(set (match_operand:CI 0 "register_operand" "=w")
5152 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5156 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5157 [(set_attr "type" "neon_load3_all_lanes<q>")]
5160 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5161 [(set (match_operand:CI 0 "register_operand" "=w")
5162 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5163 (match_operand:CI 2 "register_operand" "0")
5164 (match_operand:SI 3 "immediate_operand" "i")
5165 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5169 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5170 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5172 [(set_attr "type" "neon_load3_one_lane")]
5175 (define_expand "vec_load_lanesci<mode>"
5176 [(set (match_operand:CI 0 "register_operand")
5177 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5178 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182 if (BYTES_BIG_ENDIAN)
5184 rtx tmp = gen_reg_rtx (CImode);
5185 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5186 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5187 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5190 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5194 (define_insn "aarch64_simd_st3<mode>"
5195 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5196 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5197 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5200 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5201 [(set_attr "type" "neon_store3_3reg<q>")]
5204 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5205 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5206 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5207 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5208 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5209 (match_operand:SI 2 "immediate_operand" "i")]
5213 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5214 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5216 [(set_attr "type" "neon_store3_one_lane<q>")]
5219 (define_expand "vec_store_lanesci<mode>"
5220 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5221 (unspec:CI [(match_operand:CI 1 "register_operand")
5222 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5226 if (BYTES_BIG_ENDIAN)
5228 rtx tmp = gen_reg_rtx (CImode);
5229 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5230 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5231 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5234 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5238 (define_insn "aarch64_simd_ld4<mode>"
5239 [(set (match_operand:XI 0 "register_operand" "=w")
5240 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5241 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5244 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245 [(set_attr "type" "neon_load4_4reg<q>")]
5248 (define_insn "aarch64_simd_ld4r<mode>"
5249 [(set (match_operand:XI 0 "register_operand" "=w")
5250 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5254 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5255 [(set_attr "type" "neon_load4_all_lanes<q>")]
5258 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5259 [(set (match_operand:XI 0 "register_operand" "=w")
5260 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5261 (match_operand:XI 2 "register_operand" "0")
5262 (match_operand:SI 3 "immediate_operand" "i")
5263 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5267 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5268 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5270 [(set_attr "type" "neon_load4_one_lane")]
5273 (define_expand "vec_load_lanesxi<mode>"
5274 [(set (match_operand:XI 0 "register_operand")
5275 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5276 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5280 if (BYTES_BIG_ENDIAN)
5282 rtx tmp = gen_reg_rtx (XImode);
5283 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5284 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5285 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5288 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5292 (define_insn "aarch64_simd_st4<mode>"
5293 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5294 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5295 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5298 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5299 [(set_attr "type" "neon_store4_4reg<q>")]
5302 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5303 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5304 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5305 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5306 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5307 (match_operand:SI 2 "immediate_operand" "i")]
5311 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5312 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5314 [(set_attr "type" "neon_store4_one_lane<q>")]
5317 (define_expand "vec_store_lanesxi<mode>"
5318 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5319 (unspec:XI [(match_operand:XI 1 "register_operand")
5320 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 if (BYTES_BIG_ENDIAN)
5326 rtx tmp = gen_reg_rtx (XImode);
5327 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5328 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5329 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5332 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5336 (define_insn_and_split "aarch64_rev_reglist<mode>"
5337 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5339 [(match_operand:VSTRUCT 1 "register_operand" "w")
5340 (match_operand:V16QI 2 "register_operand" "w")]
5341 UNSPEC_REV_REGLIST))]
5344 "&& reload_completed"
5348 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5349 for (i = 0; i < nregs; i++)
5351 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5352 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5353 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5357 [(set_attr "type" "neon_tbl1_q")
5358 (set_attr "length" "<insn_count>")]
5361 ;; Reload patterns for AdvSIMD register list operands.
5363 (define_expand "mov<mode>"
5364 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5365 (match_operand:VSTRUCT 1 "general_operand"))]
5368 if (can_create_pseudo_p ())
5370 if (GET_CODE (operands[0]) != REG)
5371 operands[1] = force_reg (<MODE>mode, operands[1]);
;; NOTE(review): sampled excerpt — several interior lines (conditions,
;; unspec wrappers, closing parens) are missing from this view.
;; Multi-register LD1/ST1 builtin expanders and their matching insns.
;; ld1x3: one LD1 filling three consecutive vector registers (CImode).
5376 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5377 [(match_operand:CI 0 "register_operand")
5378 (match_operand:DI 1 "register_operand")
5379 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5382 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5383 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5387 (define_insn "aarch64_ld1_x3_<mode>"
5388 [(set (match_operand:CI 0 "register_operand" "=w")
5390 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5391 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5393 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5394 [(set_attr "type" "neon_load1_3reg<q>")]
;; ld1x4: one LD1 filling four consecutive vector registers (XImode).
5397 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5398 [(match_operand:XI 0 "register_operand" "=w")
5399 (match_operand:DI 1 "register_operand" "r")
5400 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5403 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5404 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5408 (define_insn "aarch64_ld1_x4_<mode>"
5409 [(set (match_operand:XI 0 "register_operand" "=w")
5411 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5412 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5415 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5416 [(set_attr "type" "neon_load1_4reg<q>")]
;; st1x2/x3/x4: single ST1 storing 2/3/4 consecutive vector registers.
5419 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5420 [(match_operand:DI 0 "register_operand")
5421 (match_operand:OI 1 "register_operand")
5422 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5425 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5426 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5430 (define_insn "aarch64_st1_x2_<mode>"
5431 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5433 [(match_operand:OI 1 "register_operand" "w")
5434 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5436 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5437 [(set_attr "type" "neon_store1_2reg<q>")]
5440 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5441 [(match_operand:DI 0 "register_operand")
5442 (match_operand:CI 1 "register_operand")
5443 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5446 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5447 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5451 (define_insn "aarch64_st1_x3_<mode>"
5452 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5454 [(match_operand:CI 1 "register_operand" "w")
5455 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5457 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5458 [(set_attr "type" "neon_store1_3reg<q>")]
5461 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5462 [(match_operand:DI 0 "register_operand" "")
5463 (match_operand:XI 1 "register_operand" "")
5464 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5467 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5468 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5472 (define_insn "aarch64_st1_x4_<mode>"
5473 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5475 [(match_operand:XI 1 "register_operand" "w")
5476 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5479 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5480 [(set_attr "type" "neon_store1_4reg<q>")]
;; NOTE(review): sampled excerpt — alternative 0's template line(s) are
;; missing from this view.
;; Little-endian VSTRUCT moves: reg-reg (multiple), reg->mem (ST1),
;; mem->reg (LD1).  One operand must be a register.
5483 (define_insn "*aarch64_mov<mode>"
5484 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5485 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5486 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5487 && (register_operand (operands[0], <MODE>mode)
5488 || register_operand (operands[1], <MODE>mode))"
5491 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5492 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5493 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5494 neon_load<nregs>_<nregs>reg_q")
5495 (set_attr "length" "<insn_count>,4,4")]
;; NOTE(review): sampled excerpt — unspec name and condition lines are
;; missing from this view.
;; Big-endian-safe single-register LD1/ST1, kept as unspecs so the
;; element order is that of the memory layout.
5498 (define_insn "aarch64_be_ld1<mode>"
5499 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5500 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5501 "aarch64_simd_struct_operand" "Utv")]
5504 "ld1\\t{%0<Vmtype>}, %1"
5505 [(set_attr "type" "neon_load1_1reg<q>")]
5508 (define_insn "aarch64_be_st1<mode>"
5509 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5510 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5513 "st1\\t{%1<Vmtype>}, %0"
5514 [(set_attr "type" "neon_store1_1reg<q>")]
;; NOTE(review): sampled excerpt — the output templates ("#"/stp/ldp
;; alternatives) are missing from this view.
;; Big-endian moves of the opaque 2/3/4-register modes OI/CI/XI.
;; Length attrs show alt 0 splits into 2/3/4 reg-reg moves.
5517 (define_insn "*aarch64_be_movoi"
5518 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5519 (match_operand:OI 1 "general_operand" " w,w,m"))]
5520 "TARGET_SIMD && BYTES_BIG_ENDIAN
5521 && (register_operand (operands[0], OImode)
5522 || register_operand (operands[1], OImode))"
5527 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5528 (set_attr "length" "8,4,4")]
5531 (define_insn "*aarch64_be_movci"
5532 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5533 (match_operand:CI 1 "general_operand" " w,w,o"))]
5534 "TARGET_SIMD && BYTES_BIG_ENDIAN
5535 && (register_operand (operands[0], CImode)
5536 || register_operand (operands[1], CImode))"
5538 [(set_attr "type" "multiple")
5539 (set_attr "length" "12,4,4")]
5542 (define_insn "*aarch64_be_movxi"
5543 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5544 (match_operand:XI 1 "general_operand" " w,w,o"))]
5545 "TARGET_SIMD && BYTES_BIG_ENDIAN
5546 && (register_operand (operands[0], XImode)
5547 || register_operand (operands[1], XImode))"
5549 [(set_attr "type" "multiple")
5550 (set_attr "length" "16,4,4")]
;; NOTE(review): sampled excerpt — the (define_split ...) header lines and
;; several body lines are missing from this view; only the split bodies
;; are visible.  After reload, OI/CI/XI moves are decomposed.
;; OI reg-reg: two TImode register moves.
5554 [(set (match_operand:OI 0 "register_operand")
5555 (match_operand:OI 1 "register_operand"))]
5556 "TARGET_SIMD && reload_completed"
5559 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CI: reg-reg becomes three TImode moves; otherwise (big-endian path
;; visible below) an OI move plus a trailing 16-byte piece.
5564 [(set (match_operand:CI 0 "nonimmediate_operand")
5565 (match_operand:CI 1 "general_operand"))]
5566 "TARGET_SIMD && reload_completed"
5569 if (register_operand (operands[0], CImode)
5570 && register_operand (operands[1], CImode))
5572 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5575 else if (BYTES_BIG_ENDIAN)
5577 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5578 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5579 emit_move_insn (gen_lowpart (V16QImode,
5580 simplify_gen_subreg (TImode, operands[0],
5582 gen_lowpart (V16QImode,
5583 simplify_gen_subreg (TImode, operands[1],
;; XI: reg-reg becomes four TImode moves; big-endian memory case splits
;; into two OImode halves at byte offsets 0 and 32.
5592 [(set (match_operand:XI 0 "nonimmediate_operand")
5593 (match_operand:XI 1 "general_operand"))]
5594 "TARGET_SIMD && reload_completed"
5597 if (register_operand (operands[0], XImode)
5598 && register_operand (operands[1], XImode))
5600 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5603 else if (BYTES_BIG_ENDIAN)
5605 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5606 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5607 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5608 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; NOTE(review): sampled excerpt — conditions, unspec names and some
;; continuation lines are missing from this view.
;; LD<n>R (load-and-replicate) builtin expander; memory is BLKmode with an
;; explicit size so alias analysis stays correct.
5615 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5616 [(match_operand:VSTRUCT 0 "register_operand")
5617 (match_operand:DI 1 "register_operand")
5618 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5621 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5622 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5625 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register LD2/LD3/LD4: the VD variants emit the structured ldN; the
;; DX (64-bit scalar-ish) variants fall back to LD1 on .1d lanes.
5630 (define_insn "aarch64_ld2<mode>_dreg"
5631 [(set (match_operand:OI 0 "register_operand" "=w")
5632 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5633 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5636 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5637 [(set_attr "type" "neon_load2_2reg<q>")]
5640 (define_insn "aarch64_ld2<mode>_dreg"
5641 [(set (match_operand:OI 0 "register_operand" "=w")
5642 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5643 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5646 "ld1\\t{%S0.1d - %T0.1d}, %1"
5647 [(set_attr "type" "neon_load1_2reg<q>")]
5650 (define_insn "aarch64_ld3<mode>_dreg"
5651 [(set (match_operand:CI 0 "register_operand" "=w")
5652 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5653 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5656 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5657 [(set_attr "type" "neon_load3_3reg<q>")]
5660 (define_insn "aarch64_ld3<mode>_dreg"
5661 [(set (match_operand:CI 0 "register_operand" "=w")
5662 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5663 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5666 "ld1\\t{%S0.1d - %U0.1d}, %1"
5667 [(set_attr "type" "neon_load1_3reg<q>")]
5670 (define_insn "aarch64_ld4<mode>_dreg"
5671 [(set (match_operand:XI 0 "register_operand" "=w")
5672 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5673 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5676 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5677 [(set_attr "type" "neon_load4_4reg<q>")]
5680 (define_insn "aarch64_ld4<mode>_dreg"
5681 [(set (match_operand:XI 0 "register_operand" "=w")
5682 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5683 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5686 "ld1\\t{%S0.1d - %V0.1d}, %1"
5687 [(set_attr "type" "neon_load1_4reg<q>")]
;; D-register structured-load builtin entry point: nregs * 8 bytes read.
5690 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5691 [(match_operand:VSTRUCT 0 "register_operand")
5692 (match_operand:DI 1 "register_operand")
5693 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5696 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5697 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5699 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; NOTE(review): sampled excerpt — conditions, `else` branches and closing
;; braces are missing from this view.
;; ld1 builtin: big-endian goes through the be_ld1 unspec; otherwise a
;; plain move (the visible `emit_move_insn` is presumably the else arm).
5703 (define_expand "aarch64_ld1<VALL_F16:mode>"
5704 [(match_operand:VALL_F16 0 "register_operand")
5705 (match_operand:DI 1 "register_operand")]
5708 machine_mode mode = <VALL_F16:MODE>mode;
5709 rtx mem = gen_rtx_MEM (mode, operands[1]);
5711 if (BYTES_BIG_ENDIAN)
5712 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5714 emit_move_insn (operands[0], mem);
;; Q-register structured load builtin entry point.
5718 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5719 [(match_operand:VSTRUCT 0 "register_operand")
5720 (match_operand:DI 1 "register_operand")
5721 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5724 machine_mode mode = <VSTRUCT:MODE>mode;
5725 rtx mem = gen_rtx_MEM (mode, operands[1]);
5727 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; ld1x2 builtins (Q and D element variants) both go through OImode.
5731 (define_expand "aarch64_ld1x2<VQ:mode>"
5732 [(match_operand:OI 0 "register_operand")
5733 (match_operand:DI 1 "register_operand")
5734 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5737 machine_mode mode = OImode;
5738 rtx mem = gen_rtx_MEM (mode, operands[1]);
5740 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5744 (define_expand "aarch64_ld1x2<VDC:mode>"
5745 [(match_operand:OI 0 "register_operand")
5746 (match_operand:DI 1 "register_operand")
5747 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5750 machine_mode mode = OImode;
5751 rtx mem = gen_rtx_MEM (mode, operands[1]);
5753 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; Lane load: checks the lane index against <nunits> before emitting.
5758 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5759 [(match_operand:VSTRUCT 0 "register_operand")
5760 (match_operand:DI 1 "register_operand")
5761 (match_operand:VSTRUCT 2 "register_operand")
5762 (match_operand:SI 3 "immediate_operand")
5763 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5766 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5767 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5770 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5771 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5772 operands[0], mem, operands[2], operands[3]));
;; NOTE(review): sampled excerpt — conditions and closing braces missing
;; from this view.
5776 ;; Expanders for builtins to extract vector registers from large
5777 ;; opaque integer modes.
;; get_dreg: take 128-bit piece `part` of the struct reg as a subreg,
;; then return its low 64-bit half in the VDC mode.
5781 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5782 [(match_operand:VDC 0 "register_operand")
5783 (match_operand:VSTRUCT 1 "register_operand")
5784 (match_operand:SI 2 "immediate_operand")]
5787 int part = INTVAL (operands[2]);
5788 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5789 int offset = part * 16;
5791 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5792 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; get_qreg: a direct 16-byte-offset subreg move.
5798 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5799 [(match_operand:VQ 0 "register_operand")
5800 (match_operand:VSTRUCT 1 "register_operand")
5801 (match_operand:SI 2 "immediate_operand")]
5804 int part = INTVAL (operands[2]);
5805 int offset = part * 16;
5807 emit_move_insn (operands[0],
5808 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
;; NOTE(review): sampled excerpt — condition and DONE lines missing from
;; this view.
5812 ;; Permuted-store expanders for neon intrinsics.
5814 ;; Permute instructions
;; Standard vec_perm pattern for byte vectors; all logic lives in the
;; helper aarch64_expand_vec_perm.
5818 (define_expand "vec_perm<mode>"
5819 [(match_operand:VB 0 "register_operand")
5820 (match_operand:VB 1 "register_operand")
5821 (match_operand:VB 2 "register_operand")
5822 (match_operand:VB 3 "register_operand")]
5825 aarch64_expand_vec_perm (operands[0], operands[1],
5826 operands[2], operands[3], <nunits>);
;; NOTE(review): sampled excerpt — unspec names and conditions missing
;; from this view.
;; TBL/TBX table lookups.  TBL zeroes out-of-range indices; TBX (tied
;; operand 1, constraint "0") leaves them unchanged.
5830 (define_insn "aarch64_tbl1<mode>"
5831 [(set (match_operand:VB 0 "register_operand" "=w")
5832 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5833 (match_operand:VB 2 "register_operand" "w")]
5836 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5837 [(set_attr "type" "neon_tbl1<q>")]
5840 ;; Two source registers.
5842 (define_insn "aarch64_tbl2v16qi"
5843 [(set (match_operand:V16QI 0 "register_operand" "=w")
5844 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5845 (match_operand:V16QI 2 "register_operand" "w")]
5848 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5849 [(set_attr "type" "neon_tbl2_q")]
5852 (define_insn "aarch64_tbl3<mode>"
5853 [(set (match_operand:VB 0 "register_operand" "=w")
5854 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5855 (match_operand:VB 2 "register_operand" "w")]
5858 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5859 [(set_attr "type" "neon_tbl3")]
5862 (define_insn "aarch64_tbx4<mode>"
5863 [(set (match_operand:VB 0 "register_operand" "=w")
5864 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5865 (match_operand:OI 2 "register_operand" "w")
5866 (match_operand:VB 3 "register_operand" "w")]
5869 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5870 [(set_attr "type" "neon_tbl4")]
5873 ;; Three source registers.
5875 (define_insn "aarch64_qtbl3<mode>"
5876 [(set (match_operand:VB 0 "register_operand" "=w")
5877 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5878 (match_operand:VB 2 "register_operand" "w")]
5881 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5882 [(set_attr "type" "neon_tbl3")]
5885 (define_insn "aarch64_qtbx3<mode>"
5886 [(set (match_operand:VB 0 "register_operand" "=w")
5887 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5888 (match_operand:CI 2 "register_operand" "w")
5889 (match_operand:VB 3 "register_operand" "w")]
5892 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5893 [(set_attr "type" "neon_tbl3")]
5896 ;; Four source registers.
5898 (define_insn "aarch64_qtbl4<mode>"
5899 [(set (match_operand:VB 0 "register_operand" "=w")
5900 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5901 (match_operand:VB 2 "register_operand" "w")]
5904 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5905 [(set_attr "type" "neon_tbl4")]
5908 (define_insn "aarch64_qtbx4<mode>"
5909 [(set (match_operand:VB 0 "register_operand" "=w")
5910 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5911 (match_operand:XI 2 "register_operand" "w")
5912 (match_operand:VB 3 "register_operand" "w")]
5915 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5916 [(set_attr "type" "neon_tbl4")]
;; NOTE(review): sampled excerpt — unspec name, condition and "#" template
;; lines are missing from this view.
;; Pack two V16QI registers into one OImode pair; kept whole until after
;; reload, then split by aarch64_split_combinev16qi.
5919 (define_insn_and_split "aarch64_combinev16qi"
5920 [(set (match_operand:OI 0 "register_operand" "=w")
5921 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5922 (match_operand:V16QI 2 "register_operand" "w")]
5926 "&& reload_completed"
5929 aarch64_split_combinev16qi (operands);
5932 [(set_attr "type" "multiple")]
;; NOTE(review): sampled excerpt — unspec names and conditions missing
;; from this view.
5935 ;; This instruction's pattern is generated directly by
5936 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5937 ;; need corresponding changes there.
5938 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5939 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5940 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5941 (match_operand:VALL_F16 2 "register_operand" "w")]
5944 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5945 [(set_attr "type" "neon_permute<q>")]
5948 ;; This instruction's pattern is generated directly by
5949 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5950 ;; need corresponding changes there. Note that the immediate (third)
5951 ;; operand is a lane index not a byte index.
5952 (define_insn "aarch64_ext<mode>"
5953 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5954 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5955 (match_operand:VALL_F16 2 "register_operand" "w")
5956 (match_operand:SI 3 "immediate_operand" "i")]
;; Convert the lane index into the byte offset EXT expects.
5960 operands[3] = GEN_INT (INTVAL (operands[3])
5961 * GET_MODE_UNIT_SIZE (<MODE>mode));
5962 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5964 [(set_attr "type" "neon_ext<q>")]
5967 ;; This instruction's pattern is generated directly by
5968 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5969 ;; need corresponding changes there.
5970 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5971 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5972 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5975 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5976 [(set_attr "type" "neon_rev<q>")]
;; NOTE(review): sampled excerpt — unspec names and conditions missing
;; from this view.
;; D-register ST2/ST3/ST4 counterparts of the ldN_dreg patterns above:
;; VD variants emit structured stN; DX variants fall back to ST1 on .1d.
5979 (define_insn "aarch64_st2<mode>_dreg"
5980 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5981 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5982 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5985 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5986 [(set_attr "type" "neon_store2_2reg")]
5989 (define_insn "aarch64_st2<mode>_dreg"
5990 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5991 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5992 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5995 "st1\\t{%S1.1d - %T1.1d}, %0"
5996 [(set_attr "type" "neon_store1_2reg")]
5999 (define_insn "aarch64_st3<mode>_dreg"
6000 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6001 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6002 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6005 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6006 [(set_attr "type" "neon_store3_3reg")]
6009 (define_insn "aarch64_st3<mode>_dreg"
6010 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6011 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
6012 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6015 "st1\\t{%S1.1d - %U1.1d}, %0"
6016 [(set_attr "type" "neon_store1_3reg")]
6019 (define_insn "aarch64_st4<mode>_dreg"
6020 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6021 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6022 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6025 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
6026 [(set_attr "type" "neon_store4_4reg")]
6029 (define_insn "aarch64_st4<mode>_dreg"
6030 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6031 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
6032 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6035 "st1\\t{%S1.1d - %V1.1d}, %0"
6036 [(set_attr "type" "neon_store1_4reg")]
;; NOTE(review): sampled excerpt — conditions, `else` branches and DONE
;; lines are missing from this view.
;; Store-side builtin expanders mirroring the load-side ones above.
6039 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
6040 [(match_operand:DI 0 "register_operand")
6041 (match_operand:VSTRUCT 1 "register_operand")
6042 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6045 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6046 set_mem_size (mem, <VSTRUCT:nregs> * 8);
6048 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
6052 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
6053 [(match_operand:DI 0 "register_operand")
6054 (match_operand:VSTRUCT 1 "register_operand")
6055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6058 machine_mode mode = <VSTRUCT:MODE>mode;
6059 rtx mem = gen_rtx_MEM (mode, operands[0]);
6061 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Lane store; BLKmode mem with explicit size, like the lane load.
6065 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
6066 [(match_operand:DI 0 "register_operand")
6067 (match_operand:VSTRUCT 1 "register_operand")
6068 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
6069 (match_operand:SI 2 "immediate_operand")]
6072 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
6073 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
6076 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
6077 mem, operands[1], operands[2]));
;; st1 builtin: big-endian via be_st1 unspec, otherwise a plain move
;; (presumably the else arm — the `else` line is sampled out).
6081 (define_expand "aarch64_st1<VALL_F16:mode>"
6082 [(match_operand:DI 0 "register_operand")
6083 (match_operand:VALL_F16 1 "register_operand")]
6086 machine_mode mode = <VALL_F16:MODE>mode;
6087 rtx mem = gen_rtx_MEM (mode, operands[0]);
6089 if (BYTES_BIG_ENDIAN)
6090 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
6092 emit_move_insn (mem, operands[1]);
;; NOTE(review): sampled excerpt — condition, source operand of the final
;; move and DONE lines are missing from this view.
6096 ;; Expander for builtins to insert vector registers into large
6097 ;; opaque integer modes.
6099 ;; Q-register list. We don't need a D-reg inserter as we zero
6100 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole struct, then overwrite the 16-byte slice at `part`.
6102 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
6103 [(match_operand:VSTRUCT 0 "register_operand")
6104 (match_operand:VSTRUCT 1 "register_operand")
6105 (match_operand:VQ 2 "register_operand")
6106 (match_operand:SI 3 "immediate_operand")]
6109 int part = INTVAL (operands[3]);
6110 int offset = part * 16;
6112 emit_move_insn (operands[0], operands[1]);
6113 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
;; NOTE(review): sampled excerpt — conditions and DONE lines missing from
;; this view.
6118 ;; Standard pattern name vec_init<mode><Vel>.
;; Both full-vector and half-width initializers delegate to the
;; aarch64_expand_vector_init helper.
6120 (define_expand "vec_init<mode><Vel>"
6121 [(match_operand:VALL_F16 0 "register_operand")
6122 (match_operand 1 "" "")]
6125 aarch64_expand_vector_init (operands[0], operands[1]);
6129 (define_expand "vec_init<mode><Vhalf>"
6130 [(match_operand:VQ_NO2E 0 "register_operand")
6131 (match_operand 1 "" "")]
6134 aarch64_expand_vector_init (operands[0], operands[1]);
;; NOTE(review): sampled excerpt — conditions and unspec names missing
;; from this view.
;; LD1R: load one element and replicate to all lanes (vec_duplicate of a
;; memory scalar).
6138 (define_insn "*aarch64_simd_ld1r<mode>"
6139 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6140 (vec_duplicate:VALL_F16
6141 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6143 "ld1r\\t{%0.<Vtype>}, %1"
6144 [(set_attr "type" "neon_load1_all_lanes")]
;; ld1 x2 insns matched by the ld1x2 expanders above (VQ and VDC
;; element-mode variants share the OImode destination).
6147 (define_insn "aarch64_simd_ld1<mode>_x2"
6148 [(set (match_operand:OI 0 "register_operand" "=w")
6149 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6150 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6153 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6154 [(set_attr "type" "neon_load1_2reg<q>")]
6157 (define_insn "aarch64_simd_ld1<mode>_x2"
6158 [(set (match_operand:OI 0 "register_operand" "=w")
6159 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6160 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6163 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6164 [(set_attr "type" "neon_load1_2reg<q>")]
;; NOTE(review): sampled excerpt — unspec names and conditions missing
;; from this view.
;; Floating-point reciprocal estimate / exponent / step instructions.
6168 (define_insn "@aarch64_frecpe<mode>"
6169 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6171 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6174 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6175 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6178 (define_insn "aarch64_frecpx<mode>"
6179 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6180 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6183 "frecpx\t%<s>0, %<s>1"
6184 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6187 (define_insn "@aarch64_frecps<mode>"
6188 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6190 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6191 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6194 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6195 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate.
6198 (define_insn "aarch64_urecpe<mode>"
6199 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6200 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6203 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6204 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
;; NOTE(review): sampled excerpt — conditions, FAIL branches and DONE
;; lines missing from this view.
6206 ;; Standard pattern name vec_extract<mode><Vel>.
6208 (define_expand "vec_extract<mode><Vel>"
6209 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6210 (match_operand:VALL_F16 1 "register_operand")
6211 (match_operand:SI 2 "immediate_operand")]
6215 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6219 ;; Extract a 64-bit vector from one half of a 128-bit vector.
;; Only starts 0 and nunits/2 are valid halves; the visible guard rejects
;; other values (the branch body is sampled out — confirm in full source).
6220 (define_expand "vec_extract<mode><Vhalf>"
6221 [(match_operand:<VHALF> 0 "register_operand")
6222 (match_operand:VQMOV_NO2E 1 "register_operand")
6223 (match_operand 2 "immediate_operand")]
6226 int start = INTVAL (operands[2]);
6227 if (start != 0 && start != <nunits> / 2)
6229 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
6230 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
6234 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
6235 (define_expand "vec_extractv2dfv1df"
6236 [(match_operand:V1DF 0 "register_operand")
6237 (match_operand:V2DF 1 "register_operand")
6238 (match_operand 2 "immediate_operand")]
6241 /* V1DF is rarely used by other patterns, so it should be better to hide
6242 it in a subreg destination of a normal DF op. */
6243 rtx scalar0 = gen_lowpart (DFmode, operands[0]);
6244 emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
;; NOTE(review): sampled excerpt — unspec/xor wrappers missing from this
;; view.
;; AES round instructions.  Operand 1 carries "%0" (commutative, tied to
;; the destination) since AESE/AESD xor state with the round key first.
6250 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6251 [(set (match_operand:V16QI 0 "register_operand" "=w")
6254 (match_operand:V16QI 1 "register_operand" "%0")
6255 (match_operand:V16QI 2 "register_operand" "w"))]
6257 "TARGET_SIMD && TARGET_AES"
6258 "aes<aes_op>\\t%0.16b, %2.16b"
6259 [(set_attr "type" "crypto_aese")]
6262 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6263 [(set (match_operand:V16QI 0 "register_operand" "=w")
6264 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6266 "TARGET_SIMD && TARGET_AES"
6267 "aes<aesmc_op>\\t%0.16b, %1.16b"
6268 [(set_attr "type" "crypto_aesmc")]
6271 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6272 ;; and enforce the register dependency without scheduling or register
6273 ;; allocation messing up the order or introducing moves inbetween.
6274 ;; Mash the two together during combine.
6276 (define_insn "*aarch64_crypto_aese_fused"
6277 [(set (match_operand:V16QI 0 "register_operand" "=w")
6281 (match_operand:V16QI 1 "register_operand" "%0")
6282 (match_operand:V16QI 2 "register_operand" "w"))]
6285 "TARGET_SIMD && TARGET_AES
6286 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6287 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6288 [(set_attr "type" "crypto_aese")
6289 (set_attr "length" "8")]
6292 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6293 ;; and enforce the register dependency without scheduling or register
6294 ;; allocation messing up the order or introducing moves inbetween.
6295 ;; Mash the two together during combine.
6297 (define_insn "*aarch64_crypto_aesd_fused"
6298 [(set (match_operand:V16QI 0 "register_operand" "=w")
6302 (match_operand:V16QI 1 "register_operand" "%0")
6303 (match_operand:V16QI 2 "register_operand" "w"))]
6306 "TARGET_SIMD && TARGET_AES
6307 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6308 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6309 [(set_attr "type" "crypto_aese")
6310 (set_attr "length" "8")]
;; NOTE(review): sampled excerpt — unspec names and some output templates
;; missing from this view.
;; SHA1 instructions.  sha1h has LE/BE variants selecting lane 0 vs 3 of
;; the V4SI input so the scalar word picked matches memory order.
6315 (define_insn "aarch64_crypto_sha1hsi"
6316 [(set (match_operand:SI 0 "register_operand" "=w")
6317 (unspec:SI [(match_operand:SI 1
6318 "register_operand" "w")]
6320 "TARGET_SIMD && TARGET_SHA2"
6322 [(set_attr "type" "crypto_sha1_fast")]
6325 (define_insn "aarch64_crypto_sha1hv4si"
6326 [(set (match_operand:SI 0 "register_operand" "=w")
6327 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6328 (parallel [(const_int 0)]))]
6330 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6332 [(set_attr "type" "crypto_sha1_fast")]
6335 (define_insn "aarch64_be_crypto_sha1hv4si"
6336 [(set (match_operand:SI 0 "register_operand" "=w")
6337 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6338 (parallel [(const_int 3)]))]
6340 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6342 [(set_attr "type" "crypto_sha1_fast")]
6345 (define_insn "aarch64_crypto_sha1su1v4si"
6346 [(set (match_operand:V4SI 0 "register_operand" "=w")
6347 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6348 (match_operand:V4SI 2 "register_operand" "w")]
6350 "TARGET_SIMD && TARGET_SHA2"
6351 "sha1su1\\t%0.4s, %2.4s"
6352 [(set_attr "type" "crypto_sha1_fast")]
6355 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6356 [(set (match_operand:V4SI 0 "register_operand" "=w")
6357 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6358 (match_operand:SI 2 "register_operand" "w")
6359 (match_operand:V4SI 3 "register_operand" "w")]
6361 "TARGET_SIMD && TARGET_SHA2"
6362 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6363 [(set_attr "type" "crypto_sha1_slow")]
6366 (define_insn "aarch64_crypto_sha1su0v4si"
6367 [(set (match_operand:V4SI 0 "register_operand" "=w")
6368 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6369 (match_operand:V4SI 2 "register_operand" "w")
6370 (match_operand:V4SI 3 "register_operand" "w")]
6372 "TARGET_SIMD && TARGET_SHA2"
6373 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6374 [(set_attr "type" "crypto_sha1_xor")]
;; NOTE(review): sampled excerpt — unspec names missing from this view.
;; SHA256 hash-update and schedule-update instructions; operand 1 is tied
;; to the destination ("0") in all three.
6379 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6380 [(set (match_operand:V4SI 0 "register_operand" "=w")
6381 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6382 (match_operand:V4SI 2 "register_operand" "w")
6383 (match_operand:V4SI 3 "register_operand" "w")]
6385 "TARGET_SIMD && TARGET_SHA2"
6386 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6387 [(set_attr "type" "crypto_sha256_slow")]
6390 (define_insn "aarch64_crypto_sha256su0v4si"
6391 [(set (match_operand:V4SI 0 "register_operand" "=w")
6392 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6393 (match_operand:V4SI 2 "register_operand" "w")]
6395 "TARGET_SIMD && TARGET_SHA2"
6396 "sha256su0\\t%0.4s, %2.4s"
6397 [(set_attr "type" "crypto_sha256_fast")]
6400 (define_insn "aarch64_crypto_sha256su1v4si"
6401 [(set (match_operand:V4SI 0 "register_operand" "=w")
6402 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6403 (match_operand:V4SI 2 "register_operand" "w")
6404 (match_operand:V4SI 3 "register_operand" "w")]
6406 "TARGET_SIMD && TARGET_SHA2"
6407 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6408 [(set_attr "type" "crypto_sha256_slow")]
;; NOTE(review): sampled excerpt — unspec names missing from this view.
;; SHA512 (TARGET_SHA3 feature bit) hash and schedule instructions.
6413 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6414 [(set (match_operand:V2DI 0 "register_operand" "=w")
6415 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6416 (match_operand:V2DI 2 "register_operand" "w")
6417 (match_operand:V2DI 3 "register_operand" "w")]
6419 "TARGET_SIMD && TARGET_SHA3"
6420 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6421 [(set_attr "type" "crypto_sha512")]
6424 (define_insn "aarch64_crypto_sha512su0qv2di"
6425 [(set (match_operand:V2DI 0 "register_operand" "=w")
6426 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6427 (match_operand:V2DI 2 "register_operand" "w")]
6429 "TARGET_SIMD && TARGET_SHA3"
6430 "sha512su0\\t%0.2d, %2.2d"
6431 [(set_attr "type" "crypto_sha512")]
6434 (define_insn "aarch64_crypto_sha512su1qv2di"
6435 [(set (match_operand:V2DI 0 "register_operand" "=w")
6436 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6437 (match_operand:V2DI 2 "register_operand" "w")
6438 (match_operand:V2DI 3 "register_operand" "w")]
6440 "TARGET_SIMD && TARGET_SHA3"
6441 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6442 [(set_attr "type" "crypto_sha512")]
;; NOTE(review): sampled excerpt — the xor/rotate RTL wrappers are
;; missing from this view.
;; SHA3 logical instructions: EOR3 (three-way xor), RAX1 (rotate-and-xor),
;; XAR (xor-and-rotate by immediate), BCAX (bit-clear-and-xor).
6447 (define_insn "eor3q<mode>4"
6448 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6451 (match_operand:VQ_I 2 "register_operand" "w")
6452 (match_operand:VQ_I 3 "register_operand" "w"))
6453 (match_operand:VQ_I 1 "register_operand" "w")))]
6454 "TARGET_SIMD && TARGET_SHA3"
6455 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6456 [(set_attr "type" "crypto_sha3")]
6459 (define_insn "aarch64_rax1qv2di"
6460 [(set (match_operand:V2DI 0 "register_operand" "=w")
6463 (match_operand:V2DI 2 "register_operand" "w")
6465 (match_operand:V2DI 1 "register_operand" "w")))]
6466 "TARGET_SIMD && TARGET_SHA3"
6467 "rax1\\t%0.2d, %1.2d, %2.2d"
6468 [(set_attr "type" "crypto_sha3")]
6471 (define_insn "aarch64_xarqv2di"
6472 [(set (match_operand:V2DI 0 "register_operand" "=w")
6475 (match_operand:V2DI 1 "register_operand" "%w")
6476 (match_operand:V2DI 2 "register_operand" "w"))
6477 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6478 "TARGET_SIMD && TARGET_SHA3"
6479 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6480 [(set_attr "type" "crypto_sha3")]
6483 (define_insn "bcaxq<mode>4"
6484 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6487 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6488 (match_operand:VQ_I 2 "register_operand" "w"))
6489 (match_operand:VQ_I 1 "register_operand" "w")))]
6490 "TARGET_SIMD && TARGET_SHA3"
6491 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6492 [(set_attr "type" "crypto_sha3")]
;; SM3 / SM4 (Chinese cryptography extension) patterns, all gated on
;; TARGET_SIMD && TARGET_SM4.
;; NOTE(review): the UNSPEC tags and some closing parens appear elided
;; from this extract -- verify against the upstream aarch64-simd.md.

;; SM3SS1: SM3 hash intermediate; three distinct source registers.
6497 (define_insn "aarch64_sm3ss1qv4si"
6498 [(set (match_operand:V4SI 0 "register_operand" "=w")
6499 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6500 (match_operand:V4SI 2 "register_operand" "w")
6501 (match_operand:V4SI 3 "register_operand" "w")]
6503 "TARGET_SIMD && TARGET_SM4"
6504 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6505 [(set_attr "type" "crypto_sm3")]

;; SM3TT{1A,1B,2A,2B} (selected by the <sm3tt_op> iterator): operand 0
;; tied to operand 1; operand 4 is a 2-bit lane immediate ("Ui2").
6509 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6510 [(set (match_operand:V4SI 0 "register_operand" "=w")
6511 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6512 (match_operand:V4SI 2 "register_operand" "w")
6513 (match_operand:V4SI 3 "register_operand" "w")
6514 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6516 "TARGET_SIMD && TARGET_SM4"
6517 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6518 [(set_attr "type" "crypto_sm3")]

;; SM3PARTW{1,2} (selected by <sm3part_op>): message-schedule helper,
;; accumulator tied via the "0" constraint on operand 1.
6521 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6522 [(set (match_operand:V4SI 0 "register_operand" "=w")
6523 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6524 (match_operand:V4SI 2 "register_operand" "w")
6525 (match_operand:V4SI 3 "register_operand" "w")]
6527 "TARGET_SIMD && TARGET_SM4"
6528 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6529 [(set_attr "type" "crypto_sm3")]

;; SM4E: SM4 block-cipher encode round; state tied in/out (operand 1).
6534 (define_insn "aarch64_sm4eqv4si"
6535 [(set (match_operand:V4SI 0 "register_operand" "=w")
6536 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6537 (match_operand:V4SI 2 "register_operand" "w")]
6539 "TARGET_SIMD && TARGET_SM4"
6540 "sm4e\\t%0.4s, %2.4s"
6541 [(set_attr "type" "crypto_sm4")]

;; SM4EKEY: SM4 key-schedule step; no tied operand.
6544 (define_insn "aarch64_sm4ekeyqv4si"
6545 [(set (match_operand:V4SI 0 "register_operand" "=w")
6546 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6547 (match_operand:V4SI 2 "register_operand" "w")]
6549 "TARGET_SIMD && TARGET_SM4"
6550 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6551 [(set_attr "type" "crypto_sm4")]
;; FP16 FMLAL/FMLSL (widening half-to-single multiply-accumulate)
;; vector forms.  The expanders build parallel-constant selectors for
;; the low/high half of the wide (HF) operands and hand off to the
;; corresponding aarch64_simd_fml*l* insn; the insns then match the
;; vec_select of that half feeding a (possibly negated) float_extend
;; multiply-accumulate.
;; NOTE(review): expander bodies and several rtx wrapper lines are
;; elided in this extract -- verify against upstream aarch64-simd.md.

;; Expander, low halves: both selectors built with `false' (lo half).
6556 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6557 [(set (match_operand:VDQSF 0 "register_operand")
6559 [(match_operand:VDQSF 1 "register_operand")
6560 (match_operand:<VFMLA_W> 2 "register_operand")
6561 (match_operand:<VFMLA_W> 3 "register_operand")]
6565 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6566 <nunits> * 2, false);
6567 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6568 <nunits> * 2, false);
6570 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
;; Expander, high halves: selectors built with `true' (hi half).
6579 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6580 [(set (match_operand:VDQSF 0 "register_operand")
6582 [(match_operand:VDQSF 1 "register_operand")
6583 (match_operand:<VFMLA_W> 2 "register_operand")
6584 (match_operand:<VFMLA_W> 3 "register_operand")]
6588 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6589 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6591 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; FMLAL (add form), low halves; accumulator operand 1 tied via "0".
6599 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6600 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6603 (vec_select:<VFMLA_SEL_W>
6604 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6605 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6607 (vec_select:<VFMLA_SEL_W>
6608 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6609 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6610 (match_operand:VDQSF 1 "register_operand" "0")))]
6612 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6613 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (subtract form, first factor negated), low halves.
6616 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6617 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6621 (vec_select:<VFMLA_SEL_W>
6622 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6623 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6625 (vec_select:<VFMLA_SEL_W>
6626 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6627 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6628 (match_operand:VDQSF 1 "register_operand" "0")))]
6630 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6631 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (add form), high halves.
6634 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6635 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6638 (vec_select:<VFMLA_SEL_W>
6639 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6640 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6642 (vec_select:<VFMLA_SEL_W>
6643 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6644 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6645 (match_operand:VDQSF 1 "register_operand" "0")))]
6647 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6648 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (subtract form), high halves.
6651 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6652 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6656 (vec_select:<VFMLA_SEL_W>
6657 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6658 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6660 (vec_select:<VFMLA_SEL_W>
6661 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6662 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6663 (match_operand:VDQSF 1 "register_operand" "0")))]
6665 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6666 [(set_attr "type" "neon_fp_mul_s")]
;; FP16 FMLAL/FMLSL by-lane forms, 64-bit result (V2SF), lane index
;; into a 64-bit V4HF operand 3 (2-bit immediate, predicate
;; aarch64_imm2).  Expanders build the V4HF half-selector and
;; endian-correct lane rtx, then emit the matching insn.
;; NOTE(review): expander tails and some rtx wrapper lines are elided
;; in this extract -- verify against upstream aarch64-simd.md.
6669 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6670 [(set (match_operand:V2SF 0 "register_operand")
6671 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6672 (match_operand:V4HF 2 "register_operand")
6673 (match_operand:V4HF 3 "register_operand")
6674 (match_operand:SI 4 "aarch64_imm2")]
6678 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6679 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6681 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
;; High-half variant of the above (selector built with `true').
6690 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6691 [(set (match_operand:V2SF 0 "register_operand")
6692 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6693 (match_operand:V4HF 2 "register_operand")
6694 (match_operand:V4HF 3 "register_operand")
6695 (match_operand:SI 4 "aarch64_imm2")]
6699 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6700 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6702 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; FMLAL by-lane, low half.  Operand 3 uses constraint "x" (the
;; restricted FP16 lane-register range); accumulator tied via "0".
6710 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6711 [(set (match_operand:V2SF 0 "register_operand" "=w")
6715 (match_operand:V4HF 2 "register_operand" "w")
6716 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6720 (match_operand:V4HF 3 "register_operand" "x")
6721 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6722 (match_operand:V2SF 1 "register_operand" "0")))]
6724 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6725 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by-lane, low half.
6728 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6729 [(set (match_operand:V2SF 0 "register_operand" "=w")
6734 (match_operand:V4HF 2 "register_operand" "w")
6735 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6739 (match_operand:V4HF 3 "register_operand" "x")
6740 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741 (match_operand:V2SF 1 "register_operand" "0")))]
6743 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6744 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by-lane, high half.
6747 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6748 [(set (match_operand:V2SF 0 "register_operand" "=w")
6752 (match_operand:V4HF 2 "register_operand" "w")
6753 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6757 (match_operand:V4HF 3 "register_operand" "x")
6758 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6759 (match_operand:V2SF 1 "register_operand" "0")))]
6761 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6762 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by-lane, high half.
6765 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6766 [(set (match_operand:V2SF 0 "register_operand" "=w")
6771 (match_operand:V4HF 2 "register_operand" "w")
6772 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6776 (match_operand:V4HF 3 "register_operand" "x")
6777 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778 (match_operand:V2SF 1 "register_operand" "0")))]
6780 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6781 [(set_attr "type" "neon_fp_mul_s")]
;; FP16 FMLAL/FMLSL by-lane forms, 128-bit result (V4SF), lane index
;; into a 128-bit V8HF operand 3 (3-bit immediate, predicate
;; aarch64_lane_imm3, constraint "Ui7").
;; NOTE(review): expander tails and some rtx wrapper lines are elided
;; in this extract -- verify against upstream aarch64-simd.md.
6784 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6785 [(set (match_operand:V4SF 0 "register_operand")
6786 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6787 (match_operand:V8HF 2 "register_operand")
6788 (match_operand:V8HF 3 "register_operand")
6789 (match_operand:SI 4 "aarch64_lane_imm3")]
6793 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6794 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6796 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; High-half variant of the above expander.
6804 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6805 [(set (match_operand:V4SF 0 "register_operand")
6806 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6807 (match_operand:V8HF 2 "register_operand")
6808 (match_operand:V8HF 3 "register_operand")
6809 (match_operand:SI 4 "aarch64_lane_imm3")]
6813 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6814 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6816 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL by-laneq, low half; accumulator tied via "0", lane register
;; restricted to "x".
6824 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6825 [(set (match_operand:V4SF 0 "register_operand" "=w")
6829 (match_operand:V8HF 2 "register_operand" "w")
6830 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6834 (match_operand:V8HF 3 "register_operand" "x")
6835 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6836 (match_operand:V4SF 1 "register_operand" "0")))]
6838 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6839 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL by-laneq, low half.
6842 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6843 [(set (match_operand:V4SF 0 "register_operand" "=w")
6848 (match_operand:V8HF 2 "register_operand" "w")
6849 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6853 (match_operand:V8HF 3 "register_operand" "x")
6854 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6855 (match_operand:V4SF 1 "register_operand" "0")))]
6857 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6858 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 by-laneq, high half.
6861 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6862 [(set (match_operand:V4SF 0 "register_operand" "=w")
6866 (match_operand:V8HF 2 "register_operand" "w")
6867 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6871 (match_operand:V8HF 3 "register_operand" "x")
6872 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6873 (match_operand:V4SF 1 "register_operand" "0")))]
6875 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6876 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 by-laneq, high half.
6879 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6880 [(set (match_operand:V4SF 0 "register_operand" "=w")
6885 (match_operand:V8HF 2 "register_operand" "w")
6886 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6890 (match_operand:V8HF 3 "register_operand" "x")
6891 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6892 (match_operand:V4SF 1 "register_operand" "0")))]
6894 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6895 [(set_attr "type" "neon_fp_mul_s")]
;; FP16 FMLAL/FMLSL mixed forms: 64-bit result (V2SF), 64-bit V4HF
;; multiplicand (operand 2) but a 128-bit V8HF lane source (operand 3)
;; with a 3-bit lane index.  Note the expanders deliberately use
;; V4HFmode for the half-selector (it selects from operand 2) and
;; V8HFmode for the lane rtx (the lane indexes operand 3).
;; NOTE(review): expander tails and some rtx wrapper lines are elided
;; in this extract -- verify against upstream aarch64-simd.md.
6898 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6899 [(set (match_operand:V2SF 0 "register_operand")
6900 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6901 (match_operand:V4HF 2 "register_operand")
6902 (match_operand:V8HF 3 "register_operand")
6903 (match_operand:SI 4 "aarch64_lane_imm3")]
6907 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6908 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6910 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; High-half variant of the above expander.
6919 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6920 [(set (match_operand:V2SF 0 "register_operand")
6921 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6922 (match_operand:V4HF 2 "register_operand")
6923 (match_operand:V8HF 3 "register_operand")
6924 (match_operand:SI 4 "aarch64_lane_imm3")]
6928 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6929 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6931 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL laneq, low half; accumulator tied via "0".
6940 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6941 [(set (match_operand:V2SF 0 "register_operand" "=w")
6945 (match_operand:V4HF 2 "register_operand" "w")
6946 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6950 (match_operand:V8HF 3 "register_operand" "x")
6951 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6952 (match_operand:V2SF 1 "register_operand" "0")))]
6954 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6955 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL laneq, low half.
6958 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6959 [(set (match_operand:V2SF 0 "register_operand" "=w")
6964 (match_operand:V4HF 2 "register_operand" "w")
6965 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6969 (match_operand:V8HF 3 "register_operand" "x")
6970 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6971 (match_operand:V2SF 1 "register_operand" "0")))]
6973 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6974 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 laneq, high half.
6977 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6978 [(set (match_operand:V2SF 0 "register_operand" "=w")
6982 (match_operand:V4HF 2 "register_operand" "w")
6983 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6987 (match_operand:V8HF 3 "register_operand" "x")
6988 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6989 (match_operand:V2SF 1 "register_operand" "0")))]
6991 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6992 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 laneq, high half.
6995 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6996 [(set (match_operand:V2SF 0 "register_operand" "=w")
7001 (match_operand:V4HF 2 "register_operand" "w")
7002 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
7006 (match_operand:V8HF 3 "register_operand" "x")
7007 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
7008 (match_operand:V2SF 1 "register_operand" "0")))]
7010 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
7011 [(set_attr "type" "neon_fp_mul_s")]
;; FP16 FMLAL/FMLSL mixed forms: 128-bit result (V4SF), 128-bit V8HF
;; multiplicand (operand 2) but a 64-bit V4HF lane source (operand 3)
;; with a 2-bit lane index.  Mirror of the laneq-v2sf family above:
;; the half-selector is built in V8HFmode (selects from operand 2)
;; while the lane rtx is built in V4HFmode (indexes operand 3).
;; NOTE(review): expander tails and some rtx wrapper lines are elided
;; in this extract -- verify against upstream aarch64-simd.md.
7014 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
7015 [(set (match_operand:V4SF 0 "register_operand")
7016 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7017 (match_operand:V8HF 2 "register_operand")
7018 (match_operand:V4HF 3 "register_operand")
7019 (match_operand:SI 4 "aarch64_imm2")]
7023 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
7024 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7026 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; High-half variant of the above expander.
7034 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
7035 [(set (match_operand:V4SF 0 "register_operand")
7036 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
7037 (match_operand:V8HF 2 "register_operand")
7038 (match_operand:V4HF 3 "register_operand")
7039 (match_operand:SI 4 "aarch64_imm2")]
7043 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
7044 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
7046 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL q-by-lane, low half; accumulator tied via "0".
7054 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
7055 [(set (match_operand:V4SF 0 "register_operand" "=w")
7059 (match_operand:V8HF 2 "register_operand" "w")
7060 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
7064 (match_operand:V4HF 3 "register_operand" "x")
7065 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7066 (match_operand:V4SF 1 "register_operand" "0")))]
7068 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
7069 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL q-by-lane, low half.
7072 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
7073 [(set (match_operand:V4SF 0 "register_operand" "=w")
7078 (match_operand:V8HF 2 "register_operand" "w")
7079 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
7083 (match_operand:V4HF 3 "register_operand" "x")
7084 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7085 (match_operand:V4SF 1 "register_operand" "0")))]
7087 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
7088 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 q-by-lane, high half.
7091 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
7092 [(set (match_operand:V4SF 0 "register_operand" "=w")
7096 (match_operand:V8HF 2 "register_operand" "w")
7097 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
7101 (match_operand:V4HF 3 "register_operand" "x")
7102 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7103 (match_operand:V4SF 1 "register_operand" "0")))]
7105 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
7106 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 q-by-lane, high half.
7109 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
7110 [(set (match_operand:V4SF 0 "register_operand" "=w")
7115 (match_operand:V8HF 2 "register_operand" "w")
7116 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
7120 (match_operand:V4HF 3 "register_operand" "x")
7121 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
7122 (match_operand:V4SF 1 "register_operand" "0")))]
7124 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
7125 [(set_attr "type" "neon_fp_mul_s")]
;; Polynomial multiply long (AES/crypto extension): 64x64 -> 128-bit
;; carry-less multiply.  Gated on TARGET_SIMD && TARGET_AES.
;; NOTE(review): the UNSPEC tags appear elided in this extract --
;; verify against upstream aarch64-simd.md.

;; PMULL: multiplies two scalar DI operands, producing a TI result.
7130 (define_insn "aarch64_crypto_pmulldi"
7131 [(set (match_operand:TI 0 "register_operand" "=w")
7132 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
7133 (match_operand:DI 2 "register_operand" "w")]
7135 "TARGET_SIMD && TARGET_AES"
7136 "pmull\\t%0.1q, %1.1d, %2.1d"
7137 [(set_attr "type" "crypto_pmull")]

;; PMULL2: same operation on the high 64-bit lanes of V2DI operands.
7140 (define_insn "aarch64_crypto_pmullv2di"
7141 [(set (match_operand:TI 0 "register_operand" "=w")
7142 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7143 (match_operand:V2DI 2 "register_operand" "w")]
7145 "TARGET_SIMD && TARGET_AES"
7146 "pmull2\\t%0.1q, %1.2d, %2.2d"
7147 [(set_attr "type" "crypto_pmull")]
7150 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; ANY_EXTEND iterates over sign_extend/zero_extend, emitting SXTL or
;; UXTL via the <su> attribute.
;; NOTE(review): the insn condition line appears elided in this
;; extract -- verify against upstream aarch64-simd.md.
7151 (define_insn "<optab><Vnarrowq><mode>2"
7152 [(set (match_operand:VQN 0 "register_operand" "=w")
7153 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7155 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7156 [(set_attr "type" "neon_shift_imm_long")]
7159 ;; Truncate a 128-bit integer vector to a 64-bit vector.
;; Plain truncation of each lane, emitted as XTN.
7160 (define_insn "trunc<mode><Vnarrowq>2"
7161 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7162 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7164 "xtn\t%0.<Vntype>, %1.<Vtype>"
7165 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; BFloat16 dot-product / matrix-multiply patterns and the 8-bit
;; integer matrix multiply-accumulate.
;; NOTE(review): UNSPEC tags, insn conditions and some rtx wrapper
;; lines appear elided in this extract -- verify against upstream
;; aarch64-simd.md.

;; BFDOT: 2-way BF16 dot product accumulated into SF lanes;
;; accumulator operand 1 tied via "0".
7168 (define_insn "aarch64_bfdot<mode>"
7169 [(set (match_operand:VDQSF 0 "register_operand" "=w")
7172 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
7173 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
7175 (match_operand:VDQSF 1 "register_operand" "0")))]
7177 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
7178 [(set_attr "type" "neon_dot<q>")]

;; BFDOT by-lane: the C fragment remaps the element index (operand 4)
;; to a 2h-group index, endian-corrected via ENDIAN_LANE_N, before
;; printing the template.
7181 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
7182 [(set (match_operand:VDQSF 0 "register_operand" "=w")
7185 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
7186 (match_operand:VBF 3 "register_operand" "w")
7187 (match_operand:SI 4 "const_int_operand" "n")]
7189 (match_operand:VDQSF 1 "register_operand" "0")))]
7192 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
7193 int lane = INTVAL (operands[4]);
7194 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
7195 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
7197 [(set_attr "type" "neon_dot<VDQSF:q>")]

;; BFMMLA: BF16 matrix multiply-accumulate into V4SF; accumulator
;; tied via "0" on operand 1.
7201 (define_insn "aarch64_bfmmlaqv4sf"
7202 [(set (match_operand:V4SF 0 "register_operand" "=w")
7203 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
7204 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7205 (match_operand:V8BF 3 "register_operand" "w")]
7208 "bfmmla\\t%0.4s, %2.8h, %3.8h"
7209 [(set_attr "type" "neon_fp_mla_s_q")]

;; BFMLALB/BFMLALT (selected by <bt>): widening BF16 multiply-add of
;; even/odd elements into V4SF.
7213 (define_insn "aarch64_bfmlal<bt>v4sf"
7214 [(set (match_operand:V4SF 0 "register_operand" "=w")
7215 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7216 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7217 (match_operand:V8BF 3 "register_operand" "w")]
7220 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
7221 [(set_attr "type" "neon_fp_mla_s_q")]

;; BFMLALB/BFMLALT by-lane: operand 4 is endian-corrected before the
;; template is printed.
7224 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
7225 [(set (match_operand:V4SF 0 "register_operand" "=w")
7226 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
7227 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
7228 (match_operand:VBF 3 "register_operand" "w")
7229 (match_operand:SI 4 "const_int_operand" "n")]
7233 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
7234 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
7236 [(set_attr "type" "neon_fp_mla_s_scalar_q")]

7239 ;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (selected by the MATMUL iterator via <sur>).
7240 (define_insn "aarch64_simd_<sur>mmlav16qi"
7241 [(set (match_operand:V4SI 0 "register_operand" "=w")
7243 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
7244 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
7245 (match_operand:V4SI 1 "register_operand" "0")))]
7247 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
7248 [(set_attr "type" "neon_mla_s_q")]
;; BF16 narrowing conversions from single precision.
;; NOTE(review): UNSPEC tags and insn condition lines appear elided in
;; this extract -- verify against upstream aarch64-simd.md.

;; BFCVTN: convert V4SF to BF16, writing the low half of the result
;; (V4SF_TO_BF iterates the destination mode).
7252 (define_insn "aarch64_bfcvtn<q><mode>"
7253 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
7254 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
7257 "bfcvtn\\t%0.4h, %1.4s"
7258 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]

;; BFCVTN2: convert V4SF into the high half of a V8BF whose low half
;; (operand 1) is preserved -- hence the "0" tie.
7261 (define_insn "aarch64_bfcvtn2v8bf"
7262 [(set (match_operand:V8BF 0 "register_operand" "=w")
7263 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
7264 (match_operand:V4SF 2 "register_operand" "w")]
7267 "bfcvtn2\\t%0.8h, %2.4s"
7268 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
7271 (define_insn "aarch64_bfcvtbf"
7272 [(set (match_operand:BF 0 "register_operand" "=w")
7273 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
7277 [(set_attr "type" "f_cvt")]