1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The following define_subst rules are used to produce patterns representing
22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
23 ;; a vec_concat with zeroes. The order of the vec_concat operands differs
24 ;; for big-endian so we have a separate define_subst rule for each endianness.
25 (define_subst "add_vec_concat_subst_le"
26 [(set (match_operand:VDZ 0)
	(match_operand:VDZ 1))]
  "!BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_dup 1)
	  (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
34 (define_subst "add_vec_concat_subst_be"
35 [(set (match_operand:VDZ 0)
	(match_operand:VDZ 1))]
  "BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_dup 1)))])
43 ;; The subst_attr definitions used to annotate patterns further in the file.
44 ;; Patterns that need to have the above substitutions added to them should
45 ;; have <vczle><vczbe> added to their name.
46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
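
;; For example, naming a pattern "add<mode>3<vczle><vczbe>" generates, besides
;; the plain "add<mode>3", the variants "add<mode>3_vec_concatz_le" and
;; "add<mode>3_vec_concatz_be", whose result is the 64-bit operation
;; vec_concat'd with zeroes in the appropriate endian order.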
49 (define_expand "mov<mode>"
50 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
51 (match_operand:VALL_F16 1 "general_operand"))]
54 /* Force the operand into a register if it is not an
55 immediate whose use can be replaced with xzr.
56 If the mode is 16 bytes wide, then we will be doing
57 a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
60 if (GET_CODE (operands[0]) == MEM
61 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
62 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
63 && aarch64_mem_pair_operand (operands[0], DImode))
64 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
65 operands[1] = force_reg (<MODE>mode, operands[1]);
67 /* If a constant is too complex to force to memory (e.g. because it
68 contains CONST_POLY_INTs), build it up from individual elements instead.
69 We should only need to do this before RA; aarch64_legitimate_constant_p
70 should ensure that we don't try to rematerialize the constant later. */
71 if (GET_CODE (operands[1]) == CONST_VECTOR
72 && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
74 aarch64_expand_vector_init (operands[0], operands[1]);
80 (define_expand "movmisalign<mode>"
81 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
82 (match_operand:VALL_F16 1 "general_operand"))]
83 "TARGET_FLOAT && !STRICT_ALIGNMENT"
85 /* This pattern is not permitted to fail during expansion: if both arguments
86 are non-registers (e.g. memory := constant, which can be created by the
87 auto-vectorizer), force operand 1 into a register. */
88 if (!register_operand (operands[0], <MODE>mode)
89 && !register_operand (operands[1], <MODE>mode))
90 operands[1] = force_reg (<MODE>mode, operands[1]);
93 (define_insn "aarch64_simd_dup<mode>"
94 [(set (match_operand:VDQ_I 0 "register_operand")
96 (match_operand:<VEL> 1 "register_operand")))]
98 {@ [ cons: =0 , 1 ; attrs: type ]
99 [ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
100 [ w , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
104 (define_insn "aarch64_simd_dup<mode>"
105 [(set (match_operand:VDQF_F16 0 "register_operand")
106 (vec_duplicate:VDQF_F16
107 (match_operand:<VEL> 1 "register_operand")))]
109 {@ [ cons: =0 , 1 ; attrs: type ]
110 [ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
111 [ w , r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
115 (define_insn "aarch64_dup_lane<mode>"
116 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
117 (vec_duplicate:VALL_F16
119 (match_operand:VALL_F16 1 "register_operand" "w")
120 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
124 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
125 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
127 [(set_attr "type" "neon_dup<q>")]
130 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
131 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
132 (vec_duplicate:VALL_F16_NO_V2Q
134 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
135 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
139 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
140 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
142 [(set_attr "type" "neon_dup<q>")]
145 (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
146 [(set (match_operand:VDMOV 0 "nonimmediate_operand")
147 (match_operand:VDMOV 1 "general_operand"))]
149 && (register_operand (operands[0], <MODE>mode)
150 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
151 {@ [cons: =0, 1; attrs: type, arch, length]
152 [w , m ; neon_load1_1reg<q> , * , *] ldr\t%d0, %1
153 [r , m ; load_8 , * , *] ldr\t%x0, %1
154 [m , Dz; store_8 , * , *] str\txzr, %0
155 [m , w ; neon_store1_1reg<q>, * , *] str\t%d1, %0
156 [m , r ; store_8 , * , *] str\t%x1, %0
157 [w , w ; neon_logic<q> , simd , *] mov\t%0.<Vbtype>, %1.<Vbtype>
158 [w , w ; neon_logic<q> , * , *] fmov\t%d0, %d1
159 [?r, w ; neon_to_gp<q> , base_simd, *] umov\t%0, %1.d[0]
160 [?r, w ; neon_to_gp<q> , * , *] fmov\t%x0, %d1
161 [?w, r ; f_mcr , * , *] fmov\t%d0, %1
162 [?r, r ; mov_reg , * , *] mov\t%0, %1
163 [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
164 [w , Dz; f_mcr , * , *] fmov\t%d0, xzr
165 [w , Dx; neon_move , simd , 8] #
167 "CONST_INT_P (operands[1])
168 && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
169 && FP_REGNUM_P (REGNO (operands[0]))"
172 aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
177 (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
178 [(set (match_operand:VQMOV 0 "nonimmediate_operand")
179 (match_operand:VQMOV 1 "general_operand"))]
181 && (register_operand (operands[0], <MODE>mode)
182 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
183 {@ [cons: =0, 1; attrs: type, arch, length]
184 [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
185 [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
186 [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
187 [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
188 [w , w ; * , sve , 4] mov\t%Z0.d, %Z1.d
189 [?r , w ; multiple , * , 8] #
190 [?w , r ; multiple , * , 8] #
191 [?r , r ; multiple , * , 8] #
192 [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
193 [w , Dz; fmov , * , 4] fmov\t%d0, xzr
194 [w , Dx; neon_move , simd, 8] #
197 && ((REG_P (operands[0])
198 && REG_P (operands[1])
199 && !(FP_REGNUM_P (REGNO (operands[0]))
200 && FP_REGNUM_P (REGNO (operands[1]))))
201 || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
202 && FP_REGNUM_P (REGNO (operands[0]))))"
205 if (GP_REGNUM_P (REGNO (operands[0]))
206 && GP_REGNUM_P (REGNO (operands[1])))
207 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
210 if (FP_REGNUM_P (REGNO (operands[0]))
211 && <MODE>mode == V2DImode
212 && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
216 aarch64_split_simd_move (operands[0], operands[1]);
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
225 (define_insn "aarch64_store_lane0<mode>"
226 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
227 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
228 (parallel [(match_operand 2 "const_int_operand" "n")])))]
230 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
231 "str\\t%<Vetype>1, %0"
232 [(set_attr "type" "neon_store1_1reg<q>")]
235 (define_insn "aarch64_simd_stp<mode>"
236 [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
237 (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
239 {@ [ cons: =0 , 1 ; attrs: type ]
240 [ Umn , w ; neon_stp ] stp\t%<Vetype>1, %<Vetype>1, %y0
241 [ Umn , r ; store_<ldpstp_vel_sz> ] stp\t%<vwcore>1, %<vwcore>1, %y0
245 (define_expand "@aarch64_split_simd_mov<mode>"
246 [(set (match_operand:VQMOV 0)
247 (match_operand:VQMOV 1))]
250 rtx dst = operands[0];
251 rtx src = operands[1];
253 if (GP_REGNUM_P (REGNO (src)))
255 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
256 rtx src_high_part = gen_highpart (<VHALF>mode, src);
257 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
259 emit_move_insn (dst_low_part, src_low_part);
260 emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
265 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
266 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
267 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
268 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
269 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
270 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
276 (define_expand "aarch64_get_half<mode>"
277 [(set (match_operand:<VHALF> 0 "register_operand")
279 (match_operand:VQMOV 1 "register_operand")
280 (match_operand 2 "ascending_int_parallel")))]
283 if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
285 emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
291 (define_expand "aarch64_get_low<mode>"
292 [(match_operand:<VHALF> 0 "register_operand")
293 (match_operand:VQMOV 1 "register_operand")]
296 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
297 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
302 (define_expand "aarch64_get_high<mode>"
303 [(match_operand:<VHALF> 0 "register_operand")
304 (match_operand:VQMOV 1 "register_operand")]
307 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
308 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
313 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
314 [(set (match_operand:<VHALF> 0 "register_operand")
316 (match_operand:VQMOV_NO2E 1 "register_operand")
317 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
319 {@ [ cons: =0 , 1 ; attrs: type , arch ]
320 [ w , w ; mov_reg , simd ] #
321 [ ?r , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
322 [ ?r , w ; f_mrc , * ] fmov\t%0, %d1
324 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
325 [(set (match_dup 0) (match_dup 1))]
327 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
329 [(set_attr "length" "4")]
332 (define_insn "aarch64_simd_mov_from_<mode>high"
333 [(set (match_operand:<VHALF> 0 "register_operand")
335 (match_operand:VQMOV_NO2E 1 "register_operand")
336 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
338 {@ [ cons: =0 , 1 ; attrs: type , arch ]
339 [ w , w ; neon_dup<q> , simd ] dup\t%d0, %1.d[1]
340 [ w , w ; * , sve ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
341 [ ?r , w ; neon_to_gp<q> , simd ] umov\t%0, %1.d[1]
342 [ ?r , w ; f_mrc , * ] fmov\t%0, %1.d[1]
344 [(set_attr "length" "4")]
347 (define_insn "orn<mode>3<vczle><vczbe>"
348 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
349 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
350 (match_operand:VDQ_I 2 "register_operand" "w")))]
352 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
353 [(set_attr "type" "neon_logic<q>")]
356 (define_insn "bic<mode>3<vczle><vczbe>"
357 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
358 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
359 (match_operand:VDQ_I 2 "register_operand" "w")))]
361 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
362 [(set_attr "type" "neon_logic<q>")]
365 (define_insn "add<mode>3<vczle><vczbe>"
366 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
367 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
368 (match_operand:VDQ_I 2 "register_operand" "w")))]
370 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
371 [(set_attr "type" "neon_add<q>")]
374 (define_insn "sub<mode>3<vczle><vczbe>"
375 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
376 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
377 (match_operand:VDQ_I 2 "register_operand" "w")))]
379 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
380 [(set_attr "type" "neon_sub<q>")]
383 (define_insn "mul<mode>3<vczle><vczbe>"
384 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
385 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
386 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
388 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
389 [(set_attr "type" "neon_mul_<Vetype><q>")]
392 (define_insn "bswap<mode>2"
393 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
394 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
396 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
397 [(set_attr "type" "neon_rev<q>")]
400 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
401 [(set (match_operand:VB 0 "register_operand" "=w")
402 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
405 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
406 [(set_attr "type" "neon_rbit")]
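
;; ctz (x) is computed below as clz (bit_reverse (x)): the BSWAP reverses the
;; bytes of each element and the byte-wise RBIT then reverses the bits within
;; each byte, so together they bit-reverse the whole element before the CLZ.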
409 (define_expand "ctz<mode>2"
410 [(set (match_operand:VS 0 "register_operand")
411 (ctz:VS (match_operand:VS 1 "register_operand")))]
414 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
415 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
417 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
418 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
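
;; xorsign (x, y) flips the sign of x wherever y is negative; per lane it is
;; just x ^ (y & SIGN_MASK).  As an illustrative scalar C sketch (for
;; exposition only, not part of any pattern):
;;
;;   float xorsignf (float x, float y)
;;   {
;;     uint32_t xi, yi;
;;     memcpy (&xi, &x, sizeof (xi));
;;     memcpy (&yi, &y, sizeof (yi));
;;     xi ^= yi & 0x80000000u;	/* Flip x's sign iff y is negative.  */
;;     memcpy (&x, &xi, sizeof (x));
;;     return x;
;;   }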
423 (define_expand "@xorsign<mode>3"
424 [(match_operand:VHSDF 0 "register_operand")
425 (match_operand:VHSDF 1 "register_operand")
426 (match_operand:VHSDF 2 "register_operand")]
430 machine_mode imode = <V_INT_EQUIV>mode;
431 rtx v_bitmask = gen_reg_rtx (imode);
432 rtx op1x = gen_reg_rtx (imode);
433 rtx op2x = gen_reg_rtx (imode);
435 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
436 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
438 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
440 emit_move_insn (v_bitmask,
441 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
442 HOST_WIDE_INT_M1U << bits));
444 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
445 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
446 emit_move_insn (operands[0],
447 lowpart_subreg (<MODE>mode, op1x, imode));
;; The fcadd and fcmla patterns are kept as UNSPECs because their use must
;; guarantee that the source vectors are contiguous.  It would be wrong to
;; describe the operation without also being able to describe the permute
;; that is required, but even if that were done the permute would have been
;; created as a LOAD_LANES, which means the values in the registers are in
;; the wrong order.
458 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
459 [(set (match_operand:VHSDF 0 "register_operand" "=w")
460 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
461 (match_operand:VHSDF 2 "register_operand" "w")]
464 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
465 [(set_attr "type" "neon_fcadd")]
468 (define_expand "cadd<rot><mode>3"
469 [(set (match_operand:VHSDF 0 "register_operand")
470 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
471 (match_operand:VHSDF 2 "register_operand")]
473 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
476 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
477 [(set (match_operand:VHSDF 0 "register_operand" "=w")
478 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
479 (match_operand:VHSDF 3 "register_operand" "w")]
481 (match_operand:VHSDF 1 "register_operand" "0")))]
483 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
484 [(set_attr "type" "neon_fcmla")]
488 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
489 [(set (match_operand:VHSDF 0 "register_operand" "=w")
490 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
491 (match_operand:VHSDF 3 "register_operand" "w")
492 (match_operand:SI 4 "const_int_operand" "n")]
494 (match_operand:VHSDF 1 "register_operand" "0")))]
497 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
498 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
500 [(set_attr "type" "neon_fcmla")]
503 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
504 [(set (match_operand:V4HF 0 "register_operand" "=w")
505 (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
506 (match_operand:V8HF 3 "register_operand" "w")
507 (match_operand:SI 4 "const_int_operand" "n")]
509 (match_operand:V4HF 1 "register_operand" "0")))]
512 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
513 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
515 [(set_attr "type" "neon_fcmla")]
518 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
519 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
520 (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
521 (match_operand:<VHALF> 3 "register_operand" "w")
522 (match_operand:SI 4 "const_int_operand" "n")]
524 (match_operand:VQ_HSF 1 "register_operand" "0")))]
527 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
529 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
530 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
532 [(set_attr "type" "neon_fcmla")]
535 ;; The complex mla/mls operations always need to expand to two instructions.
536 ;; The first operation does half the computation and the second does the
537 ;; remainder. Because of this, expand early.
538 (define_expand "cml<fcmac1><conj_op><mode>4"
539 [(set (match_operand:VHSDF 0 "register_operand")
540 (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
541 (match_operand:VHSDF 2 "register_operand")]
543 (match_operand:VHSDF 3 "register_operand")))]
544 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
546 rtx tmp = gen_reg_rtx (<MODE>mode);
547 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
548 operands[2], operands[1]));
549 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
550 operands[2], operands[1]));
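
;; Assuming the usual FCMLA semantics, the two rotations above combine to
;; give, per complex lane (with signs adjusted for the conjugating variants):
;;   re (r) += re (a) * re (b) - im (a) * im (b)
;;   im (r) += re (a) * im (b) + im (a) * re (b)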
554 ;; The complex mul operations always need to expand to two instructions.
555 ;; The first operation does half the computation and the second does the
556 ;; remainder. Because of this, expand early.
557 (define_expand "cmul<conj_op><mode>3"
558 [(set (match_operand:VHSDF 0 "register_operand")
559 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
560 (match_operand:VHSDF 2 "register_operand")]
562 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
564 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
565 rtx res1 = gen_reg_rtx (<MODE>mode);
566 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
567 operands[2], operands[1]));
568 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
569 operands[2], operands[1]));
573 ;; These expands map to the Dot Product optab the vectorizer checks for
;; and to the intrinsics pattern.
575 ;; The auto-vectorizer expects a dot product builtin that also does an
576 ;; accumulation into the provided register.
577 ;; Given the following pattern
;; for (i=0; i<len; i++) {
;;     r += a[i] * b[i];
;; }
585 ;; This can be auto-vectorized to
586 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
588 ;; given enough iterations. However the vectorizer can keep unrolling the loop
589 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
590 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; and so the vectorizer provides r, into which the result has to be
;; accumulated.
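
;; Concretely, per 32-bit lane the insn below computes
;;   op0[i] = op3[i] + op1[4*i+0]*op2[4*i+0] + op1[4*i+1]*op2[4*i+1]
;;            + op1[4*i+2]*op2[4*i+2] + op1[4*i+3]*op2[4*i+3]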
594 (define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
595 [(set (match_operand:VS 0 "register_operand" "=w")
597 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
598 (match_operand:<VSI2QI> 2 "register_operand" "w")]
600 (match_operand:VS 3 "register_operand" "0")))]
602 "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
603 [(set_attr "type" "neon_dot<q>")]
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot
607 ;; (vector) Dot Product operation and the vectorized optab.
608 (define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
609 [(set (match_operand:VS 0 "register_operand" "=w")
611 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
612 (match_operand:<VSI2QI> 2 "register_operand" "w")]
614 (match_operand:VS 3 "register_operand" "0")))]
616 "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
617 [(set_attr "type" "neon_dot<q>")]
620 ;; These instructions map to the __builtins for the Dot Product
621 ;; indexed operations.
622 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
623 [(set (match_operand:VS 0 "register_operand" "=w")
625 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
626 (match_operand:V8QI 3 "register_operand" "<h_con>")
627 (match_operand:SI 4 "immediate_operand" "i")]
629 (match_operand:VS 1 "register_operand" "0")))]
632 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
633 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
635 [(set_attr "type" "neon_dot<q>")]
638 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
639 [(set (match_operand:VS 0 "register_operand" "=w")
641 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
642 (match_operand:V16QI 3 "register_operand" "<h_con>")
643 (match_operand:SI 4 "immediate_operand" "i")]
645 (match_operand:VS 1 "register_operand" "0")))]
648 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
649 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
651 [(set_attr "type" "neon_dot<q>")]
;; These instructions map to the __builtins for the Armv8.6-A I8MM usdot, sudot
655 ;; (by element) Dot Product operations.
656 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
657 [(set (match_operand:VS 0 "register_operand" "=w")
659 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
660 (match_operand:VB 3 "register_operand" "w")
661 (match_operand:SI 4 "immediate_operand" "i")]
663 (match_operand:VS 1 "register_operand" "0")))]
666 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
667 int lane = INTVAL (operands[4]);
668 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
669 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
671 [(set_attr "type" "neon_dot<VS:q>")]
674 (define_expand "copysign<mode>3"
675 [(match_operand:VHSDF 0 "register_operand")
676 (match_operand:VHSDF 1 "register_operand")
677 (match_operand:VHSDF 2 "nonmemory_operand")]
680 machine_mode int_mode = <V_INT_EQUIV>mode;
681 rtx v_bitmask = gen_reg_rtx (int_mode);
682 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
684 emit_move_insn (v_bitmask,
685 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
686 HOST_WIDE_INT_M1U << bits));
  /* copysign (x, -1) should instead be expanded as orr with the sign
     bit.  */
690 if (!REG_P (operands[2]))
692 rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
693 if (GET_CODE (op2_elt) == CONST_DOUBLE
694 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
696 emit_insn (gen_ior<v_int_equiv>3 (
697 lowpart_subreg (int_mode, operands[0], <MODE>mode),
698 lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
703 operands[2] = force_reg (<MODE>mode, operands[2]);
704 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
705 operands[2], operands[1]));
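
;; Per lane the BSL above computes (y & SIGN_MASK) | (x & ~SIGN_MASK), i.e.
;; the sign bit of y merged with the magnitude of x, which is copysign (x, y).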
710 (define_insn "mul_lane<mode>3"
711 [(set (match_operand:VMULD 0 "register_operand" "=w")
715 (match_operand:<VCOND> 2 "register_operand" "<h_con>")
716 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
717 (match_operand:VMULD 1 "register_operand" "w")))]
720 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
721 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
723 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
726 (define_insn "mul_laneq<mode>3"
727 [(set (match_operand:VMUL 0 "register_operand" "=w")
731 (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
732 (parallel [(match_operand:SI 3 "immediate_operand")])))
733 (match_operand:VMUL 1 "register_operand" "w")))]
736 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
737 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
739 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
742 (define_insn "mul_n<mode>3"
743 [(set (match_operand:VMUL 0 "register_operand" "=w")
746 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
747 (match_operand:VMUL 1 "register_operand" "w")))]
749 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
750 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
753 (define_insn "@aarch64_rsqrte<mode>"
754 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
755 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
758 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
759 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
761 (define_insn "@aarch64_rsqrts<mode>"
762 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
763 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
764 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
767 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
768 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
770 (define_expand "rsqrt<mode>2"
771 [(set (match_operand:VALLF 0 "register_operand")
772 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
776 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
780 (define_insn "aarch64_ursqrte<mode>"
781 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
782 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
785 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
786 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
788 (define_insn "*aarch64_mul3_elt_to_64v2df"
789 [(set (match_operand:DF 0 "register_operand" "=w")
792 (match_operand:V2DF 1 "register_operand" "w")
793 (parallel [(match_operand:SI 2 "immediate_operand")]))
794 (match_operand:DF 3 "register_operand" "w")))]
797 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
798 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
800 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
803 (define_insn "neg<mode>2<vczle><vczbe>"
804 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
805 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
807 "neg\t%0.<Vtype>, %1.<Vtype>"
808 [(set_attr "type" "neon_neg<q>")]
811 (define_insn "abs<mode>2<vczle><vczbe>"
812 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
813 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
815 "abs\t%0.<Vtype>, %1.<Vtype>"
816 [(set_attr "type" "neon_abs<q>")]
819 ;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
822 (define_insn "aarch64_abs<mode><vczle><vczbe>"
823 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
825 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
828 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
829 [(set_attr "type" "neon_abs<q>")]
832 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
834 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
835 ;; Whereas SABD would return 192 (-64 signed) on the above example.
836 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
837 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
838 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
841 (match_operand:VDQ_BHSI 1 "register_operand" "w")
842 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
847 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
848 [(set_attr "type" "neon_abd<q>")]
851 (define_expand "<su>abd<mode>3"
852 [(match_operand:VDQ_BHSI 0 "register_operand")
854 (match_operand:VDQ_BHSI 1 "register_operand")
855 (match_operand:VDQ_BHSI 2 "register_operand"))]
858 emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
863 (define_insn "aarch64_<su>abdl<mode>"
864 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
868 (match_operand:VD_BHSI 1 "register_operand" "w")
869 (match_operand:VD_BHSI 2 "register_operand" "w"))
874 "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
875 [(set_attr "type" "neon_abd<q>")]
878 (define_insn "aarch64_<su>abdl2<mode>_insn"
879 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
884 (match_operand:VQW 1 "register_operand" "w")
885 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
887 (match_operand:VQW 2 "register_operand" "w")
898 "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
899 [(set_attr "type" "neon_abd<q>")]
902 (define_expand "aarch64_<su>abdl2<mode>"
903 [(match_operand:<VDBLW> 0 "register_operand")
905 (match_operand:VQW 1 "register_operand")
906 (match_operand:VQW 2 "register_operand"))]
909 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
910 emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
916 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
922 (match_operand:VQW 1 "register_operand" "w")
923 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
926 (match_operand:VQW 2 "register_operand" "w")
929 "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
930 [(set_attr "type" "neon_abd_long")]
933 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
934 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
939 (match_operand:VQW 1 "register_operand" "w")
940 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
943 (match_operand:VQW 2 "register_operand" "w")
946 "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
947 [(set_attr "type" "neon_abd_long")]
950 (define_expand "vec_widen_<su>abd_hi_<mode>"
951 [(match_operand:<VWIDE> 0 "register_operand")
952 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
953 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
956 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
957 emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
963 (define_expand "vec_widen_<su>abd_lo_<mode>"
964 [(match_operand:<VWIDE> 0 "register_operand")
965 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
966 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
969 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
970 emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
976 (define_insn "aarch64_<su>abal<mode>"
977 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
982 (match_operand:VD_BHSI 2 "register_operand" "w")
983 (match_operand:VD_BHSI 3 "register_operand" "w"))
987 (match_operand:<VWIDE> 1 "register_operand" "0")))]
989 "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
990 [(set_attr "type" "neon_arith_acc<q>")]
993 (define_insn "aarch64_<su>abal2<mode>_insn"
994 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1000 (match_operand:VQW 2 "register_operand" "w")
1001 (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
1003 (match_operand:VQW 3 "register_operand" "w")
1012 (match_operand:<VDBLW> 1 "register_operand" "0")))]
1014 "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1015 [(set_attr "type" "neon_arith_acc<q>")]
1018 (define_expand "aarch64_<su>abal2<mode>"
1019 [(match_operand:<VDBLW> 0 "register_operand")
1020 (match_operand:<VDBLW> 1 "register_operand")
1022 (match_operand:VQW 2 "register_operand")
1023 (match_operand:VQW 3 "register_operand"))]
1026 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1027 emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
1028 operands[2], operands[3], hi));
1033 (define_expand "aarch64_<su>adalp<mode>"
1034 [(set (match_operand:<VDBLW> 0 "register_operand")
1038 (ANY_EXTEND:<V2XWIDE>
1039 (match_operand:VDQV_L 2 "register_operand"))
1041 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1043 (match_operand:<VDBLW> 1 "register_operand")))]
1046 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
1047 operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
1048 operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
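
;; The two stepped parallels select the even- and odd-numbered widened
;; elements, so each result lane is acc[i] + widen (in[2*i]) + widen (in[2*i+1]).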
1052 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1053 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1057 (ANY_EXTEND:<V2XWIDE>
1058 (match_operand:VDQV_L 2 "register_operand" "w"))
1059 (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
1060 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1061 (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
1062 (match_operand:<VDBLW> 1 "register_operand" "0")))]
1064 && !rtx_equal_p (operands[3], operands[4])"
1065 "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
1066 [(set_attr "type" "neon_reduc_add<q>")]
1069 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1070 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
1071 ;; reduction of the difference into a V4SI vector and accumulate that into
1072 ;; operand 3 before copying that into the result operand 0.
1073 ;; Perform that with a sequence of:
1074 ;; UABDL2 tmp.8h, op1.16b, op2.16b
1075 ;; UABAL tmp.8h, op1.8b, op2.8b
1076 ;; UADALP op3.4s, tmp.8h
1077 ;; MOV op0, op3 // should be eliminated in later passes.
1079 ;; For TARGET_DOTPROD we do:
1080 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1081 ;; UABD tmp2.16b, op1.16b, op2.16b
1082 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1083 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
1085 ;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT, as the absolute difference is
;; non-negative.
1089 (define_expand "<su>sadv16qi"
1090 [(use (match_operand:V4SI 0 "register_operand"))
1091 (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
1092 (match_operand:V16QI 2 "register_operand"))
1093 (use (match_operand:V4SI 3 "register_operand"))]
1098 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1099 rtx abd = gen_reg_rtx (V16QImode);
1100 emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
1101 emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
1104 rtx reduc = gen_reg_rtx (V8HImode);
1105 emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
1107 emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1108 gen_lowpart (V8QImode, operands[1]),
1109 gen_lowpart (V8QImode,
1111 emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
1112 emit_move_insn (operands[0], operands[3]);
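
;; Either way the value computed is, per 32-bit lane of the result:
;;   op0[i] = op3[i] + sum over j in 0..3 of |op1[4*i+j] - op2[4*i+j]|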
1117 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1118 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1119 (plus:VDQ_BHSI (minus:VDQ_BHSI
1121 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1122 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1126 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1128 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1129 [(set_attr "type" "neon_arith_acc<q>")]
1132 (define_insn "fabd<mode>3<vczle><vczbe>"
1133 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
1136 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
1137 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1139 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
1140 [(set_attr "type" "neon_fp_abd_<stype><q>")]
1143 ;; For AND (vector, register) and BIC (vector, immediate)
1144 (define_insn "and<mode>3<vczle><vczbe>"
1145 [(set (match_operand:VDQ_I 0 "register_operand")
1146 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1147 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm")))]
1149 {@ [ cons: =0 , 1 , 2 ]
1150 [ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1151 [ w , 0 , Db ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_BIC);
1153 [(set_attr "type" "neon_logic<q>")]
1156 ;; For ORR (vector, register) and ORR (vector, immediate)
1157 (define_insn "ior<mode>3<vczle><vczbe>"
1158 [(set (match_operand:VDQ_I 0 "register_operand")
1159 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1160 (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
1162 {@ [ cons: =0 , 1 , 2; attrs: arch ]
1163 [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1164 [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
1165 [ w , 0 , Do ; simd ] \
1166 << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
1169 [(set_attr "type" "neon_logic<q>")]
1172 (define_insn "xor<mode>3<vczle><vczbe>"
1173 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1175 (match_operand:VDQ_I 2 "register_operand" "w")))]
1177 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1178 [(set_attr "type" "neon_logic<q>")]
1181 (define_insn "one_cmpl<mode>2<vczle><vczbe>"
1182 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1183 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1185 "not\t%0.<Vbtype>, %1.<Vbtype>"
1186 [(set_attr "type" "neon_logic<q>")]
1189 (define_insn "aarch64_simd_vec_set<mode>"
1190 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1192 (vec_duplicate:VALL_F16
1193 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1194 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1195 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1196 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1198 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1199 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1200 switch (which_alternative)
1203 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1205 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1207 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1212 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1215 (define_insn "aarch64_simd_vec_set_zero<mode>"
1216 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1218 (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
1219 (match_operand:VALL_F16 3 "register_operand" "0")
1220 (match_operand:SI 2 "immediate_operand" "i")))]
1221 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1223 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1224 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1225 return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
1229 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1230 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1232 (vec_duplicate:VALL_F16
1234 (match_operand:VALL_F16 3 "register_operand" "w")
1236 [(match_operand:SI 4 "immediate_operand" "i")])))
1237 (match_operand:VALL_F16 1 "register_operand" "0")
1238 (match_operand:SI 2 "immediate_operand" "i")))]
1239 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1241 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1242 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1243 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1245 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1247 [(set_attr "type" "neon_ins<q>")]
1250 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1251 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1252 (vec_merge:VALL_F16_NO_V2Q
1253 (vec_duplicate:VALL_F16_NO_V2Q
1255 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1257 [(match_operand:SI 4 "immediate_operand" "i")])))
1258 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1259 (match_operand:SI 2 "immediate_operand" "i")))]
1260 "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1262 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1263 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1264 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1265 INTVAL (operands[4]));
1267 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1269 [(set_attr "type" "neon_ins<q>")]
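
;; signbit is implemented below as an unsigned shift right by the sign-bit
;; position, so each result lane is 1 for a negative input lane and 0
;; otherwise.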
1272 (define_expand "signbit<mode>2"
1273 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1274 (use (match_operand:VDQSF 1 "register_operand"))]
1277 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1278 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1280 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1282 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1287 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1288 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1289 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1290 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1292 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1293 [(set_attr "type" "neon_shift_imm<q>")]
1296 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1297 [(set (match_operand:VDQ_I 0 "register_operand")
1298 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1299 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm")))]
1301 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
1302 [ w , w , D1 ; neon_compare<q> ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1303 [ w , w , Dr ; neon_shift_imm<q> ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
1307 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1308 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1311 (match_operand:VDQ_I 2 "register_operand" "w")
1312 (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
1313 (match_operand:VDQ_I 1 "register_operand" "0")))]
1315 "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1316 [(set_attr "type" "neon_shift_acc<q>")]
1319 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1320 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
1325 (<SHIFTEXTEND>:<V2XWIDE>
1326 (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
1327 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
1328 (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1329 (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
1331 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
1332 "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1333 [(set_attr "type" "neon_shift_acc<q>")]
1336 (define_expand "aarch64_<sra_op>sra_n<mode>"
1337 [(set (match_operand:VDQ_I 0 "register_operand")
1340 (match_operand:VDQ_I 2 "register_operand")
1341 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1342 (match_operand:VDQ_I 1 "register_operand")))]
1346 = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
1350 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1351 [(match_operand:VSDQ_I_DI 0 "register_operand")
1352 (match_operand:VSDQ_I_DI 1 "register_operand")
1354 (match_operand:VSDQ_I_DI 2 "register_operand")
1355 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1358 /* Use this expander to create the rounding constant vector, which is
1359 1 << (shift - 1). Use wide_int here to ensure that the right TImode
1360 RTL is generated when handling the DImode expanders. */
1361 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1362 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
1363 rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
1364 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1365 if (VECTOR_MODE_P (<MODE>mode))
1367 shft = gen_const_vec_duplicate (<MODE>mode, shft);
1368 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1371 emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
1372 operands[2], shft, rnd));
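
;; For example, a rounding shift right by 3 adds the rounding constant
;; 1 << 2 == 4 before shifting, i.e. it computes (x + 4) >> 3 in the wider
;; mode.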
1377 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1378 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1379 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1380 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1382 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1383 [(set_attr "type" "neon_shift_imm<q>")]
1386 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1387 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1388 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1389 (match_operand:VDQ_I 2 "register_operand" "w")))]
1391 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1392 [(set_attr "type" "neon_shift_reg<q>")]
1395 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1396 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1397 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1398 (match_operand:VDQ_I 2 "register_operand" "w")]
1399 UNSPEC_ASHIFT_UNSIGNED))]
1401 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1402 [(set_attr "type" "neon_shift_reg<q>")]
1405 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1406 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1407 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1408 (match_operand:VDQ_I 2 "register_operand" "w")]
1409 UNSPEC_ASHIFT_SIGNED))]
1411 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1412 [(set_attr "type" "neon_shift_reg<q>")]
1415 (define_expand "ashl<mode>3"
1416 [(match_operand:VDQ_I 0 "register_operand")
1417 (match_operand:VDQ_I 1 "register_operand")
1418 (match_operand:SI 2 "general_operand")]
1421 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1424 if (CONST_INT_P (operands[2]))
1426 shift_amount = INTVAL (operands[2]);
1427 if (shift_amount >= 0 && shift_amount < bit_width)
1429 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1431 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1438 operands[2] = force_reg (SImode, operands[2]);
1440 rtx tmp = gen_reg_rtx (<MODE>mode);
1441 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1444 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1448 (define_expand "lshr<mode>3"
1449 [(match_operand:VDQ_I 0 "register_operand")
1450 (match_operand:VDQ_I 1 "register_operand")
1451 (match_operand:SI 2 "general_operand")]
1454 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1457 if (CONST_INT_P (operands[2]))
1459 shift_amount = INTVAL (operands[2]);
1460 if (shift_amount > 0 && shift_amount <= bit_width)
1462 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1464 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1471 operands[2] = force_reg (SImode, operands[2]);
1473 rtx tmp = gen_reg_rtx (SImode);
1474 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1475 emit_insn (gen_negsi2 (tmp, operands[2]));
1476 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1477 convert_to_mode (<VEL>mode, tmp, 0)));
1478 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1483 (define_expand "ashr<mode>3"
1484 [(match_operand:VDQ_I 0 "register_operand")
1485 (match_operand:VDQ_I 1 "register_operand")
1486 (match_operand:SI 2 "general_operand")]
1489 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1492 if (CONST_INT_P (operands[2]))
1494 shift_amount = INTVAL (operands[2]);
1495 if (shift_amount > 0 && shift_amount <= bit_width)
1497 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1499 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1506 operands[2] = force_reg (SImode, operands[2]);
1508 rtx tmp = gen_reg_rtx (SImode);
1509 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1510 emit_insn (gen_negsi2 (tmp, operands[2]));
1511 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1513 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1518 (define_expand "vashl<mode>3"
1519 [(match_operand:VDQ_I 0 "register_operand")
1520 (match_operand:VDQ_I 1 "register_operand")
1521 (match_operand:VDQ_I 2 "register_operand")]
1524 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1529 (define_expand "vashr<mode>3"
1530 [(match_operand:VDQ_I 0 "register_operand")
1531 (match_operand:VDQ_I 1 "register_operand")
1532 (match_operand:VDQ_I 2 "register_operand")]
1535 rtx neg = gen_reg_rtx (<MODE>mode);
1536 emit (gen_neg<mode>2 (neg, operands[2]));
1537 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1543 (define_expand "aarch64_ashr_simddi"
1544 [(match_operand:DI 0 "register_operand")
1545 (match_operand:DI 1 "register_operand")
1546 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1549 /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like asr by 63 - however the standard pattern does not handle
     this case.  */
1552 if (INTVAL (operands[2]) == 64)
1553 operands[2] = GEN_INT (63);
1554 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1559 (define_expand "vlshr<mode>3"
1560 [(match_operand:VDQ_I 0 "register_operand")
1561 (match_operand:VDQ_I 1 "register_operand")
1562 (match_operand:VDQ_I 2 "register_operand")]
1565 rtx neg = gen_reg_rtx (<MODE>mode);
1566 emit (gen_neg<mode>2 (neg, operands[2]));
1567 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1572 (define_expand "aarch64_lshr_simddi"
1573 [(match_operand:DI 0 "register_operand")
1574 (match_operand:DI 1 "register_operand")
1575 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1578 if (INTVAL (operands[2]) == 64)
1579 emit_move_insn (operands[0], const0_rtx);
1581 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1586 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1587 (define_insn "vec_shr_<mode><vczle><vczbe>"
1588 [(set (match_operand:VD 0 "register_operand" "=w")
1589 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1590 (match_operand:SI 2 "immediate_operand" "i")]
1594 if (BYTES_BIG_ENDIAN)
1595 return "shl %d0, %d1, %2";
1597 return "ushr %d0, %d1, %2";
1599 [(set_attr "type" "neon_shift_imm")]
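
;; aarch64_simd_vec_set<mode> above encodes the lane as a one-hot vec_merge
;; mask, so this expander converts the optab's lane number to 1 << lane.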
1602 (define_expand "vec_set<mode>"
1603 [(match_operand:VALL_F16 0 "register_operand")
1604 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1605 (match_operand:SI 2 "immediate_operand")]
1608 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1609 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1610 GEN_INT (elem), operands[0]));
1616 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1617 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1618 (plus:VDQ_BHSI (mult:VDQ_BHSI
1619 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1620 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1621 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1623 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1624 [(set_attr "type" "neon_mla_<Vetype><q>")]
1627 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1628 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1631 (vec_duplicate:VDQHS
1633 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1634 (parallel [(match_operand:SI 2 "immediate_operand")])))
1635 (match_operand:VDQHS 3 "register_operand" "w"))
1636 (match_operand:VDQHS 4 "register_operand" "0")))]
1639 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1640 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1642 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1645 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1646 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1649 (vec_duplicate:VDQHS
1651 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1652 (parallel [(match_operand:SI 2 "immediate_operand")])))
1653 (match_operand:VDQHS 3 "register_operand" "w"))
1654 (match_operand:VDQHS 4 "register_operand" "0")))]
1657 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1658 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1660 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1663 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1664 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1667 (vec_duplicate:VDQHS
1668 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1669 (match_operand:VDQHS 2 "register_operand" "w"))
1670 (match_operand:VDQHS 1 "register_operand" "0")))]
1672 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1673 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1676 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1677 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1678 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1679 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1680 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1682 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1683 [(set_attr "type" "neon_mla_<Vetype><q>")]
1686 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1687 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1689 (match_operand:VDQHS 4 "register_operand" "0")
1691 (vec_duplicate:VDQHS
1693 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQHS 3 "register_operand" "w"))))]
1698 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1699 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1701 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1704 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1705 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1707 (match_operand:VDQHS 4 "register_operand" "0")
1709 (vec_duplicate:VDQHS
1711 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712 (parallel [(match_operand:SI 2 "immediate_operand")])))
1713 (match_operand:VDQHS 3 "register_operand" "w"))))]
1716 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1717 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1719 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1722 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1723 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1725 (match_operand:VDQHS 1 "register_operand" "0")
1727 (vec_duplicate:VDQHS
1728 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1729 (match_operand:VDQHS 2 "register_operand" "w"))))]
1731 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1732 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1735 ;; Max/Min operations.
1736 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1737 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1738 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1739 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1741 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742 [(set_attr "type" "neon_minmax<q>")]
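
;; AdvSIMD has no integer SMAX/SMIN/UMAX/UMIN for 64-bit elements, so the
;; V2DI case is synthesized below from a compare and a vector select.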
1745 (define_expand "<su><maxmin>v2di3"
1746 [(set (match_operand:V2DI 0 "register_operand")
1747 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1748 (match_operand:V2DI 2 "register_operand")))]
1751 enum rtx_code cmp_operator;
1772 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1773 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1774 operands[2], cmp_fmt, operands[1], operands[2]));
1778 ;; Pairwise Integer Max/Min operations.
1779 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1780 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1781 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1782 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1785 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_minmax<q>")]
1789 ;; Pairwise FP Max/Min operations.
1790 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1791 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1792 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1793 (match_operand:VHSDF 2 "register_operand" "w")]
1796 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1797 [(set_attr "type" "neon_minmax<q>")]
1800 ;; vec_concat gives a new vector with the low elements from operand 1, and
1801 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1802 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; This means that the RTL descriptions of the patterns below need to change
;; depending on endianness.
;; Narrowing operations.

(define_insn "aarch64_xtn2<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

(define_insn "aarch64_xtn2<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
	  (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "xtn2\t%0.<V2ntype>, %2.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

(define_expand "aarch64_xtn2<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
						 operands[2]));
    else
      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
						 operands[2]));
    DONE;
  }
)
(define_insn "*aarch64_narrow_trunc<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (match_operand:VQN 1 "register_operand" "w"))
	  (truncate:<VNARROWQ>
	    (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
{
  if (!BYTES_BIG_ENDIAN)
    return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
  else
    return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
}
  [(set_attr "type" "neon_permute<q>")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand")
   (match_operand:VDN 1 "general_operand")
   (match_operand:VDN 2 "general_operand")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);

  emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
  emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
  DONE;
})
(define_expand "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));

    if (BYTES_BIG_ENDIAN)
      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
						 operands[hi]));
    else
      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
						 operands[hi]));
    DONE;
  }
)
(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
  [(set_attr "type" "neon_permute<q>")]
)

(define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(vec_concat:<VNARROWQ2>
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
	  (truncate:<VNARROWQ>
	    (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
	      (match_dup 2)))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
  [(set_attr "type" "neon_permute<q>")]
)
;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			      (match_operand:VQW 1 "register_operand" "w")
			      (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			      (match_operand:VQW 1 "register_operand" "w")
			      (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
			    )))]
  "TARGET_SIMD"
  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
  "&& <CODE> == ZERO_EXTEND
   && aarch64_split_simd_shift_p (insn)"
  [(const_int 0)]
  {
    /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
       provided that the cost of the zero can be amortized over several
       operations.  We'll later recombine the zero and zip if there are
       not sufficient uses of the zero to make the split worthwhile.  */
    rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
    rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
    emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
    DONE;
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
							  operands[1], p));
    DONE;
  }
)
;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlal_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
})
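
;; For reference, the kind of source this family of patterns serves, as a
;; sketch using arm_neon.h intrinsics (illustrative only):
;;
;;   #include <arm_neon.h>
;;   /* acc[i] += (int32_t) a[4 + i] * (int32_t) b[4 + i]; this is the
;;      operation behind the smlal2 form of the pattern above.  */
;;   int32x4_t mlal_high (int32x4_t acc, int16x8_t a, int16x8_t b)
;;   {
;;     return vmlal_high_s16 (acc, a, b);
;;   }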
(define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlal_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
})
(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl_hi<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQW 4 "register_operand" "w")
	      (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlsl_hi<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
   (match_operand:VQW 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
						 operands[2], p, operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
	      (match_operand:VQ_HSI 2 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_expand "aarch64_<su>mlsl_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 2 "register_operand" "w"))
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_<su>mlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:VD_HSI 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 1 "register_operand" "w")
			(match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 2 "register_operand" "w")
			(match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 1 "register_operand" "w"))
		      (ANY_EXTEND:<VWIDE>
			(match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
						      operands[1],
						      operands[2], p));
  DONE;
})
(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 1 "register_operand" "w")
			(match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			(match_operand:VQW 2 "register_operand" "w")
			(match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
						      operands[1],
						      operands[2], p));
  DONE;
})
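
;; A small illustration of the low/high widening multiplies, using
;; arm_neon.h intrinsics (sketch only):
;;
;;   #include <arm_neon.h>
;;   int32x4_t mull_lo (int16x8_t a, int16x8_t b)
;;   {
;;     return vmull_s16 (vget_low_s16 (a), vget_low_s16 (b));  /* smull  */
;;   }
;;   int32x4_t mull_hi (int16x8_t a, int16x8_t b)
;;   {
;;     return vmull_high_s16 (a, b);                           /* smull2 */
;;   }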
;; vmull_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:<VCOND> 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:VDQHS 2 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCOND> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
})
(define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (vec_select:<VEL>
		(match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		(parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
	     operands[1], p, operands[2], operands[3]));
  DONE;
})

(define_insn "aarch64_<su>mull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (match_operand:VD_HSI 1 "register_operand" "w"))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)
(define_insn "aarch64_<su>mull_hi_n<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(mult:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQ_HSI 1 "register_operand" "w")
	      (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	  (vec_duplicate:<VWIDE>
	    (ANY_EXTEND:<VWIDE_S>
	      (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mull_hi_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
						   operands[2], p));
  DONE;
})
;; vmlal_lane_s16 intrinsics
(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlal_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlal_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (match_operand:<VCOND> 2 "register_operand" "w"))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
    return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlsl_hi_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
(define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (mult:<VWIDE>
	    (ANY_EXTEND:<VWIDE>
	      (vec_select:<VHALF>
		(match_operand:VQ_HSI 2 "register_operand" "w")
		(match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	    (vec_duplicate:<VWIDE>
	      (ANY_EXTEND:<VWIDE_S>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))))]
  "TARGET_SIMD"
  {
    operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
    return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
	     operands[1], operands[2], p, operands[3], operands[4]));
  DONE;
})
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.

;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.

;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.
;; FP arithmetic operations.

(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		     (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)
(define_expand "div<mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand")
		   (match_operand:VHSDF 2 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)
(define_expand "aarch64_float_mla<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(plus:VDQF_DF
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))
	  (match_operand:VDQF_DF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls<mode>"
  [(set (match_operand:VDQF_DF 0 "register_operand")
	(minus:VDQF_DF
	  (match_operand:VDQF_DF 1 "register_operand")
	  (mult:VDQF_DF
	    (match_operand:VDQF_DF 2 "register_operand")
	    (match_operand:VDQF_DF 3 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
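
;; Note that these expanders deliberately emit a separate multiply and
;; add/subtract rather than a fused fma: the unfused vmla/vmls family of
;; intrinsics is defined with two roundings.  As a scalar sketch:
;;
;;   /* aarch64_float_mla computes d = c + (a * b) with two roundings,
;;      unlike the fused fma patterns further below.  */
;;   float mla_scalar (float c, float a, float b) { return c + a * b; }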
(define_expand "aarch64_float_mla_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_n<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (match_operand:<VEL> 3 "register_operand"))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_expand "aarch64_float_mla_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_lane<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V2SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
				    operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_expand "aarch64_float_mla_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(plus:VDQSF
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))
	  (match_operand:VDQSF 1 "register_operand")))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)

(define_expand "aarch64_float_mls_laneq<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(minus:VDQSF
	  (match_operand:VDQSF 1 "register_operand")
	  (mult:VDQSF
	    (vec_duplicate:VDQSF
	      (vec_select:<VEL>
		(match_operand:V4SF 3 "register_operand")
		(parallel [(match_operand:SI 4 "immediate_operand")])))
	    (match_operand:VDQSF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
				     operands[3], operands[4]));
    emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
    DONE;
  }
)
(define_insn "fma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")
		   (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)
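
;; The fma pattern implements a single-rounding fused multiply-add, e.g.
;; (sketch, assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;   /* r[i] = acc[i] + a[i] * b[i], fused (one rounding) -> fmla.  */
;;   float32x4_t fused_mla (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);
;;   }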
(define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 3 "register_operand" "w")
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 3 "register_operand" "w")
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 2 "register_operand" "w")
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (match_operand:DF 3 "register_operand" "w")
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
(define_insn "fnma<mode>4<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

(define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(fma:VDQF
	  (neg:VDQF
	    (match_operand:VDQF 3 "register_operand" "w"))
	  (vec_duplicate:VDQF
	    (vec_select:<VEL>
	      (match_operand:VDQF 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	  (neg:VDQSF
	    (match_operand:VDQSF 3 "register_operand" "w"))
	  (vec_duplicate:VDQSF
	    (vec_select:<VEL>
	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	      (parallel [(match_operand:SI 2 "immediate_operand")])))
	  (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
	(fma:VMUL
	  (neg:VMUL
	    (match_operand:VMUL 2 "register_operand" "w"))
	  (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
	  (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
	(fma:DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand")]))
	  (neg:DF
	    (match_operand:DF 3 "register_operand" "w"))
	  (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand" "w")]
				 FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)
;; HF Scalar variants of related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(mult:VDQF
				    (match_operand:VDQF 1 "register_operand" "w")
				    (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)
(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand")]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
				 [(match_operand:VHSDF 1 "register_operand")]
				 UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		      UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)
;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
				(match_operand:VQ_HSF 1 "register_operand" "w")
				(match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Convert between fixed-point and floating-point (vector modes).

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VDQ_HSDI:FCVT_TARGET>
	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	  FCVT_FIXED2F))]
  "TARGET_SIMD"
  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
)
;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
;; is inconsistent with vector ordering elsewhere in the compiler, in that
;; the meaning of HI and LO changes depending on the target endianness.
;; While elsewhere we map the higher numbered elements of a vector to
;; the lower architectural lanes of the vector, for these patterns we want
;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns, their behavior is as required.
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
				(match_operand:VQ_HSF 1 "register_operand" "w")
				(match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)
;; Float narrowing operations.

(define_insn "aarch64_float_trunc_rodd_df"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:DF 1 "register_operand" "w")]
		   UNSPEC_FCVTXN))]
  "TARGET_SIMD"
  "fcvtxn\\t%s0, %d1"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_trunc_rodd_lo_v2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
		     UNSPEC_FCVTXN))]
  "TARGET_SIMD"
  "fcvtxn\\t%0.2s, %1.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "register_operand" "0")
	  (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
		       UNSPEC_FCVTXN)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtxn2\\t%0.4s, %2.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(vec_concat:V4SF
	  (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
		       UNSPEC_FCVTXN)
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtxn2\\t%0.4s, %2.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
(define_expand "aarch64_float_trunc_rodd_hi_v4sf"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V2SF 1 "register_operand")
   (match_operand:V2DF 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			       ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
			       : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
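
;; Round-to-odd narrowing avoids double rounding when an f64 value is
;; narrowed to f32 in two steps.  A sketch with arm_neon.h (illustrative):
;;
;;   #include <arm_neon.h>
;;   float32x2_t narrow_rodd (float64x2_t a)
;;   {
;;     return vcvtx_f32_f64 (a);   /* fcvtxn v0.2s, v1.2d */
;;   }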
(define_insn "aarch64_float_truncate_lo_<mode><vczle><vczbe>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			       ? gen_aarch64_float_truncate_hi_<Vdbl>_be
			       : gen_aarch64_float_truncate_hi_<Vdbl>_le;
  emit_insn (gen (operands[0], operands[1], operands[2]));
  DONE;
})
(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
	(vec_concat:V4SF
	  (float_truncate:V2SF
	    (match_operand:V2DF 1 "register_operand"))
	  (float_truncate:V2SF
	    (match_operand:V2DF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
						   tmp, operands[hi]));
    DONE;
  }
)
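
;; A hypothetical C loop that exercises this expander (illustrative only):
;;
;;   void pack (float *dst, const double *src, long n)
;;   {
;;     for (long i = 0; i < n; i++)
;;       dst[i] = (float) src[i];   /* vectorized to fcvtn + fcvtn2 */
;;   }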
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF (match_operand:DF 1 "general_operand"))
	  (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2SFmode);

    emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;; expression like:
;;      a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
;; -fno-signed-zeros are enabled either explicitly or indirectly via
;; -ffast-math.

;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs and signed zeroes.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<fmaxmin><mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)
;; 'across lanes' add.

(define_insn "aarch64_faddp<mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
		      UNSPEC_ADDV))]
  "TARGET_SIMD"
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

(define_insn "reduc_plus_scal_v2si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
		   UNSPEC_ADDV))]
  "TARGET_SIMD"
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
	(zero_extend:GPI
	  (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
			       UNSPEC_ADDV)))]
  "TARGET_SIMD"
  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)
(define_insn "reduc_plus_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		      UNSPEC_FADDV))]
  "TARGET_SIMD"
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
	(unspec:SF [(match_operand:V4SF 1 "register_operand")]
		   UNSPEC_FADDV))]
  "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})
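
;; The expander above performs a horizontal add by folding the vector in
;; half twice.  Equivalent intrinsic-level sketch (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;   float hadd (float32x4_t v)
;;   {
;;     return vaddvq_f32 (v);   /* two faddp steps + lane 0 extract */
;;   }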
;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
;; sign or zero-extends its elements.
(define_insn "aarch64_<su>addlv<mode>"
  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
	(unspec:<VWIDE_S>
	  [(ANY_EXTEND:<V2XWIDE>
	     (match_operand:VDQV_L 1 "register_operand" "w"))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD"
  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
;; An ADDV over a vector PLUS of elements extracted and widened all from the
;; same vector is the same as an [SU]ADDLV above, so long as all the elements
;; of that vector are used.  We can greatly simplify the RTL expression using
;; this splitter.
(define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
  [(set (match_operand:<VWIDE_S> 0 "register_operand")
	(unspec:<VWIDE_S>
	  [(plus:<VDBLW>
	     (vec_select:<VDBLW>
	       (ANY_EXTEND:<V2XWIDE>
		 (match_operand:VDQV_L 1 "register_operand"))
	       (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
	     (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	       (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VWIDE_S>
	  [(ANY_EXTEND:<V2XWIDE> (match_dup 1))]
	  UNSPEC_ADDV))]
  {}
)
;; Similar to the above but for two-step zero-widening reductions.
;; We can push the outer zero_extend outside the ADDV unspec and make
;; use of the implicit high-part zeroing semantics of UADDLV to do it all
;; in a single instruction.
(define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
  [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
	(unspec:<VWIDE2X_S>
	  [(zero_extend:<VQUADW>
	     (plus:<VDBLW>
	       (vec_select:<VDBLW>
		 (zero_extend:<V2XWIDE>
		   (match_operand:VDQQH 1 "register_operand" "w"))
		 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
	       (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
		 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
	  UNSPEC_ADDV))]
  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(zero_extend:<VWIDE2X_S>
	  (unspec:<VWIDE_S>
	    [(zero_extend:<V2XWIDE> (match_dup 1))]
	    UNSPEC_ADDV)))]
  {}
)
;; Zero-extending version of the above.  As these intrinsics produce a scalar
;; value that may be used by further intrinsics we want to avoid moving the
;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.

(define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=w")
	(zero_extend:GPI
	  (unspec:<VWIDE_S>
	    [(ANY_EXTEND:<VDQV_L:V2XWIDE>
	       (match_operand:VDQV_L 1 "register_operand" "w"))]
	    UNSPEC_ADDV)))]
  "TARGET_SIMD
   && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
  "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
)
(define_expand "aarch64_<su>addlp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW>
	  (vec_select:<VDBLW>
	    (ANY_EXTEND:<V2XWIDE>
	      (match_operand:VDQV_L 1 "register_operand"))
	    (match_dup 2))
	  (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	    (match_dup 3))))]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
  operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
})
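
;; Pairwise add-long sketch, using arm_neon.h (illustrative):
;;
;;   #include <arm_neon.h>
;;   /* r[i] = (int16_t) a[2*i] + (int16_t) a[2*i + 1] -> saddlp.  */
;;   int16x4_t pairwise_widen (int8x8_t a)
;;   {
;;     return vpaddl_s8 (a);
;;   }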
(define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(plus:<VDBLW>
	  (vec_select:<VDBLW>
	    (ANY_EXTEND:<V2XWIDE>
	      (match_operand:VDQV_L 1 "register_operand" "w"))
	    (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
	  (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
	    (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD
   && !rtx_equal_p (operands[2], operands[3])"
  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)
(define_insn "clrsb<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "clz<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

(define_insn "popcount<mode>2<vczle><vczbe>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)
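
;; A common use of the byte-wise CNT instruction is a full 64-bit popcount,
;; sketched here with arm_neon.h intrinsics (illustrative only):
;;
;;   #include <arm_neon.h>
;;   unsigned popcount64 (unsigned long long x)
;;   {
;;     uint8x8_t bytes = vcnt_u8 (vcreate_u8 (x));   /* cnt  */
;;     return vaddv_u8 (bytes);                      /* addv */
;;   }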
;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMIN_UNS)]
  "TARGET_SIMD"
  {
    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<optab>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
							 operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)
(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
	(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		       MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

(define_insn "aarch64_reduc_<optab>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		     MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

(define_insn "aarch64_reduc_<optab>_internal<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)
3616 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3617 ;; allocation.
3618 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3619 ;; to select.
3621 ;; Thus our BSL is of the form:
3622 ;; op0 = bsl (mask, op2, op3)
3623 ;; We can use any of:
3625 ;; if (op0 = mask)
3626 ;; bsl mask, op1, op2
3627 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3628 ;; bit op0, op2, mask
3629 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3630 ;; bif op0, op1, mask
3632 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3633 ;; Some forms of straight-line code may generate the equivalent form
3634 ;; in *aarch64_simd_bsl<mode>_alt.
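;;
;; In C terms, the selection all three instructions compute is (a sketch,
;; with op1 the mask):
;;   op0 = (op2 & op1) | (op3 & ~op1);   /* == ((op3 ^ op2) & op1) ^ op3 */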
3636 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3637 [(set (match_operand:VDQ_I 0 "register_operand")
3641 (match_operand:<V_INT_EQUIV> 3 "register_operand")
3642 (match_operand:VDQ_I 2 "register_operand"))
3643 (match_operand:VDQ_I 1 "register_operand"))
3644 (match_dup:<V_INT_EQUIV> 3)
3647 {@ [ cons: =0 , 1 , 2 , 3 ]
3648 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3649 [ w , w , w , 0 ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3650 [ w , w , 0 , w ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3652 [(set_attr "type" "neon_bsl<q>")]
3655 ;; We need this form in addition to the above pattern to match the case
3656 ;; when combine tries merging three insns such that the second operand of
3657 ;; the outer XOR matches the second operand of the inner XOR rather than
3658 ;; the first. The two are equivalent but since recog doesn't try all
3659 ;; permutations of commutative operations, we have to have a separate pattern.
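;;
;; Concretely (a sketch): the pattern above matches
;;   ((op3 ^ op2) & op1) ^ op3, i.e. bsl (op1, op2, op3),
;; while the pattern below matches
;;   ((op3 ^ op2) & op1) ^ op2, i.e. bsl (op1, op3, op2);
;; the same select with operands 2 and 3 exchanged.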
3661 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3662 [(set (match_operand:VDQ_I 0 "register_operand")
3666 (match_operand:VDQ_I 3 "register_operand")
3667 (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3668 (match_operand:VDQ_I 1 "register_operand"))
3669 (match_dup:<V_INT_EQUIV> 2)))]
3671 {@ [ cons: =0 , 1 , 2 , 3 ]
3672 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3673 [ w , w , 0 , w ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3674 [ w , w , w , 0 ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3676 [(set_attr "type" "neon_bsl<q>")]
3679 ;; DImode is special: we want to avoid computing operations which are
3680 ;; more naturally computed in general purpose registers in the vector
3681 ;; registers. If we do that, we need to move all three operands from general
3682 ;; purpose registers to vector registers, then back again. However, we
3683 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3684 ;; optimizations based on the component operations of a BSL.
3686 ;; That means we need a splitter back to the individual operations, if they
3687 ;; would be better calculated on the integer side.
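;;
;; On the integer side the splitters below compute the select as (a sketch):
;;   scratch = op2 ^ op3;
;;   scratch &= op1;          /* the mask */
;;   op0 = scratch ^ op3;     /* == bsl (op1, op2, op3) */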
3689 (define_insn_and_split "aarch64_simd_bsldi_internal"
3690 [(set (match_operand:DI 0 "register_operand")
3694 (match_operand:DI 3 "register_operand")
3695 (match_operand:DI 2 "register_operand"))
3696 (match_operand:DI 1 "register_operand"))
3700 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3701 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %2.8b, %3.8b
3702 [ w , w , w , 0 ; neon_bsl , 4 ] bit\t%0.8b, %2.8b, %1.8b
3703 [ w , w , 0 , w ; neon_bsl , 4 ] bif\t%0.8b, %3.8b, %1.8b
3704 [ &r , r , r , r ; multiple , 12 ] #
3706 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3707 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3709 /* Split back to individual operations. If we're before reload, and
3710 able to create a temporary register, do so. If we're after reload,
3711 we've got an early-clobber destination register, so use that.
3712 Otherwise, we can't create pseudos and we can't yet guarantee that
3713 operands[0] is safe to write, so FAIL to split. */
3716 if (reload_completed)
3717 scratch = operands[0];
3718 else if (can_create_pseudo_p ())
3719 scratch = gen_reg_rtx (DImode);
3723 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3724 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3725 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3730 (define_insn_and_split "aarch64_simd_bsldi_alt"
3731 [(set (match_operand:DI 0 "register_operand")
3735 (match_operand:DI 3 "register_operand")
3736 (match_operand:DI 2 "register_operand"))
3737 (match_operand:DI 1 "register_operand"))
3741 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3742 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %3.8b, %2.8b
3743 [ w , w , 0 , w ; neon_bsl , 4 ] bit\t%0.8b, %3.8b, %1.8b
3744 [ w , w , w , 0 ; neon_bsl , 4 ] bif\t%0.8b, %2.8b, %1.8b
3745 [ &r , r , r , r ; multiple , 12 ] #
3747 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3748 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3750 /* Split back to individual operations. If we're before reload, and
3751 able to create a temporary register, do so. If we're after reload,
3752 we've got an early-clobber destination register, so use that.
3753 Otherwise, we can't create pseudos and we can't yet guarantee that
3754 operands[0] is safe to write, so FAIL to split. */
3757 if (reload_completed)
3758 scratch = operands[0];
3759 else if (can_create_pseudo_p ())
3760 scratch = gen_reg_rtx (DImode);
3764 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3765 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3766 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3771 (define_expand "aarch64_simd_bsl<mode>"
3772 [(match_operand:VALLDIF 0 "register_operand")
3773 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3774 (match_operand:VALLDIF 2 "register_operand")
3775 (match_operand:VALLDIF 3 "register_operand")]
3778 /* We can't alias operands together if they have different modes. */
3779 rtx tmp = operands[0];
3780 if (FLOAT_MODE_P (<MODE>mode))
3782 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3783 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3784 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3786 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3787 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3791 if (tmp != operands[0])
3792 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3797 (define_expand "vcond_mask_<mode><v_int_equiv>"
3798 [(match_operand:VALLDI 0 "register_operand")
3799 (match_operand:VALLDI 1 "nonmemory_operand")
3800 (match_operand:VALLDI 2 "nonmemory_operand")
3801 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3804 /* If we have (a = (P) ? -1 : 0),
3805 then we can simply move the generated mask (the result must be int). */
3806 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3807 && operands[2] == CONST0_RTX (<MODE>mode))
3808 emit_move_insn (operands[0], operands[3]);
3809 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3810 else if (operands[1] == CONST0_RTX (<MODE>mode)
3811 && operands[2] == CONSTM1_RTX (<MODE>mode))
3812 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3815 if (!REG_P (operands[1]))
3816 operands[1] = force_reg (<MODE>mode, operands[1]);
3817 if (!REG_P (operands[2]))
3818 operands[2] = force_reg (<MODE>mode, operands[2]);
3819 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3820 operands[1], operands[2]));
3826 ;; Patterns that compare two vectors and conditionally jump on the result.
3828 (define_expand "cbranch<mode>4"
3831 (match_operator 0 "aarch64_equality_operator"
3832 [(match_operand:VDQ_I 1 "register_operand")
3833 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3834 (label_ref (match_operand 3 ""))
3838 auto code = GET_CODE (operands[0]);
3839 rtx tmp = operands[1];
3841 /* If comparing against a non-zero vector, XOR the two vectors first,
3842 so that the branch reduces to comparing the XOR result with zero. */
3843 if (operands[2] != CONST0_RTX (<MODE>mode))
3845 tmp = gen_reg_rtx (<MODE>mode);
3846 emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3849 /* For 64-bit vectors we need no reductions. */
3850 if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3852 /* Always reduce using a V4SI. */
3853 rtx reduc = gen_lowpart (V4SImode, tmp);
3854 rtx res = gen_reg_rtx (V4SImode);
3855 emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3856 emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3859 rtx val = gen_reg_rtx (DImode);
3860 emit_move_insn (val, gen_lowpart (DImode, tmp));
3862 rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3863 rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3864 emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
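  /* For example (illustrative only), a 128-bit "a != b" branch becomes
     roughly:
       eor   v0.16b, v0.16b, v1.16b
       umaxp v0.4s, v0.4s, v0.4s
       fmov  x0, d0
       cbnz  x0, .Ltarget  */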
3868 ;; Patterns comparing two vectors to produce a mask.
3870 (define_expand "vec_cmp<mode><mode>"
3871 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3872 (match_operator 1 "comparison_operator"
3873 [(match_operand:VSDQ_I_DI 2 "register_operand")
3874 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3877 rtx mask = operands[0];
3878 enum rtx_code code = GET_CODE (operands[1]);
3888 if (operands[3] == CONST0_RTX (<MODE>mode))
3893 if (!REG_P (operands[3]))
3894 operands[3] = force_reg (<MODE>mode, operands[3]);
3902 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3906 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3910 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3914 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3918 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3922 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3926 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3930 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3934 /* Handle NE as !EQ. */
3935 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3936 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3940 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3950 (define_expand "vec_cmp<mode><v_int_equiv>"
3951 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3952 (match_operator 1 "comparison_operator"
3953 [(match_operand:VDQF 2 "register_operand")
3954 (match_operand:VDQF 3 "nonmemory_operand")]))]
3957 int use_zero_form = 0;
3958 enum rtx_code code = GET_CODE (operands[1]);
3959 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3961 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3970 if (operands[3] == CONST0_RTX (<MODE>mode))
3977 if (!REG_P (operands[3]))
3978 operands[3] = force_reg (<MODE>mode, operands[3]);
3988 comparison = gen_aarch64_cmlt<mode>;
3993 std::swap (operands[2], operands[3]);
3997 comparison = gen_aarch64_cmgt<mode>;
4002 comparison = gen_aarch64_cmle<mode>;
4007 std::swap (operands[2], operands[3]);
4011 comparison = gen_aarch64_cmge<mode>;
4015 comparison = gen_aarch64_cmeq<mode>;
4033 /* All of the above must not raise any FP exceptions. Thus we first
4034 check each operand for NaNs and force any elements containing NaN to
4035 zero before using them in the compare.
4036 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4037 (cm<cc> (isnan (a) ? 0.0 : a,
4038 isnan (b) ? 0.0 : b))
4039 We use the following transformations for doing the comparisons:
4040 a UNGE b -> a GE b
4041 a UNGT b -> a GT b
4042 a UNLE b -> b GE a
4043 a UNLT b -> b GT a. */
4045 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4046 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4047 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4048 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4049 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4050 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4051 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4052 lowpart_subreg (<V_INT_EQUIV>mode,
4055 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4056 lowpart_subreg (<V_INT_EQUIV>mode,
4059 gcc_assert (comparison != NULL);
4060 emit_insn (comparison (operands[0],
4061 lowpart_subreg (<MODE>mode,
4062 tmp0, <V_INT_EQUIV>mode),
4063 lowpart_subreg (<MODE>mode,
4064 tmp1, <V_INT_EQUIV>mode)));
4065 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4075 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4076 As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
4077 a GE b -> a GE b
4078 a GT b -> a GT b
4079 a LE b -> b GE a
4080 a LT b -> b GT a
4081 a EQ b -> a EQ b
4082 a NE b -> ~(a EQ b) */
4083 gcc_assert (comparison != NULL);
4084 emit_insn (comparison (operands[0], operands[2], operands[3]));
4086 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4090 /* LTGT is not guaranteed not to raise an FP exception, so take the
4091 faster route: ((a > b) || (b > a)). */
4092 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4093 operands[2], operands[3]));
4094 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4095 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4101 /* cmeq (a, a) & cmeq (b, b). */
4102 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4103 operands[2], operands[2]));
4104 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4105 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4107 if (code == UNORDERED)
4108 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4109 else if (code == UNEQ)
4111 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4112 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4123 (define_expand "vec_cmpu<mode><mode>"
4124 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4125 (match_operator 1 "comparison_operator"
4126 [(match_operand:VSDQ_I_DI 2 "register_operand")
4127 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4130 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4131 operands[2], operands[3]));
4135 (define_expand "vcond<mode><mode>"
4136 [(set (match_operand:VALLDI 0 "register_operand")
4137 (if_then_else:VALLDI
4138 (match_operator 3 "comparison_operator"
4139 [(match_operand:VALLDI 4 "register_operand")
4140 (match_operand:VALLDI 5 "nonmemory_operand")])
4141 (match_operand:VALLDI 1 "nonmemory_operand")
4142 (match_operand:VALLDI 2 "nonmemory_operand")))]
4145 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4146 enum rtx_code code = GET_CODE (operands[3]);
4148 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4149 it as well as switch operands 1/2 in order to avoid the additional
4150 NOT instruction. */
4151 if (code == NE)
4152 {
4153 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4154 operands[4], operands[5]);
4155 std::swap (operands[1], operands[2]);
4156 }
4157 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4158 operands[4], operands[5]));
4159 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4160 operands[2], mask));
4165 (define_expand "vcond<v_cmp_mixed><mode>"
4166 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4167 (if_then_else:<V_cmp_mixed>
4168 (match_operator 3 "comparison_operator"
4169 [(match_operand:VDQF_COND 4 "register_operand")
4170 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4171 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4172 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4175 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4176 enum rtx_code code = GET_CODE (operands[3]);
4178 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4179 it as well as switch operands 1/2 in order to avoid the additional
4180 NOT instruction. */
4181 if (code == NE)
4182 {
4183 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4184 operands[4], operands[5]);
4185 std::swap (operands[1], operands[2]);
4186 }
4187 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4188 operands[4], operands[5]));
4189 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4190 operands[0], operands[1],
4191 operands[2], mask));
4196 (define_expand "vcondu<mode><mode>"
4197 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4198 (if_then_else:VSDQ_I_DI
4199 (match_operator 3 "comparison_operator"
4200 [(match_operand:VSDQ_I_DI 4 "register_operand")
4201 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4202 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4203 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4206 rtx mask = gen_reg_rtx (<MODE>mode);
4207 enum rtx_code code = GET_CODE (operands[3]);
4209 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4210 it as well as switch operands 1/2 in order to avoid the additional
4211 NOT instruction. */
4212 if (code == NE)
4213 {
4214 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4215 operands[4], operands[5]);
4216 std::swap (operands[1], operands[2]);
4217 }
4218 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4219 operands[4], operands[5]));
4220 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4221 operands[2], mask));
4225 (define_expand "vcondu<mode><v_cmp_mixed>"
4226 [(set (match_operand:VDQF 0 "register_operand")
4228 (match_operator 3 "comparison_operator"
4229 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4230 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4231 (match_operand:VDQF 1 "nonmemory_operand")
4232 (match_operand:VDQF 2 "nonmemory_operand")))]
4235 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4236 enum rtx_code code = GET_CODE (operands[3]);
4238 /* NE is handled as !EQ in vec_cmp patterns; we can explicitly invert
4239 it as well as switch operands 1/2 in order to avoid the additional
4240 NOT instruction. */
4241 if (code == NE)
4242 {
4243 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244 operands[4], operands[5]);
4245 std::swap (operands[1], operands[2]);
4246 }
4247 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4249 operands[4], operands[5]));
4250 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4251 operands[2], mask));
4255 ;; Patterns for AArch64 SIMD Intrinsics.
4257 ;; Lane extraction with sign extension to general purpose register.
4258 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4259 [(set (match_operand:GPI 0 "register_operand" "=r")
4261 (vec_select:<VDQQH:VEL>
4262 (match_operand:VDQQH 1 "register_operand" "w")
4263 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4266 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4267 INTVAL (operands[2]));
4268 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4270 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4273 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4274 [(set (match_operand:GPI 0 "register_operand" "=r")
4276 (vec_select:<VDQQH:VEL>
4277 (match_operand:VDQQH 1 "register_operand" "w")
4278 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4281 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4282 INTVAL (operands[2]));
4283 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4285 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4288 ;; Lane extraction of a value; neither sign nor zero extension
4289 ;; is guaranteed, so the upper bits should be considered undefined.
4290 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4291 ;; Extracting lane zero is split into a simple move when it is between SIMD
4292 ;; registers or a store.
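;;
;; For example (illustrative), extracting lane 2 of a V4SI into a general
;; register uses:
;;   umov w0, v0.s[2]
;; while a lane-0 extract between SIMD registers splits to a plain move.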
4293 (define_insn_and_split "aarch64_get_lane<mode>"
4294 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4296 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4297 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4300 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4301 switch (which_alternative)
4304 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4306 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4308 return "st1\\t{%1.<Vetype>}[%2], %0";
4313 "&& reload_completed
4314 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4315 [(set (match_dup 0) (match_dup 1))]
4317 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4319 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4322 (define_insn "*aarch64_get_high<mode>"
4323 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4325 (match_operand:VQ_2E 1 "register_operand" "w")
4326 (parallel [(match_operand:SI 2 "immediate_operand")])))]
4327 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4329 [(set_attr "type" "f_mrc")]
4332 (define_insn "load_pair_lanes<mode>"
4333 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4335 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4336 (match_operand:VDCSIF 2 "memory_operand" "m")))]
4338 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4339 "ldr\\t%<single_dtype>0, %1"
4340 [(set_attr "type" "neon_load1_1reg<dblq>")]
4343 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4344 ;; below. The reason for having both of them is that the alternatives of
4345 ;; the later patterns do not have consistent register preferences: the STP
4346 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4347 ;; the GPR form is more natural for scalar integers) whereas the other
4348 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4350 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4351 ;; which the destination was always memory. On the other hand, expressing
4352 ;; the true preferences makes GPRs seem more palatable than they really are
4353 ;; for register destinations.
4355 ;; Despite that, we do still want the general form to have STP alternatives,
4356 ;; in order to handle cases where a register destination is spilled.
4358 ;; The best compromise therefore seemed to be to have a dedicated STP
4359 ;; pattern to catch cases in which the destination was always memory.
4360 ;; This dedicated pattern must come first.
4362 (define_insn "store_pair_lanes<mode>"
4363 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4365 (match_operand:VDCSIF 1 "register_operand")
4366 (match_operand:VDCSIF 2 "register_operand")))]
4368 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4369 [ Umn , w , w ; neon_stp ] stp\t%<single_type>1, %<single_type>2, %y0
4370 [ Umn , r , r ; store_16 ] stp\t%<single_wx>1, %<single_wx>2, %y0
4374 ;; Form a vector whose least significant half comes from operand 1 and whose
4375 ;; most significant half comes from operand 2. The register alternatives
4376 ;; tie the least significant half to the same register as the destination,
4377 ;; so that only the other half needs to be handled explicitly. For the
4378 ;; reasons given above, the STP alternatives use ? for constraints that
4379 ;; the register alternatives either don't accept or themselves disparage.
4381 (define_insn "*aarch64_combine_internal<mode>"
4382 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4384 (match_operand:VDCSIF 1 "register_operand")
4385 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4387 && !BYTES_BIG_ENDIAN
4388 && (register_operand (operands[0], <VDBL>mode)
4389 || register_operand (operands[2], <MODE>mode))"
4390 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4391 [ w , 0 , w ; neon_ins<dblq> , simd ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4392 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4393 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4394 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4395 [ Umn , ?w , w ; neon_stp , * ] stp\t%<single_type>1, %<single_type>2, %y0
4396 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>1, %<single_wx>2, %y0
4400 (define_insn "*aarch64_combine_internal_be<mode>"
4401 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4403 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4404 (match_operand:VDCSIF 1 "register_operand")))]
4407 && (register_operand (operands[0], <VDBL>mode)
4408 || register_operand (operands[2], <MODE>mode))"
4409 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4410 [ w , 0 , w ; neon_ins<dblq> , simd ] ins\t%0.<single_type>[1], %2.<single_type>[0]
4411 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4412 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4413 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4414 [ Umn , ?w , ?w ; neon_stp , * ] stp\t%<single_type>2, %<single_type>1, %y0
4415 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>2, %<single_wx>1, %y0
4419 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4420 ;; dest vector.
4422 (define_insn "*aarch64_combinez<mode>"
4423 [(set (match_operand:<VDBL> 0 "register_operand")
4425 (match_operand:VDCSIF 1 "nonimmediate_operand")
4426 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4427 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4428 {@ [ cons: =0 , 1 ; attrs: type ]
4429 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4430 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4431 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4435 (define_insn "*aarch64_combinez_be<mode>"
4436 [(set (match_operand:<VDBL> 0 "register_operand")
4438 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4439 (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4440 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4441 {@ [ cons: =0 , 1 ; attrs: type ]
4442 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4443 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4444 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4448 ;; Form a vector whose first half (in array order) comes from operand 1
4449 ;; and whose second half (in array order) comes from operand 2.
4450 ;; This operand order follows the RTL vec_concat operation.
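;;
;; For example (a sketch of the RTL semantics):
;;   (vec_concat:V4SI (op1:V2SI) (op2:V2SI))
;;     == { op1[0], op1[1], op2[0], op2[1] } in array order,
;; regardless of endianness.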
4451 (define_expand "@aarch64_vec_concat<mode>"
4452 [(set (match_operand:<VDBL> 0 "register_operand")
4454 (match_operand:VDCSIF 1 "general_operand")
4455 (match_operand:VDCSIF 2 "general_operand")))]
4458 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4459 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4461 if (MEM_P (operands[1])
4462 && MEM_P (operands[2])
4463 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4464 /* Use load_pair_lanes<mode>. */
4466 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4468 /* Use *aarch64_combinez<mode>. */
4469 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4470 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4474 /* Use *aarch64_combine_internal<mode>. */
4475 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4476 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4478 if (MEM_P (operands[hi]))
4480 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4481 operands[hi] = replace_equiv_address (operands[hi], addr);
4484 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4489 ;; Form a vector whose least significant half comes from operand 1 and whose
4490 ;; most significant half comes from operand 2. This operand order follows
4491 ;; arm_neon.h vcombine* intrinsics.
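;;
;; For example (illustrative): vcombine_s32 (lo, hi) reaches this expander;
;; on big-endian targets the operands are swapped below so that the
;; array-ordered vec_concat above still places "lo" in the least
;; significant half.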
4492 (define_expand "aarch64_combine<mode>"
4493 [(match_operand:<VDBL> 0 "register_operand")
4494 (match_operand:VDC 1 "general_operand")
4495 (match_operand:VDC 2 "general_operand")]
4498 if (BYTES_BIG_ENDIAN)
4499 std::swap (operands[1], operands[2]);
4500 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4506 ;; <su><addsub>l<q>.
4508 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4509 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4510 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4511 (match_operand:VQW 1 "register_operand" "w")
4512 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4513 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4514 (match_operand:VQW 2 "register_operand" "w")
4517 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4518 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4521 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4522 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4523 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4524 (match_operand:VQW 1 "register_operand" "w")
4525 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4526 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4527 (match_operand:VQW 2 "register_operand" "w")
4530 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4531 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4534 (define_expand "vec_widen_<su>add_lo_<mode>"
4535 [(match_operand:<VWIDE> 0 "register_operand")
4536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4537 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4540 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4541 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4546 (define_expand "vec_widen_<su>add_hi_<mode>"
4547 [(match_operand:<VWIDE> 0 "register_operand")
4548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4549 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4552 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4553 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4558 (define_expand "vec_widen_<su>sub_lo_<mode>"
4559 [(match_operand:<VWIDE> 0 "register_operand")
4560 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4561 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4564 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4565 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4570 (define_expand "vec_widen_<su>sub_hi_<mode>"
4571 [(match_operand:<VWIDE> 0 "register_operand")
4572 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4573 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4576 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4577 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4582 (define_expand "aarch64_saddl2<mode>"
4583 [(match_operand:<VWIDE> 0 "register_operand")
4584 (match_operand:VQW 1 "register_operand")
4585 (match_operand:VQW 2 "register_operand")]
4588 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4589 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4594 (define_expand "aarch64_uaddl2<mode>"
4595 [(match_operand:<VWIDE> 0 "register_operand")
4596 (match_operand:VQW 1 "register_operand")
4597 (match_operand:VQW 2 "register_operand")]
4600 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4601 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4606 (define_expand "aarch64_ssubl2<mode>"
4607 [(match_operand:<VWIDE> 0 "register_operand")
4608 (match_operand:VQW 1 "register_operand")
4609 (match_operand:VQW 2 "register_operand")]
4612 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4613 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4618 (define_expand "aarch64_usubl2<mode>"
4619 [(match_operand:<VWIDE> 0 "register_operand")
4620 (match_operand:VQW 1 "register_operand")
4621 (match_operand:VQW 2 "register_operand")]
4624 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4625 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4630 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4631 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4632 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4633 (match_operand:VD_BHSI 1 "register_operand" "w"))
4635 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4637 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4638 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4641 ;; <su><addsub>w<q>.
4643 (define_expand "widen_ssum<mode>3"
4644 [(set (match_operand:<VDBLW> 0 "register_operand")
4645 (plus:<VDBLW> (sign_extend:<VDBLW>
4646 (match_operand:VQW 1 "register_operand"))
4647 (match_operand:<VDBLW> 2 "register_operand")))]
4650 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4651 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4653 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4655 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4660 (define_expand "widen_ssum<mode>3"
4661 [(set (match_operand:<VWIDE> 0 "register_operand")
4662 (plus:<VWIDE> (sign_extend:<VWIDE>
4663 (match_operand:VD_BHSI 1 "register_operand"))
4664 (match_operand:<VWIDE> 2 "register_operand")))]
4667 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4671 (define_expand "widen_usum<mode>3"
4672 [(set (match_operand:<VDBLW> 0 "register_operand")
4673 (plus:<VDBLW> (zero_extend:<VDBLW>
4674 (match_operand:VQW 1 "register_operand"))
4675 (match_operand:<VDBLW> 2 "register_operand")))]
4678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4679 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4681 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4683 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4688 (define_expand "widen_usum<mode>3"
4689 [(set (match_operand:<VWIDE> 0 "register_operand")
4690 (plus:<VWIDE> (zero_extend:<VWIDE>
4691 (match_operand:VD_BHSI 1 "register_operand"))
4692 (match_operand:<VWIDE> 2 "register_operand")))]
4695 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4699 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4700 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4701 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4703 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4705 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4706 [(set_attr "type" "neon_sub_widen")]
4709 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4711 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4714 (match_operand:VQW 2 "register_operand" "w")
4715 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4717 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718 [(set_attr "type" "neon_sub_widen")]
4721 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4723 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4726 (match_operand:VQW 2 "register_operand" "w")
4727 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4729 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4730 [(set_attr "type" "neon_sub_widen")]
4733 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4734 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4736 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4737 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4739 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4740 [(set_attr "type" "neon_add_widen")]
4743 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4744 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4748 (match_operand:VQW 2 "register_operand" "w")
4749 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4750 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4752 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4753 [(set_attr "type" "neon_add_widen")]
4756 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4761 (match_operand:VQW 2 "register_operand" "w")
4762 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4763 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4765 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4766 [(set_attr "type" "neon_add_widen")]
4769 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4770 [(set (match_operand:<VWIDE> 0 "register_operand")
4774 (match_operand:VQW 2 "register_operand")
4776 (match_operand:<VWIDE> 1 "register_operand")))]
4779 /* We still do an emit_insn rather than relying on the pattern above
4780 because for the MINUS case the operands would need to be swapped
4781 around. */
4782 operands[3]
4783 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4784 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4792 ;; <su><r>h<addsub>.
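;;
;; A C-style sketch of the halving operations below (illustrative only,
;; using a double-width intermediate as the RTL does):
;;   hadd:  res[i] = (a[i] + b[i]) >> 1;
;;   rhadd: res[i] = (a[i] + b[i] + 1) >> 1;
;;   hsub:  res[i] = (a[i] - b[i]) >> 1;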
4794 (define_expand "<su_optab>avg<mode>3_floor"
4795 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4799 (ANY_EXTEND:<V2XWIDE>
4800 (match_operand:VDQ_BHSI 1 "register_operand"))
4801 (ANY_EXTEND:<V2XWIDE>
4802 (match_operand:VDQ_BHSI 2 "register_operand")))
4806 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4810 (define_expand "<su_optab>avg<mode>3_ceil"
4811 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4816 (ANY_EXTEND:<V2XWIDE>
4817 (match_operand:VDQ_BHSI 1 "register_operand"))
4818 (ANY_EXTEND:<V2XWIDE>
4819 (match_operand:VDQ_BHSI 2 "register_operand")))
4824 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4828 (define_expand "aarch64_<su>hsub<mode>"
4829 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4833 (ANY_EXTEND:<V2XWIDE>
4834 (match_operand:VDQ_BHSI 1 "register_operand"))
4835 (ANY_EXTEND:<V2XWIDE>
4836 (match_operand:VDQ_BHSI 2 "register_operand")))
4840 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4844 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4845 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4849 (ANY_EXTEND:<V2XWIDE>
4850 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4851 (ANY_EXTEND:<V2XWIDE>
4852 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4853 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4855 "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4856 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4859 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4860 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4865 (ANY_EXTEND:<V2XWIDE>
4866 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4867 (ANY_EXTEND:<V2XWIDE>
4868 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4869 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4872 "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4873 [(set_attr "type" "neon_add_halve<q>")]
4876 ;; <r><addsub>hn<q>.
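;;
;; A C-style sketch (illustrative only; esize is the source element width):
;;   addhn:  res[i] = (narrow) ((a[i] + b[i]) >> (esize / 2));
;;   raddhn: res[i] = (narrow) ((a[i] + b[i] + (1 << (esize / 2 - 1)))
;;                              >> (esize / 2));
;; and likewise for subhn/rsubhn with minus.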
4878 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4879 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4880 (truncate:<VNARROWQ>
4882 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4883 (match_operand:VQN 2 "register_operand" "w"))
4884 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4886 "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4887 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4890 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4891 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4892 (truncate:<VNARROWQ>
4895 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4896 (match_operand:VQN 2 "register_operand" "w"))
4897 (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4898 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4900 "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4901 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4904 (define_expand "aarch64_<optab>hn<mode>"
4905 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4906 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4907 (match_operand:VQN 2 "register_operand")))]
4911 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4912 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4913 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4914 operands[2], shft));
4919 (define_expand "aarch64_r<optab>hn<mode>"
4920 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4921 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4922 (match_operand:VQN 2 "register_operand")))]
4926 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4927 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4929 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4930 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4931 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4932 operands[2], rnd, shft));
4937 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4938 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4939 (vec_concat:<VNARROWQ2>
4940 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4941 (truncate:<VNARROWQ>
4943 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4944 (match_operand:VQN 3 "register_operand" "w"))
4945 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4947 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4948 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4951 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4952 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4953 (vec_concat:<VNARROWQ2>
4954 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4955 (truncate:<VNARROWQ>
4958 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4959 (match_operand:VQN 3 "register_operand" "w"))
4960 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4961 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4962 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4963 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4964 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4967 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4968 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4969 (vec_concat:<VNARROWQ2>
4970 (truncate:<VNARROWQ>
4972 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4973 (match_operand:VQN 3 "register_operand" "w"))
4974 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4975 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4976 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4977 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4978 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4981 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4982 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4983 (vec_concat:<VNARROWQ2>
4984 (truncate:<VNARROWQ>
4987 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4988 (match_operand:VQN 3 "register_operand" "w"))
4989 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4990 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4991 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4992 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4993 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4994 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4997 (define_expand "aarch64_<optab>hn2<mode>"
4998 [(match_operand:<VNARROWQ2> 0 "register_operand")
4999 (match_operand:<VNARROWQ> 1 "register_operand")
5000 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5001 (match_operand:VQN 3 "register_operand"))]
5005 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5006 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5007 if (BYTES_BIG_ENDIAN)
5008 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
5009 operands[1], operands[2], operands[3], shft));
5011 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
5012 operands[1], operands[2], operands[3], shft));
5017 (define_expand "aarch64_r<optab>hn2<mode>"
5018 [(match_operand:<VNARROWQ2> 0 "register_operand")
5019 (match_operand:<VNARROWQ> 1 "register_operand")
5020 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
5021 (match_operand:VQN 3 "register_operand"))]
5025 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5026 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
5028 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5029 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
5030 if (BYTES_BIG_ENDIAN)
5031 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
5032 operands[1], operands[2], operands[3], rnd, shft));
5034 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5035 operands[1], operands[2], operands[3], rnd, shft));
5040 ;; Optimize ((a + b) >> n) + c where n is half the element bitsize
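;; of the vector. The split below implements this as addhn followed by
;; uaddw (a sketch of the equivalence):
;;   ((a + b) >> (esize / 2)) + c == uaddw (c, addhn (a, b))
;; since addhn keeps exactly the high half of each sum, avoiding the
;; explicit shift.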
5041 (define_insn_and_split "*bitmask_shift_plus<mode>"
5042 [(set (match_operand:VQN 0 "register_operand" "=&w")
5045 (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5046 (match_operand:VQN 2 "register_operand" "w"))
5047 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5048 (match_operand:VQN 4 "register_operand" "w")))]
5055 if (can_create_pseudo_p ())
5056 tmp = gen_reg_rtx (<VNARROWQ>mode);
5058 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5059 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5060 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5066 (define_insn "aarch64_pmul<mode>"
5067 [(set (match_operand:VB 0 "register_operand" "=w")
5068 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5069 (match_operand:VB 2 "register_operand" "w")]
5072 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5073 [(set_attr "type" "neon_mul_<Vetype><q>")]
5076 (define_insn "aarch64_pmullv8qi"
5077 [(set (match_operand:V8HI 0 "register_operand" "=w")
5078 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5079 (match_operand:V8QI 2 "register_operand" "w")]
5082 "pmull\\t%0.8h, %1.8b, %2.8b"
5083 [(set_attr "type" "neon_mul_b_long")]
5086 (define_insn "aarch64_pmull_hiv16qi_insn"
5087 [(set (match_operand:V8HI 0 "register_operand" "=w")
5090 (match_operand:V16QI 1 "register_operand" "w")
5091 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5093 (match_operand:V16QI 2 "register_operand" "w")
5097 "pmull2\\t%0.8h, %1.16b, %2.16b"
5098 [(set_attr "type" "neon_mul_b_long")]
5101 (define_expand "aarch64_pmull_hiv16qi"
5102 [(match_operand:V8HI 0 "register_operand")
5103 (match_operand:V16QI 1 "register_operand")
5104 (match_operand:V16QI 2 "register_operand")]
5107 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5108 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5116 (define_insn "aarch64_fmulx<mode>"
5117 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5119 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5120 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5123 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5124 [(set_attr "type" "neon_fp_mul_<stype>")]
5127 ;; vmulxq_lane_f32 and vmulx_laneq_f32
5129 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5130 [(set (match_operand:VDQSF 0 "register_operand" "=w")
5132 [(match_operand:VDQSF 1 "register_operand" "w")
5133 (vec_duplicate:VDQSF
5135 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5136 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5140 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5141 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5143 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5146 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5148 (define_insn "*aarch64_mulx_elt<mode>"
5149 [(set (match_operand:VDQF 0 "register_operand" "=w")
5151 [(match_operand:VDQF 1 "register_operand" "w")
5154 (match_operand:VDQF 2 "register_operand" "w")
5155 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5159 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5160 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5162 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5167 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5168 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5170 [(match_operand:VHSDF 1 "register_operand" "w")
5171 (vec_duplicate:VHSDF
5172 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5175 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5176 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5179 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5180 ;; vmulxd_lane_f64 == vmulx_lane_f64
5181 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5183 (define_insn "*aarch64_vgetfmulx<mode>"
5184 [(set (match_operand:<VEL> 0 "register_operand" "=w")
5186 [(match_operand:<VEL> 1 "register_operand" "w")
5188 (match_operand:VDQF 2 "register_operand" "w")
5189 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5193 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5194 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5196 [(set_attr "type" "fmul<Vetype>")]
5200 (define_insn "aarch64_<su_optab>q<addsub><mode><vczle><vczbe>"
5201 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5202 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5203 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5205 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5206 [(set_attr "type" "neon_q<addsub><q>")]
5209 ;; suqadd and usqadd
5211 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5212 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5213 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5214 (match_operand:VSDQ_I 2 "register_operand" "w")]
5217 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5218 [(set_attr "type" "neon_qadd<q>")]
5221 ;; sqmovn and uqmovn
5223 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5224 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5225 (SAT_TRUNC:<VNARROWQ>
5226 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5228 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5229 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5232 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5233 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5234 (SAT_TRUNC:<VNARROWQ>
5235 (match_operand:VQN 1 "register_operand" "w")))]
5237 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5238 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5241 (define_insn "aarch64_<su>qxtn2<mode>_le"
5242 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5243 (vec_concat:<VNARROWQ2>
5244 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5245 (SAT_TRUNC:<VNARROWQ>
5246 (match_operand:VQN 2 "register_operand" "w"))))]
5247 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5248 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5249 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5252 (define_insn "aarch64_<su>qxtn2<mode>_be"
5253 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5254 (vec_concat:<VNARROWQ2>
5255 (SAT_TRUNC:<VNARROWQ>
5256 (match_operand:VQN 2 "register_operand" "w"))
5257 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5258 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5259 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5260 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5263 (define_expand "aarch64_<su>qxtn2<mode>"
5264 [(match_operand:<VNARROWQ2> 0 "register_operand")
5265 (match_operand:<VNARROWQ> 1 "register_operand")
5266 (SAT_TRUNC:<VNARROWQ>
5267 (match_operand:VQN 2 "register_operand"))]
5270 if (BYTES_BIG_ENDIAN)
5271 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5274 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5282 (define_insn "aarch64_sqmovun<mode>"
5283 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5284 (truncate:<VNARROWQ>
5287 (match_operand:SD_HSDI 1 "register_operand" "w")
5289 (const_int <half_mask>))))]
5291 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5292 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5295 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5296 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5297 (truncate:<VNARROWQ>
5299 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5300 (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5301 (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5303 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5304 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5307 (define_expand "aarch64_sqmovun<mode>"
5308 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5309 (truncate:<VNARROWQ>
5311 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5316 operands[2] = CONST0_RTX (<MODE>mode);
5318 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5319 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5323 (define_insn "aarch64_sqxtun2<mode>_le"
5324 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5325 (vec_concat:<VNARROWQ2>
5326 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5327 (truncate:<VNARROWQ>
5330 (match_operand:VQN 2 "register_operand" "w")
5331 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5332 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5333 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5334 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5335 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5338 (define_insn "aarch64_sqxtun2<mode>_be"
5339 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5340 (vec_concat:<VNARROWQ2>
5341 (truncate:<VNARROWQ>
5344 (match_operand:VQN 2 "register_operand" "w")
5345 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5346 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5347 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5348 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5349 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5350 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5353 (define_expand "aarch64_sqxtun2<mode>"
5354 [(match_operand:<VNARROWQ2> 0 "register_operand")
5355 (match_operand:<VNARROWQ> 1 "register_operand")
5356 (match_operand:VQN 2 "register_operand")]
5359 rtx zeros = CONST0_RTX (<MODE>mode);
5360 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5361 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5362 if (BYTES_BIG_ENDIAN)
5363 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5364 operands[2], zeros, half_umax));
5366 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5367 operands[2], zeros, half_umax));
5374 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5375 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5377 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5379 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5380 [(set_attr "type" "neon_<optab><q>")]
5385 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5386 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5388 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5389 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5392 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5393 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5396 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5397 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5399 [(match_operand:VDQHS 1 "register_operand" "w")
5400 (vec_duplicate:VDQHS
5401 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5404 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5405 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5410 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5411 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5413 [(match_operand:VDQHS 1 "register_operand" "w")
5415 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5416 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5420 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5421 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5422 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5425 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5426 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5428 [(match_operand:VDQHS 1 "register_operand" "w")
5430 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5431 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5435 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5436 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5437 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5440 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5441 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5443 [(match_operand:SD_HSI 1 "register_operand" "w")
5445 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5446 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5450 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5451 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5452 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5455 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5456 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5458 [(match_operand:SD_HSI 1 "register_operand" "w")
5460 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5461 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5465 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5466 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5467 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5472 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5473 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5475 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5476 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5477 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5480 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5481 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
         SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l.

(define_insn "aarch64_sqdmlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

(define_insn "aarch64_sqdmlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

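;; The ss_plus/ss_minus over an ss_ashift-by-one of the widened product
;; mirrors the architectural behaviour: accumulate the saturated doubled
;; product, then saturate the accumulation.  A rough C model for one
;; 16-bit lane (illustrative helper name):
;;
;;   #include <stdint.h>
;;   int32_t sqdmlal_s16 (int32_t acc, int16_t a, int16_t b)
;;   {
;;     int64_t dbl = 2 * (int64_t) a * (int64_t) b;  /* sqdmull step */
;;     if (dbl > INT32_MAX)
;;       dbl = INT32_MAX;                            /* saturate product */
;;     int64_t sum = (int64_t) acc + dbl;            /* accumulate */
;;     if (sum > INT32_MAX) sum = INT32_MAX;         /* saturate result */
;;     if (sum < INT32_MIN) sum = INT32_MIN;
;;     return (int32_t) sum;
;;   }
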
;; vqdml[sa]l_lane.

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n.

(define_insn "aarch64_sqdmlsl_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2.

(define_insn "aarch64_sqdmlal2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane.

(define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], operands[4], p));
  DONE;
})

(define_insn "aarch64_sqdmlsl2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmlal2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_plus:<VWIDE>
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (vec_duplicate:<VWIDE>
                (sign_extend:<VWIDE_S>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (SBINQOPS:<VWIDE>
     (match_operand:<VWIDE> 1 "register_operand")
     (match_dup 1))
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
                                                operands[1], operands[2],
                                                operands[3], p));
  DONE;
})

;; vqdmull.

(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 2 "register_operand" "w")))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

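;; A rough C model of one SQDMULL lane (16-bit case; helper name is
;; illustrative): widen, double, and saturate.
;;
;;   #include <stdint.h>
;;   int32_t sqdmull_s16 (int16_t a, int16_t b)
;;   {
;;     int64_t dbl = 2 * (int64_t) a * (int64_t) b;
;;     if (dbl > INT32_MAX)
;;       dbl = INT32_MAX;   /* only -32768 * -32768 overflows */
;;     return (int32_t) dbl;
;;   }
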
;; vqdmull_lane.

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n.

(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (match_operand:<VEL> 2 "register_operand" "<vwx>"))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2.

(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 2 "register_operand" "w")
                (match_dup 3))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; vqdmull2_lane.

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))))
          (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        p));
  DONE;
})

;; vqdmull2_n.

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (vec_duplicate:<VWIDE>
              (sign_extend:<VWIDE_S>
                (match_operand:<VEL> 2 "register_operand" "<vwx>"))))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], p));
  DONE;
})

;; vshl.

(define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; vqshl.

(define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
          [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n.

(define_insn "aarch64_<su>shll<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
                          (match_operand:VD_BHSI 1 "register_operand"))
                        (match_operand:<VWIDE> 2
                          "aarch64_simd_shll_imm_vec")))]
  "TARGET_SIMD"
  {@ [cons: =0, 1, 2]
     [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
     [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

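;; The D2 alternative covers the case where the shift amount equals the
;; source element width, for which only the SHLL form exists; otherwise
;; the pattern emits USHLL/SSHLL.  A rough scalar C model of one unsigned
;; 8-bit lane (illustrative helper name):
;;
;;   #include <stdint.h>
;;   uint16_t ushll_u8 (uint8_t x, unsigned int shift)  /* 0 <= shift <= 8 */
;;   {
;;     return (uint16_t) ((uint16_t) x << shift);  /* widen, then shift */
;;   }
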
(define_expand "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
                        VSHLL))]
  "TARGET_SIMD"
{
  rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
  emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
  DONE;
})

;; vshll_high_n.

(define_insn "aarch64_<su>shll2<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
                          (vec_select:<VHALF>
                            (match_operand:VQW 1 "register_operand")
                            (match_operand:VQW 2 "vect_par_cnst_hi_half")))
                        (match_operand:<VWIDE> 3
                          "aarch64_simd_shll_imm_vec")))]
  "TARGET_SIMD"
  {@ [cons: =0, 1, 2, 3]
     [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
     [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand")
        (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
                        VSHLL))]
  "TARGET_SIMD"
{
  rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
  DONE;
})

;; vrshr_n.

(define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (truncate:VSDQ_I_DI
          (SHIFTRT:<V2XWIDE>
            (plus:<V2XWIDE>
              (<SHIFTEXTEND>:<V2XWIDE>
                (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
            (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

(define_expand "aarch64_<sra_op>rshr_n<mode>"
  [(match_operand:VSDQ_I_DI 0 "register_operand")
   (SHIFTRT:VSDQ_I_DI
     (match_operand:VSDQ_I_DI 1 "register_operand")
     (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  if (VECTOR_MODE_P (<MODE>mode))
    {
      shft = gen_const_vec_duplicate (<MODE>mode, shft);
      rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
    }

  emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
                                                    shft, rnd));
  DONE;
})

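;; The rounding constant built above implements the usual "add half, then
;; shift" rounding in the double-width mode.  A scalar C sketch of the
;; computation the matched RTL performs for one 32-bit element
;; (illustrative helper name):
;;
;;   #include <stdint.h>
;;   int32_t srshr_s32 (int32_t x, unsigned int shift)  /* 1 <= shift <= 32 */
;;   {
;;     int64_t wide = (int64_t) x + (1LL << (shift - 1)); /* rounding bias */
;;     return (int32_t) (wide >> shift);  /* shift in double width, narrow */
;;   }
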
;; v(r)sra_n.

(define_insn "aarch64_<sur>sra_ndi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
                    (match_operand:DI 2 "register_operand" "w")
                    (match_operand:SI 3
                      "aarch64_simd_shift_imm_offset_di" "i")]
                   VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%d0, %d2, %3"
  [(set_attr "type" "neon_shift_acc")]
)

;; vs<lr>i_n.

(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
                           (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                           (match_operand:SI 3
                             "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                          VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u).

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
                        (match_operand:SI 2
                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
                       VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vq(r)shr(u)n_n.

(define_insn "aarch64_<shrn_op>shrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:SD_HSDI
            (match_operand:SD_HSDI 1 "register_operand" "w")
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (ALL_TRUNC:<VNARROWQ>
          (SHIFTRT:VQN
            (match_operand:VQN 1 "register_operand" "w")
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
  "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>shrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:VQN
            (match_operand:VQN 1 "register_operand")
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[2]));
})

(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:VQN 1 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
            (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<DWI>
            (plus:<DWI>
              (<TRUNCEXTEND>:<DWI>
                (match_operand:SD_HSDI 1 "register_operand" "w"))
              (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (SAT_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:SD_HSDI 1 "register_operand"))
              (match_dup 3))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
})

(define_expand "aarch64_<shrn_op>rshrn_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (ALL_TRUNC:<VNARROWQ>
          (<TRUNC_SHIFT>:<V2XWIDE>
            (plus:<V2XWIDE>
              (<TRUNCEXTEND>:<V2XWIDE>
                (match_operand:VQN 1 "register_operand"))
              (match_dup 3))
            (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
  "TARGET_SIMD"
{
  if (<CODE> == TRUNCATE
      && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
    {
      rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
      emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
      DONE;
    }
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
  operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
})

(define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ>
          (smin:VQN
            (smax:VQN
              (ashiftrt:VQN
                (match_operand:VQN 1 "register_operand" "w")
                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
              (match_operand:VQN 3 "aarch64_simd_imm_zero"))
            (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
  "TARGET_SIMD"
  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqshrun_n<mode>_insn"
  [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
        (smin:SD_HSDI
          (smax:SD_HSDI
            (ashiftrt:SD_HSDI
              (match_operand:SD_HSDI 1 "register_operand" "w")
              (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
            (const_int 0))
          (const_int <half_mask>)))]
  "TARGET_SIMD"
  "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqshrun_n<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:SD_HSDI 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
  "TARGET_SIMD"
{
  rtx dst = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
                                               operands[2]));
  emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
  DONE;
})

(define_expand "aarch64_sqshrun_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
          (smin:VQN
            (smax:VQN
              (ashiftrt:VQN
                (match_operand:VQN 1 "register_operand")
                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
              (match_dup 3))
            (match_dup 4))))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[2]));
  operands[3] = CONST0_RTX (<MODE>mode);
  operands[4]
    = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
})

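;; Writing SQSHRUN as truncate (smin (smax (ashiftrt x n) 0) max) expresses
;; the unsigned-saturating narrow directly in RTL.  A scalar C model for
;; narrowing 16 bits to 8 (illustrative helper name):
;;
;;   #include <stdint.h>
;;   uint8_t sqshrun_s16 (int16_t x, unsigned int shift)
;;   {
;;     int32_t v = x >> shift;     /* arithmetic shift right */
;;     if (v < 0)
;;       v = 0;                    /* the smax against zero */
;;     if (v > UINT8_MAX)
;;       v = UINT8_MAX;            /* the smin against the narrow max */
;;     return (uint8_t) v;
;;   }
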
(define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ>
          (smin:<V2XWIDE>
            (smax:<V2XWIDE>
              (ashiftrt:<V2XWIDE>
                (plus:<V2XWIDE>
                  (sign_extend:<V2XWIDE>
                    (match_operand:VQN 1 "register_operand" "w"))
                  (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
                (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
              (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
            (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqrshrun_n<mode>_insn"
  [(set (match_operand:<DWI> 0 "register_operand" "=w")
        (smin:<DWI>
          (smax:<DWI>
            (ashiftrt:<DWI>
              (plus:<DWI>
                (sign_extend:<DWI>
                  (match_operand:SD_HSDI 1 "register_operand" "w"))
                (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
              (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
            (const_int 0))
          (const_int <half_mask>)))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
  "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqrshrun_n<mode>"
  [(match_operand:<VNARROWQ> 0 "register_operand")
   (match_operand:SD_HSDI 1 "register_operand")
   (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
  rtx dst = gen_reg_rtx (<DWI>mode);
  emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
  emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
  DONE;
})

(define_expand "aarch64_sqrshrun_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
        (truncate:<VNARROWQ>
          (smin:<V2XWIDE>
            (smax:<V2XWIDE>
              (ashiftrt:<V2XWIDE>
                (plus:<V2XWIDE>
                  (sign_extend:<V2XWIDE>
                    (match_operand:VQN 1 "register_operand"))
                  (match_dup 3))
                (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
              (match_dup 4))
            (match_dup 5))))]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
  operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
  operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
  operands[4] = CONST0_RTX (<V2XWIDE>mode);
  operands[5]
    = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
  operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
})

(define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (ALL_TRUNC:<VNARROWQ>
            (SHIFTRT:VQN
              (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (ALL_TRUNC:<VNARROWQ>
            (SHIFTRT:VQN
              (match_operand:VQN 2 "register_operand" "w")
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (ALL_TRUNC:<VNARROWQ>
     (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
{
  operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[3]));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
                operands[0], operands[1], operands[2], operands[3]));
  else
    emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
                operands[0], operands[1], operands[2], operands[3]));
  DONE;
})

(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (ALL_TRUNC:<VNARROWQ>
            (<TRUNC_SHIFT>:<V2XWIDE>
              (plus:<V2XWIDE>
                (<TRUNCEXTEND>:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (ALL_TRUNC:<VNARROWQ>
            (<TRUNC_SHIFT>:<V2XWIDE>
              (plus:<V2XWIDE>
                (<TRUNCEXTEND>:<V2XWIDE>
                  (match_operand:VQN 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  if (<CODE> == TRUNCATE
      && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
    {
      rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
      emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
                                            operands[2], tmp));
      DONE;
    }
  /* Use this expander to create the rounding constant vector, which is
     1 << (shift - 1).  Use wide_int here to ensure that the right TImode
     RTL is generated when handling the DImode expanders.  */
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
  operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            rnd));
  else
    emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            rnd));
  DONE;
})

(define_insn "aarch64_sqshrun2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ>
            (smin:VQN
              (smax:VQN
                (ashiftrt:VQN
                  (match_operand:VQN 2 "register_operand" "w")
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqshrun2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (smin:VQN
              (smax:VQN
                (ashiftrt:VQN
                  (match_operand:VQN 2 "register_operand" "w")
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:VQN 4 "aarch64_simd_imm_zero"))
              (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqshrun2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                   INTVAL (operands[3]));
  rtx zeros = CONST0_RTX (<MODE>mode);
  rtx max
    = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
                                operands[1], operands[2], operands[3],
                                zeros, max));
  else
    emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
                                operands[1], operands[2], operands[3],
                                zeros, max));
  DONE;
})

(define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
          (truncate:<VNARROWQ>
            (smin:<V2XWIDE>
              (smax:<V2XWIDE>
                (ashiftrt:<V2XWIDE>
                  (plus:<V2XWIDE>
                    (sign_extend:<V2XWIDE>
                      (match_operand:VQN 2 "register_operand" "w"))
                    (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ>
            (smin:<V2XWIDE>
              (smax:<V2XWIDE>
                (ashiftrt:<V2XWIDE>
                  (plus:<V2XWIDE>
                    (sign_extend:<V2XWIDE>
                      (match_operand:VQN 2 "register_operand" "w"))
                    (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
                  (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
                (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
              (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "aarch64_sqrshrun2_n<mode>"
  [(match_operand:<VNARROWQ2> 0 "register_operand")
   (match_operand:<VNARROWQ> 1 "register_operand")
   (match_operand:VQN 2 "register_operand")
   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
  "TARGET_SIMD"
{
  int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
  wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
  rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
  rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
  rtx zero = CONST0_RTX (<V2XWIDE>mode);
  rtx max
    = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
                        GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
  operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
                                operands[1], operands[2], operands[3], rnd,
                                zero, max));
  else
    emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
                                operands[1], operands[2], operands[3], rnd,
                                zero, max));
  DONE;
})

;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand")
            (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2   ; attrs: type          ]
     [ w        , w , w   ; neon_compare<q>      ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
     [ w        , w , ZDz ; neon_compare_zero<q> ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
  }
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

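;; For the general-purpose-register alternative the split above produces a
;; compare followed by a negated conditional set (cmp + csetm) rather than
;; a SIMD compare; its C semantics (illustrative helper name):
;;
;;   long long cmgt_gp (long long a, long long b)
;;   {
;;     return a > b ? -1LL : 0LL;
;;   }
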
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD && reload_completed"
  {@ [ cons: =0 , 1 , 2   ; attrs: type       ]
     [ w        , w , w   ; neon_compare      ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
     [ w        , w , ZDz ; neon_compare_zero ] cm<optab>\t%d0, %d1, #0
  }
)

;; cm(hs|hi)

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (neg:<V_INT_EQUIV>
          (UCOMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        machine_mode mode = CCmode;
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w"))))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

(define_insn "aarch64_cmtst<mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (plus:<V_INT_EQUIV>
          (eq:<V_INT_EQUIV>
            (and:VDQ_I
              (match_operand:VDQ_I 1 "register_operand" "w")
              (match_operand:VDQ_I 2 "register_operand" "w"))
            (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
          (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

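;; For reference, the per-lane semantics of CMTST in C (illustrative
;; helper name):
;;
;;   long long cmtst_lane (long long a, long long b)
;;   {
;;     return (a & b) != 0 ? -1LL : 0LL;
;;   }
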
;; A cmtst can also arise when combine produces
;; not (neg (eq x 0)),
;; in which case we rewrite it to a comparison of x against itself.

(define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (plus:<V_INT_EQUIV>
          (eq:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
          (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w,r")
              (match_operand:DI 2 "register_operand" "w,r"))
            (const_int 0))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand")
              (match_operand:DI 2 "register_operand"))
            (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
        machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
        rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
        rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

(define_insn "*aarch64_cmtstdi<vczle><vczbe>"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w")
              (match_operand:DI 2 "register_operand" "w"))
            (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

(define_insn "aarch64_cm<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VHSDF_HSDF 1 "register_operand")
            (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero"))))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2   ]
     [ w        , w , w   ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
     [ w        , w , YDz ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
  }
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).

(define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (neg:<V_INT_EQUIV>
          (FAC_COMPARISONS:<V_INT_EQUIV>
            (abs:VHSDF_HSDF
              (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
            (abs:VHSDF_HSDF
              (match_operand:VHSDF_HSDF 2 "register_operand" "w")))))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; ADDP with two registers semantically concatenates them and performs
;; a pairwise addition on the result.  For 128-bit input modes represent this
;; as a concatenation of the pairwise addition results of the two input
;; registers.  This allows us to avoid using intermediate 256-bit modes.
(define_insn "aarch64_addp<mode>_insn"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (vec_concat:VQ_I
          (plus:<VHALF>
            (vec_select:<VHALF>
              (match_operand:VQ_I 1 "register_operand" "w")
              (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
            (vec_select:<VHALF>
              (match_dup 1)
              (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
          (plus:<VHALF>
            (vec_select:<VHALF>
              (match_operand:VQ_I 2 "register_operand" "w")
              (match_dup 3))
            (vec_select:<VHALF>
              (match_dup 2)
              (match_dup 4)))))]
  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

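;; A C model of the pairwise addition described above, for a 4-lane
;; vector (illustrative helper name):
;;
;;   #include <stdint.h>
;;   void addp_s32x4 (int32_t d[4], const int32_t a[4], const int32_t b[4])
;;   {
;;     d[0] = a[0] + a[1];  /* pairs from the first input */
;;     d[1] = a[2] + a[3];
;;     d[2] = b[0] + b[1];  /* pairs from the second input */
;;     d[3] = b[2] + b[3];
;;   }
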
;; For 64-bit input modes an ADDP is represented as a concatenation
;; of the input registers into a 128-bit register which is then fed
;; into a pairwise add.  That way we avoid having to create intermediate
;; 32-bit vector modes.
(define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (plus:VD_BHSI
          (vec_select:VD_BHSI
            (vec_concat:<VDBL>
              (match_operand:VD_BHSI 1 "register_operand" "w")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
          (vec_select:VD_BHSI
            (vec_concat:<VDBL>
              (match_dup 1)
              (match_dup 2))
            (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; A common use case of 64-bit ADDP is to have both operands come from the
;; same 128-bit vector and produce the pairwise addition results in the
;; lower half.  Split into the 128-bit ADDP form and extract the low half.
(define_insn_and_split "*aarch64_addp_same_reg<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w")
        (plus:<VHALF>
          (vec_select:<VHALF>
            (match_operand:VQ_I 1 "register_operand" "w")
            (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
          (vec_select:<VHALF>
            (match_dup 1)
            (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
  "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    rtx scratch;
    if (can_create_pseudo_p ())
      scratch = gen_reg_rtx (<MODE>mode);
    else
      scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);

    emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
                                            operands[2], operands[3]));
    emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
    DONE;
  }
)

(define_expand "aarch64_addp<mode>"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
  if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
    nunits /= 2;
  rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
  rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
  if (BYTES_BIG_ENDIAN)
    std::swap (operands[1], operands[2]);
  emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
                                          operands[2], par_even, par_odd));
  DONE;
})

;; sqrt

(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
        (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2<vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.

(define_insn "aarch64_simd_ld2<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
        (unspec:VSTRUCT_2Q [
          (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_simd_ld2r<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_2QD [
          (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

7415 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7416 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7417 (unspec:VSTRUCT_2QD [
7418 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7419 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7420 (match_operand:SI 3 "immediate_operand" "i")]
7424 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7425 INTVAL (operands[3]));
7426 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7428 [(set_attr "type" "neon_load2_one_lane")]
7431 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7432 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7433 (unspec:VSTRUCT_2Q [
7434 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7438 if (BYTES_BIG_ENDIAN)
7439 {
7440 rtx tmp = gen_reg_rtx (<MODE>mode);
7441 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7442 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7443 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7444 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7445 }
7446 else
7447 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7451 (define_insn "aarch64_simd_st2<vstruct_elt>"
7452 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7453 (unspec:VSTRUCT_2Q [
7454 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7457 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7458 [(set_attr "type" "neon_store2_2reg<q>")]
7461 ;; RTL uses GCC vector extension indices, so flip only for assembly.
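;; The remapping done by aarch64_endian_lane_rtx amounts to the
;; following (an illustrative sketch; see aarch64.cc for the real
;; definition):
;;
;;   /* GCC lane I names the architectural lane NUNITS - 1 - I when
;;      the target is big-endian, and lane I otherwise.  */
;;   static int endian_lane (int nunits, int i)
;;   {
;;     return BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
;;   }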
7462 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7463 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7464 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7465 (match_operand:SI 2 "immediate_operand" "i")]
7469 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7470 INTVAL (operands[2]));
7471 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7473 [(set_attr "type" "neon_store2_one_lane<q>")]
7476 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7477 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7478 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7482 if (BYTES_BIG_ENDIAN)
7483 {
7484 rtx tmp = gen_reg_rtx (<MODE>mode);
7485 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7486 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7487 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7488 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7489 }
7490 else
7491 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7495 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7496 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7497 (unspec:VSTRUCT_3Q [
7498 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7501 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7502 [(set_attr "type" "neon_load3_3reg<q>")]
7505 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
7506 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7507 (unspec:VSTRUCT_3QD [
7508 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7511 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7512 [(set_attr "type" "neon_load3_all_lanes<q>")]
7515 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7516 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7517 (unspec:VSTRUCT_3QD [
7518 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7519 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7520 (match_operand:SI 3 "immediate_operand" "i")]
7524 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7525 INTVAL (operands[3]));
7526 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7528 [(set_attr "type" "neon_load3_one_lane")]
7531 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7532 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7533 (unspec:VSTRUCT_3Q [
7534 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7538 if (BYTES_BIG_ENDIAN)
7539 {
7540 rtx tmp = gen_reg_rtx (<MODE>mode);
7541 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7542 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7543 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7544 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7545 }
7546 else
7547 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7551 (define_insn "aarch64_simd_st3<vstruct_elt>"
7552 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7553 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7556 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7557 [(set_attr "type" "neon_store3_3reg<q>")]
7560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7561 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7562 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7563 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7564 (match_operand:SI 2 "immediate_operand" "i")]
7568 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7569 INTVAL (operands[2]));
7570 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7572 [(set_attr "type" "neon_store3_one_lane<q>")]
7575 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7576 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7577 (unspec:VSTRUCT_3Q [
7578 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7582 if (BYTES_BIG_ENDIAN)
7583 {
7584 rtx tmp = gen_reg_rtx (<MODE>mode);
7585 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7586 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7587 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7588 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7589 }
7590 else
7591 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7595 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7596 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7597 (unspec:VSTRUCT_4Q [
7598 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7601 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7602 [(set_attr "type" "neon_load4_4reg<q>")]
7605 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
7606 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7607 (unspec:VSTRUCT_4QD [
7608 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7611 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7612 [(set_attr "type" "neon_load4_all_lanes<q>")]
7615 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7616 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7617 (unspec:VSTRUCT_4QD [
7618 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7619 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7620 (match_operand:SI 3 "immediate_operand" "i")]
7624 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7625 INTVAL (operands[3]));
7626 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7628 [(set_attr "type" "neon_load4_one_lane")]
7631 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7632 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7633 (unspec:VSTRUCT_4Q [
7634 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7638 if (BYTES_BIG_ENDIAN)
7639 {
7640 rtx tmp = gen_reg_rtx (<MODE>mode);
7641 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7642 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7643 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7644 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7645 }
7646 else
7647 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7651 (define_insn "aarch64_simd_st4<vstruct_elt>"
7652 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7653 (unspec:VSTRUCT_4Q [
7654 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7657 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7658 [(set_attr "type" "neon_store4_4reg<q>")]
7661 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7662 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7663 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7664 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7665 (match_operand:SI 2 "immediate_operand" "i")]
7669 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7670 INTVAL (operands[2]));
7671 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7673 [(set_attr "type" "neon_store4_one_lane<q>")]
7676 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7677 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7678 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7682 if (BYTES_BIG_ENDIAN)
7683 {
7684 rtx tmp = gen_reg_rtx (<MODE>mode);
7685 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7686 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7687 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7688 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7689 }
7690 else
7691 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7695 ;; Patterns for rcpc3 vector lane loads and stores.
7697 (define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
7698 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
7699 (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
7700 (match_operand:SI 2 "immediate_operand" "i")]
7704 operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
7705 INTVAL (operands[2]));
7706 return "stl1\\t{%S1.<Vetype>}[%2], %0";
7708 [(set_attr "type" "neon_store2_one_lane")]
7711 (define_expand "aarch64_vec_stl1_lane<mode>"
7712 [(match_operand:DI 0 "register_operand")
7713 (match_operand:V12DIF 1 "register_operand")
7714 (match_operand:SI 2 "immediate_operand")]
7717 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7718 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7720 aarch64_simd_lane_bounds (operands[2], 0,
7721 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7722 emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
7723 operands[1], operands[2]));
7727 (define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
7728 [(set (match_operand:V12DIF 0 "register_operand" "=w")
7730 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
7731 (match_operand:V12DIF 2 "register_operand" "0")
7732 (match_operand:SI 3 "immediate_operand" "i")]
7733 UNSPEC_LDAP1_LANE))]
7736 operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
7737 INTVAL (operands[3]));
7738 return "ldap1\\t{%S0.<Vetype>}[%3], %1";
7740 [(set_attr "type" "neon_load2_one_lane")]
7743 (define_expand "aarch64_vec_ldap1_lane<mode>"
7744 [(match_operand:V12DIF 0 "register_operand")
7745 (match_operand:DI 1 "register_operand")
7746 (match_operand:V12DIF 2 "register_operand")
7747 (match_operand:SI 3 "immediate_operand")]
7750 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7751 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7753 aarch64_simd_lane_bounds (operands[3], 0,
7754 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7755 emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
7756 mem, operands[2], operands[3]));
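;; These expanders implement the LRCPC3 load-acquire/store-release
;; lane intrinsics.  An illustrative use (intrinsic names here assume
;; an arm_neon.h with +rcpc3 support):
;;
;;   uint64x2_t load_acquire_lane0 (const uint64_t *p, uint64x2_t v)
;;   {
;;     /* ldap1 {v0.d}[0], [x0]: load-acquire into lane 0.  */
;;     return vldap1q_lane_u64 (p, v, 0);
;;   }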
7760 (define_insn_and_split "aarch64_rev_reglist<mode>"
7761 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7763 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7764 (match_operand:V16QI 2 "register_operand" "w")]
7765 UNSPEC_REV_REGLIST))]
7768 "&& reload_completed"
7772 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7773 for (i = 0; i < nregs; i++)
7775 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7776 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7777 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7781 [(set_attr "type" "neon_tbl1_q")
7782 (set_attr "length" "<insn_count>")]
7785 ;; Reload patterns for AdvSIMD register list operands.
7787 (define_expand "mov<mode>"
7788 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7789 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7792 if (can_create_pseudo_p ())
7794 if (GET_CODE (operands[0]) != REG)
7795 operands[1] = force_reg (<MODE>mode, operands[1]);
7799 (define_expand "mov<mode>"
7800 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7801 (match_operand:VSTRUCT 1 "general_operand"))]
7804 if (can_create_pseudo_p ())
7806 if (GET_CODE (operands[0]) != REG)
7807 operands[1] = force_reg (<MODE>mode, operands[1]);
7811 (define_expand "movv8di"
7812 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7813 (match_operand:V8DI 1 "general_operand"))]
7816 if (can_create_pseudo_p () && MEM_P (operands[0]))
7817 operands[1] = force_reg (V8DImode, operands[1]);
7820 (define_expand "aarch64_ld1x3<vstruct_elt>"
7821 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7822 (match_operand:DI 1 "register_operand")]
7825 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7826 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7830 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7831 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7833 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7836 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7837 [(set_attr "type" "neon_load1_3reg<q>")]
7840 (define_expand "aarch64_ld1x4<vstruct_elt>"
7841 [(match_operand:VSTRUCT_4QD 0 "register_operand")
7842 (match_operand:DI 1 "register_operand")]
7845 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7846 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7850 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7851 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7853 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7856 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7857 [(set_attr "type" "neon_load1_4reg<q>")]
7860 (define_expand "aarch64_st1x2<vstruct_elt>"
7861 [(match_operand:DI 0 "register_operand")
7862 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7865 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7866 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7870 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7871 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7873 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7876 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7877 [(set_attr "type" "neon_store1_2reg<q>")]
7880 (define_expand "aarch64_st1x3<vstruct_elt>"
7881 [(match_operand:DI 0 "register_operand")
7882 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7885 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7886 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7890 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7891 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7893 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7896 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7897 [(set_attr "type" "neon_store1_3reg<q>")]
7900 (define_expand "aarch64_st1x4<vstruct_elt>"
7901 [(match_operand:DI 0 "register_operand")
7902 (match_operand:VSTRUCT_4QD 1 "register_operand")]
7905 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7906 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7910 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7911 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7913 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7916 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7917 [(set_attr "type" "neon_store1_4reg<q>")]
7920 (define_insn "*aarch64_mov<mode>"
7921 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
7922 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
7923 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7924 && (register_operand (operands[0], <MODE>mode)
7925 || register_operand (operands[1], <MODE>mode))"
7926 {@ [ cons: =0 , 1 ; attrs: type , length ]
7927 [ w , w ; multiple , <insn_count> ] #
7928 [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7929 [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
7933 (define_insn "*aarch64_mov<mode>"
7934 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
7935 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
7936 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7937 && (register_operand (operands[0], <MODE>mode)
7938 || register_operand (operands[1], <MODE>mode))"
7939 {@ [ cons: =0 , 1 ; attrs: type , length ]
7940 [ w , w ; multiple , <insn_count> ] #
7941 [ Utv , w ; neon_store<nregs>_<nregs>reg_q , 4 ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
7942 [ w , Utv ; neon_load<nregs>_<nregs>reg_q , 4 ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
7946 (define_insn "*aarch64_movv8di"
7947 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7948 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7949 "(register_operand (operands[0], V8DImode)
7950 || register_operand (operands[1], V8DImode))"
7952 [(set_attr "type" "multiple,multiple,multiple")
7953 (set_attr "length" "32,16,16")]
7956 (define_insn "aarch64_be_ld1<mode>"
7957 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7958 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7959 "aarch64_simd_struct_operand" "Utv")]
7962 "ld1\\t{%0<Vmtype>}, %1"
7963 [(set_attr "type" "neon_load1_1reg<q>")]
7966 (define_insn "aarch64_be_st1<mode>"
7967 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7968 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7971 "st1\\t{%1<Vmtype>}, %0"
7972 [(set_attr "type" "neon_store1_1reg<q>")]
7975 (define_insn "*aarch64_be_mov<mode>"
7976 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
7977 (match_operand:VSTRUCT_2D 1 "general_operand"))]
7979 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7980 && (register_operand (operands[0], <MODE>mode)
7981 || register_operand (operands[1], <MODE>mode))"
7982 {@ [ cons: =0 , 1 ; attrs: type , length ]
7983 [ w , w ; multiple , 8 ] #
7984 [ m , w ; neon_stp , 4 ] stp\t%d1, %R1, %0
7985 [ w , m ; neon_ldp , 4 ] ldp\t%d0, %R0, %1
7989 (define_insn "*aarch64_be_mov<mode>"
7990 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
7991 (match_operand:VSTRUCT_2Q 1 "general_operand"))]
7993 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
7994 && (register_operand (operands[0], <MODE>mode)
7995 || register_operand (operands[1], <MODE>mode))"
7996 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
7997 [ w , w ; multiple , simd , 8 ] #
7998 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
7999 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8003 (define_insn "*aarch64_be_movoi"
8004 [(set (match_operand:OI 0 "nonimmediate_operand")
8005 (match_operand:OI 1 "general_operand"))]
8007 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8008 && (register_operand (operands[0], OImode)
8009 || register_operand (operands[1], OImode))"
8010 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8011 [ w , w ; multiple , simd , 8 ] #
8012 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
8013 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8017 (define_insn "*aarch64_be_mov<mode>"
8018 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
8019 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
8021 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8022 && (register_operand (operands[0], <MODE>mode)
8023 || register_operand (operands[1], <MODE>mode))"
8025 [(set_attr "type" "multiple")
8026 (set_attr "arch" "fp<q>,*,*")
8027 (set_attr "length" "12,8,8")]
8030 (define_insn "*aarch64_be_movci"
8031 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8032 (match_operand:CI 1 "general_operand" " w,w,o"))]
8034 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8035 && (register_operand (operands[0], CImode)
8036 || register_operand (operands[1], CImode))"
8038 [(set_attr "type" "multiple")
8039 (set_attr "arch" "simd,*,*")
8040 (set_attr "length" "12,8,8")]
8043 (define_insn "*aarch64_be_mov<mode>"
8044 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8045 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
8047 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8048 && (register_operand (operands[0], <MODE>mode)
8049 || register_operand (operands[1], <MODE>mode))"
8051 [(set_attr "type" "multiple")
8052 (set_attr "arch" "fp<q>,*,*")
8053 (set_attr "length" "16,8,8")]
8056 (define_insn "*aarch64_be_movxi"
8057 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8058 (match_operand:XI 1 "general_operand" " w,w,o"))]
8060 && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8061 && (register_operand (operands[0], XImode)
8062 || register_operand (operands[1], XImode))"
8064 [(set_attr "type" "multiple")
8065 (set_attr "arch" "simd,*,*")
8066 (set_attr "length" "16,8,8")]
8070 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8071 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8072 "TARGET_FLOAT && reload_completed"
8075 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8080 [(set (match_operand:OI 0 "register_operand")
8081 (match_operand:OI 1 "register_operand"))]
8082 "TARGET_FLOAT && reload_completed"
8085 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8090 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8091 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8092 "TARGET_FLOAT && reload_completed"
8095 if (register_operand (operands[0], <MODE>mode)
8096 && register_operand (operands[1], <MODE>mode))
8098 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8101 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8103 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8104 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8105 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8107 simplify_gen_subreg (pair_mode, operands[1],
8109 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8110 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8114 gen_lowpart (<VSTRUCT_ELT>mode,
8115 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8126 [(set (match_operand:CI 0 "nonimmediate_operand")
8127 (match_operand:CI 1 "general_operand"))]
8128 "TARGET_FLOAT && reload_completed"
8131 if (register_operand (operands[0], CImode)
8132 && register_operand (operands[1], CImode))
8134 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8137 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8139 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8140 simplify_gen_subreg (OImode, operands[1], CImode, 0));
8141 emit_move_insn (gen_lowpart (V16QImode,
8142 simplify_gen_subreg (TImode, operands[0],
8144 gen_lowpart (V16QImode,
8145 simplify_gen_subreg (TImode, operands[1],
8154 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8155 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8156 "TARGET_FLOAT && reload_completed"
8159 if (register_operand (operands[0], <MODE>mode)
8160 && register_operand (operands[1], <MODE>mode))
8162 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8165 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8167 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8168 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8169 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8171 simplify_gen_subreg (pair_mode, operands[1],
8173 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8174 <MODE>mode, 2 * elt_size),
8175 simplify_gen_subreg (pair_mode, operands[1],
8176 <MODE>mode, 2 * elt_size));
8184 [(set (match_operand:XI 0 "nonimmediate_operand")
8185 (match_operand:XI 1 "general_operand"))]
8186 "TARGET_FLOAT && reload_completed"
8189 if (register_operand (operands[0], XImode)
8190 && register_operand (operands[1], XImode))
8192 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8195 else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
8197 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8198 simplify_gen_subreg (OImode, operands[1], XImode, 0));
8199 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8200 simplify_gen_subreg (OImode, operands[1], XImode, 32));
8208 [(set (match_operand:V8DI 0 "nonimmediate_operand")
8209 (match_operand:V8DI 1 "general_operand"))]
8213 if (register_operand (operands[0], V8DImode)
8214 && register_operand (operands[1], V8DImode))
8216 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8219 else if ((register_operand (operands[0], V8DImode)
8220 && memory_operand (operands[1], V8DImode))
8221 || (memory_operand (operands[0], V8DImode)
8222 && register_operand (operands[1], V8DImode)))
8224 /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16. */
8225 auto mode = STRICT_ALIGNMENT ? DImode : TImode;
8226 int increment = GET_MODE_SIZE (mode);
8227 std::pair<rtx, rtx> last_pair = {};
8228 for (int offset = 0; offset < 64; offset += increment)
8230 std::pair<rtx, rtx> pair = {
8231 simplify_gen_subreg (mode, operands[0], V8DImode, offset),
8232 simplify_gen_subreg (mode, operands[1], V8DImode, offset)
8234 if (register_operand (pair.first, mode)
8235 && reg_overlap_mentioned_p (pair.first, pair.second))
8236 last_pair = pair;
8237 else
8238 emit_move_insn (pair.first, pair.second);
8240 if (last_pair.first)
8241 emit_move_insn (last_pair.first, last_pair.second);
8248 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8249 [(match_operand:VSTRUCT_QD 0 "register_operand")
8250 (match_operand:DI 1 "register_operand")]
8253 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8254 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8256 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8260 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8261 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8262 (unspec:VSTRUCT_2DNX [
8263 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8266 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8267 [(set_attr "type" "neon_load2_2reg<q>")]
8270 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8271 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8272 (unspec:VSTRUCT_2DX [
8273 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8276 "ld1\\t{%S0.1d - %T0.1d}, %1"
8277 [(set_attr "type" "neon_load1_2reg<q>")]
8280 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8281 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8282 (unspec:VSTRUCT_3DNX [
8283 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8286 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8287 [(set_attr "type" "neon_load3_3reg<q>")]
8290 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8291 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8292 (unspec:VSTRUCT_3DX [
8293 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8296 "ld1\\t{%S0.1d - %U0.1d}, %1"
8297 [(set_attr "type" "neon_load1_3reg<q>")]
8300 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8301 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8302 (unspec:VSTRUCT_4DNX [
8303 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8306 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8307 [(set_attr "type" "neon_load4_4reg<q>")]
8310 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8311 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8312 (unspec:VSTRUCT_4DX [
8313 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8316 "ld1\\t{%S0.1d - %V0.1d}, %1"
8317 [(set_attr "type" "neon_load1_4reg<q>")]
8320 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8321 [(match_operand:VSTRUCT_D 0 "register_operand")
8322 (match_operand:DI 1 "register_operand")]
8325 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8326 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8330 (define_expand "aarch64_ld1<VALL_F16:mode>"
8331 [(match_operand:VALL_F16 0 "register_operand")
8332 (match_operand:DI 1 "register_operand")]
8335 machine_mode mode = <VALL_F16:MODE>mode;
8336 rtx mem = gen_rtx_MEM (mode, operands[1]);
8338 if (BYTES_BIG_ENDIAN)
8339 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8340 else
8341 emit_move_insn (operands[0], mem);
8345 (define_expand "aarch64_ld<nregs><vstruct_elt>"
8346 [(match_operand:VSTRUCT_Q 0 "register_operand")
8347 (match_operand:DI 1 "register_operand")]
8350 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8351 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8355 (define_expand "aarch64_ld1x2<vstruct_elt>"
8356 [(match_operand:VSTRUCT_2QD 0 "register_operand")
8357 (match_operand:DI 1 "register_operand")]
8360 machine_mode mode = <MODE>mode;
8361 rtx mem = gen_rtx_MEM (mode, operands[1]);
8363 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8367 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
8368 [(match_operand:VSTRUCT_QD 0 "register_operand")
8369 (match_operand:DI 1 "register_operand")
8370 (match_operand:VSTRUCT_QD 2 "register_operand")
8371 (match_operand:SI 3 "immediate_operand")]
8374 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8375 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8377 aarch64_simd_lane_bounds (operands[3], 0,
8378 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8379 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8380 mem, operands[2], operands[3]));
8384 ;; Permuted-store expanders for neon intrinsics.
8386 ;; Permute instructions
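;; TBL indexes bytes across the table registers and yields zero for
;; out-of-range indices.  In intrinsics terms (an illustrative sketch;
;; assumes arm_neon.h):
;;
;;   uint8x16_t permute (uint8x16_t table, uint8x16_t idx)
;;   {
;;     /* tbl v0.16b, {v1.16b}, v2.16b; idx[i] >= 16 gives 0.  */
;;     return vqtbl1q_u8 (table, idx);
;;   }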
8390 (define_expand "vec_perm<mode>"
8391 [(match_operand:VB 0 "register_operand")
8392 (match_operand:VB 1 "register_operand")
8393 (match_operand:VB 2 "register_operand")
8394 (match_operand:VB 3 "register_operand")]
8397 aarch64_expand_vec_perm (operands[0], operands[1],
8398 operands[2], operands[3], <nunits>);
8402 (define_insn "aarch64_qtbl1<mode>"
8403 [(set (match_operand:VB 0 "register_operand" "=w")
8404 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8405 (match_operand:VB 2 "register_operand" "w")]
8408 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8409 [(set_attr "type" "neon_tbl1<q>")]
8412 (define_insn "aarch64_qtbx1<mode>"
8413 [(set (match_operand:VB 0 "register_operand" "=w")
8414 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8415 (match_operand:V16QI 2 "register_operand" "w")
8416 (match_operand:VB 3 "register_operand" "w")]
8419 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8420 [(set_attr "type" "neon_tbl1<q>")]
8423 ;; Two source registers.
8425 (define_insn "aarch64_qtbl2<mode>"
8426 [(set (match_operand:VB 0 "register_operand" "=w")
8427 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8428 (match_operand:VB 2 "register_operand" "w")]
8431 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8432 [(set_attr "type" "neon_tbl2")]
8435 (define_insn "aarch64_qtbx2<mode>"
8436 [(set (match_operand:VB 0 "register_operand" "=w")
8437 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8438 (match_operand:V2x16QI 2 "register_operand" "w")
8439 (match_operand:VB 3 "register_operand" "w")]
8442 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8443 [(set_attr "type" "neon_tbl2")]
8446 ;; Three source registers.
8448 (define_insn "aarch64_qtbl3<mode>"
8449 [(set (match_operand:VB 0 "register_operand" "=w")
8450 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8451 (match_operand:VB 2 "register_operand" "w")]
8454 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8455 [(set_attr "type" "neon_tbl3")]
8458 (define_insn "aarch64_qtbx3<mode>"
8459 [(set (match_operand:VB 0 "register_operand" "=w")
8460 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8461 (match_operand:V3x16QI 2 "register_operand" "w")
8462 (match_operand:VB 3 "register_operand" "w")]
8465 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8466 [(set_attr "type" "neon_tbl3")]
8469 ;; Four source registers.
8471 (define_insn "aarch64_qtbl4<mode>"
8472 [(set (match_operand:VB 0 "register_operand" "=w")
8473 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8474 (match_operand:VB 2 "register_operand" "w")]
8477 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8478 [(set_attr "type" "neon_tbl4")]
8481 (define_insn "aarch64_qtbx4<mode>"
8482 [(set (match_operand:VB 0 "register_operand" "=w")
8483 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8484 (match_operand:V4x16QI 2 "register_operand" "w")
8485 (match_operand:VB 3 "register_operand" "w")]
8488 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8489 [(set_attr "type" "neon_tbl4")]
8492 (define_insn_and_split "aarch64_combinev16qi"
8493 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8494 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8495 (match_operand:V16QI 2 "register_operand" "w")]
8499 "&& reload_completed"
8502 aarch64_split_combinev16qi (operands);
8505 [(set_attr "type" "multiple")]
8508 ;; This instruction's pattern is generated directly by
8509 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8510 ;; need corresponding changes there.
8511 (define_insn "aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8512 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8513 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8514 (match_operand:VALL_F16 2 "register_operand" "w")]
8517 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8518 [(set_attr "type" "neon_permute<q>")]
8521 ;; ZIP1 ignores the contents of the upper halves of the registers,
8522 ;; so we can describe 128-bit operations in terms of 64-bit inputs.
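;; In intrinsics terms (an illustrative sketch; the upper input halves
;; fed to ZIP1 below are don't-cares):
;;
;;   int32x4_t zip1_low (int32x2_t a, int32x2_t b)
;;   {
;;     /* zip1 v0.4s, va.4s, vb.4s -> { a[0], b[0], a[1], b[1] }.  */
;;     return vzip1q_s32 (vcombine_s32 (a, a), vcombine_s32 (b, b));
;;   }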
8523 (define_insn "aarch64_zip1<mode>_low"
8524 [(set (match_operand:VQ 0 "register_operand" "=w")
8525 (unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
8526 (match_operand:<VHALF> 2 "register_operand" "w")]
8529 "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8530 [(set_attr "type" "neon_permute_q")]
8533 ;; This instruction's pattern is generated directly by
8534 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8535 ;; need corresponding changes there. Note that the immediate (third)
8536 ;; operand is a lane index, not a byte index.
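;; For V4SI, for instance, lane index 1 is emitted as byte offset #4.
;; In intrinsics terms (an illustrative sketch; assumes arm_neon.h):
;;
;;   int32x4_t ext1 (int32x4_t a, int32x4_t b)
;;   {
;;     /* ext v0.16b, va.16b, vb.16b, #4
;;        -> { a[1], a[2], a[3], b[0] }.  */
;;     return vextq_s32 (a, b, 1);
;;   }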
8537 (define_insn "aarch64_ext<mode>"
8538 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8539 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8540 (match_operand:VALL_F16 2 "register_operand" "w")
8541 (match_operand:SI 3 "immediate_operand" "i")]
8545 operands[3] = GEN_INT (INTVAL (operands[3])
8546 * GET_MODE_UNIT_SIZE (<MODE>mode));
8547 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8549 [(set_attr "type" "neon_ext<q>")]
8552 ;; This instruction's pattern is generated directly by
8553 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8554 ;; need corresponding changes there.
8555 (define_insn "aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8556 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8557 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8560 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8561 [(set_attr "type" "neon_rev<q>")]
8564 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8565 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8566 (unspec:VSTRUCT_2DNX [
8567 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8570 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8571 [(set_attr "type" "neon_store2_2reg")]
8574 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8575 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8576 (unspec:VSTRUCT_2DX [
8577 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8580 "st1\\t{%S1.1d - %T1.1d}, %0"
8581 [(set_attr "type" "neon_store1_2reg")]
8584 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8585 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8586 (unspec:VSTRUCT_3DNX [
8587 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8590 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8591 [(set_attr "type" "neon_store3_3reg")]
8594 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8595 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8596 (unspec:VSTRUCT_3DX [
8597 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8600 "st1\\t{%S1.1d - %U1.1d}, %0"
8601 [(set_attr "type" "neon_store1_3reg")]
8604 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8605 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8606 (unspec:VSTRUCT_4DNX [
8607 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8610 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8611 [(set_attr "type" "neon_store4_4reg")]
8614 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8615 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8616 (unspec:VSTRUCT_4DX [
8617 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8620 "st1\\t{%S1.1d - %V1.1d}, %0"
8621 [(set_attr "type" "neon_store1_4reg")]
8624 (define_expand "aarch64_st<nregs><vstruct_elt>"
8625 [(match_operand:DI 0 "register_operand")
8626 (match_operand:VSTRUCT_D 1 "register_operand")]
8629 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8630 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8634 (define_expand "aarch64_st<nregs><vstruct_elt>"
8635 [(match_operand:DI 0 "register_operand")
8636 (match_operand:VSTRUCT_Q 1 "register_operand")]
8639 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8640 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8644 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
8645 [(match_operand:DI 0 "register_operand")
8646 (match_operand:VSTRUCT_QD 1 "register_operand")
8647 (match_operand:SI 2 "immediate_operand")]
8650 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8651 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8653 aarch64_simd_lane_bounds (operands[2], 0,
8654 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8655 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8656 operands[1], operands[2]));
8660 (define_expand "aarch64_st1<VALL_F16:mode>"
8661 [(match_operand:DI 0 "register_operand")
8662 (match_operand:VALL_F16 1 "register_operand")]
8665 machine_mode mode = <VALL_F16:MODE>mode;
8666 rtx mem = gen_rtx_MEM (mode, operands[0]);
8668 if (BYTES_BIG_ENDIAN)
8669 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8670 else
8671 emit_move_insn (mem, operands[1]);
8675 ;; Standard pattern name vec_init<mode><Vel>.
8677 (define_expand "vec_init<mode><Vel>"
8678 [(match_operand:VALL_F16 0 "register_operand")
8679 (match_operand 1 "" "")]
8682 aarch64_expand_vector_init (operands[0], operands[1]);
8686 (define_expand "vec_init<mode><Vhalf>"
8687 [(match_operand:VQ_NO2E 0 "register_operand")
8688 (match_operand 1 "" "")]
8691 aarch64_expand_vector_init (operands[0], operands[1]);
8695 (define_insn "*aarch64_simd_ld1r<mode>"
8696 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8697 (vec_duplicate:VALL_F16
8698 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8700 "ld1r\\t{%0.<Vtype>}, %1"
8701 [(set_attr "type" "neon_load1_all_lanes")]
8704 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8705 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8706 (unspec:VSTRUCT_2QD [
8707 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8710 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8711 [(set_attr "type" "neon_load1_2reg<q>")]
8715 (define_insn "@aarch64_frecpe<mode>"
8716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8718 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8721 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8722 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8725 (define_insn "aarch64_frecpx<mode>"
8726 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8727 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8730 "frecpx\t%<s>0, %<s>1"
8731 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8734 (define_insn "@aarch64_frecps<mode>"
8735 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8737 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8738 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8741 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8742 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8745 (define_insn "aarch64_urecpe<mode>"
8746 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8747 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8750 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8751 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8753 ;; Standard pattern name vec_extract<mode><Vel>.
8755 (define_expand "vec_extract<mode><Vel>"
8756 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8757 (match_operand:VALL_F16 1 "register_operand")
8758 (match_operand:SI 2 "immediate_operand")]
8761 emit_insn
8762 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8766 ;; Extract a 64-bit vector from one half of a 128-bit vector.
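;; In intrinsics terms this is vget_low/vget_high (an illustrative
;; sketch; assumes arm_neon.h):
;;
;;   int32x2_t high_half (int32x4_t x)
;;   {
;;     /* Selects lanes 2 and 3, i.e. operand 2 == 1 below.  */
;;     return vget_high_s32 (x);
;;   }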
8767 (define_expand "vec_extract<mode><Vhalf>"
8768 [(match_operand:<VHALF> 0 "register_operand")
8769 (match_operand:VQMOV_NO2E 1 "register_operand")
8770 (match_operand 2 "immediate_operand")]
8773 int start = INTVAL (operands[2]);
8774 gcc_assert (start == 0 || start == 1);
8775 start *= <nunits> / 2;
8776 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8777 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8781 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8782 (define_expand "vec_extract<mode><V1half>"
8783 [(match_operand:<V1HALF> 0 "register_operand")
8784 (match_operand:VQ_2E 1 "register_operand")
8785 (match_operand 2 "immediate_operand")]
8788 /* V1DI and V1DF are rarely used by other patterns, so it is better
8789 to hide them in a subreg destination of a normal DI or DF op.  */
8790 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8791 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8797 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8798 [(set (match_operand:V16QI 0 "register_operand" "=w")
8801 (match_operand:V16QI 1 "register_operand" "%0")
8802 (match_operand:V16QI 2 "register_operand" "w"))]
8805 "aes<aes_op>\\t%0.16b, %2.16b"
8806 [(set_attr "type" "crypto_aese")]
8809 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8810 [(set (match_operand:V16QI 0 "register_operand" "=w")
8811 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8814 "aes<aesmc_op>\\t%0.16b, %1.16b"
8815 [(set_attr "type" "crypto_aesmc")]
8818 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8819 ;; and enforce the register dependency without scheduling or register
8820 ;; allocation messing up the order or introducing moves in between.
8821 ;; Mash the two together during combine.
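;; The fused pair corresponds to the usual intrinsic idiom (an
;; illustrative sketch; assumes arm_neon.h and +crypto):
;;
;;   uint8x16_t aes_round (uint8x16_t data, uint8x16_t key)
;;   {
;;     /* Kept as one aese+aesmc pair by the pattern below.  */
;;     return vaesmcq_u8 (vaeseq_u8 (data, key));
;;   }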
8823 (define_insn "*aarch64_crypto_aese_fused"
8824 [(set (match_operand:V16QI 0 "register_operand" "=w")
8828 (match_operand:V16QI 1 "register_operand" "%0")
8829 (match_operand:V16QI 2 "register_operand" "w"))]
8833 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8834 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8835 [(set_attr "type" "crypto_aese")
8836 (set_attr "length" "8")]
8839 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8840 ;; and enforce the register dependency without scheduling or register
8841 ;; allocation messing up the order or introducing moves in between.
8842 ;; Mash the two together during combine.
8844 (define_insn "*aarch64_crypto_aesd_fused"
8845 [(set (match_operand:V16QI 0 "register_operand" "=w")
8849 (match_operand:V16QI 1 "register_operand" "%0")
8850 (match_operand:V16QI 2 "register_operand" "w"))]
8854 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8855 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8856 [(set_attr "type" "crypto_aese")
8857 (set_attr "length" "8")]
8862 (define_insn "aarch64_crypto_sha1hsi"
8863 [(set (match_operand:SI 0 "register_operand" "=w")
8864 (unspec:SI [(match_operand:SI 1
8865 "register_operand" "w")]
8869 [(set_attr "type" "crypto_sha1_fast")]
8872 (define_insn "aarch64_crypto_sha1hv4si"
8873 [(set (match_operand:SI 0 "register_operand" "=w")
8874 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8875 (parallel [(const_int 0)]))]
8877 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8879 [(set_attr "type" "crypto_sha1_fast")]
8882 (define_insn "aarch64_be_crypto_sha1hv4si"
8883 [(set (match_operand:SI 0 "register_operand" "=w")
8884 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8885 (parallel [(const_int 3)]))]
8887 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
8889 [(set_attr "type" "crypto_sha1_fast")]
8892 (define_insn "aarch64_crypto_sha1su1v4si"
8893 [(set (match_operand:V4SI 0 "register_operand" "=w")
8894 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8895 (match_operand:V4SI 2 "register_operand" "w")]
8898 "sha1su1\\t%0.4s, %2.4s"
8899 [(set_attr "type" "crypto_sha1_fast")]
8902 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8903 [(set (match_operand:V4SI 0 "register_operand" "=w")
8904 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8905 (match_operand:SI 2 "register_operand" "w")
8906 (match_operand:V4SI 3 "register_operand" "w")]
8909 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8910 [(set_attr "type" "crypto_sha1_slow")]
8913 (define_insn "aarch64_crypto_sha1su0v4si"
8914 [(set (match_operand:V4SI 0 "register_operand" "=w")
8915 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8916 (match_operand:V4SI 2 "register_operand" "w")
8917 (match_operand:V4SI 3 "register_operand" "w")]
8920 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8921 [(set_attr "type" "crypto_sha1_xor")]
8926 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8927 [(set (match_operand:V4SI 0 "register_operand" "=w")
8928 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8929 (match_operand:V4SI 2 "register_operand" "w")
8930 (match_operand:V4SI 3 "register_operand" "w")]
8933 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8934 [(set_attr "type" "crypto_sha256_slow")]
8937 (define_insn "aarch64_crypto_sha256su0v4si"
8938 [(set (match_operand:V4SI 0 "register_operand" "=w")
8939 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8940 (match_operand:V4SI 2 "register_operand" "w")]
8943 "sha256su0\\t%0.4s, %2.4s"
8944 [(set_attr "type" "crypto_sha256_fast")]
8947 (define_insn "aarch64_crypto_sha256su1v4si"
8948 [(set (match_operand:V4SI 0 "register_operand" "=w")
8949 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8950 (match_operand:V4SI 2 "register_operand" "w")
8951 (match_operand:V4SI 3 "register_operand" "w")]
8954 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8955 [(set_attr "type" "crypto_sha256_slow")]
8960 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8961 [(set (match_operand:V2DI 0 "register_operand" "=w")
8962 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8963 (match_operand:V2DI 2 "register_operand" "w")
8964 (match_operand:V2DI 3 "register_operand" "w")]
8967 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8968 [(set_attr "type" "crypto_sha512")]
8971 (define_insn "aarch64_crypto_sha512su0qv2di"
8972 [(set (match_operand:V2DI 0 "register_operand" "=w")
8973 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8974 (match_operand:V2DI 2 "register_operand" "w")]
8977 "sha512su0\\t%0.2d, %2.2d"
8978 [(set_attr "type" "crypto_sha512")]
8981 (define_insn "aarch64_crypto_sha512su1qv2di"
8982 [(set (match_operand:V2DI 0 "register_operand" "=w")
8983 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8984 (match_operand:V2DI 2 "register_operand" "w")
8985 (match_operand:V2DI 3 "register_operand" "w")]
8988 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8989 [(set_attr "type" "crypto_sha512")]
8994 (define_insn "eor3q<mode>4"
8995 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8998 (match_operand:VQ_I 2 "register_operand" "w")
8999 (match_operand:VQ_I 3 "register_operand" "w"))
9000 (match_operand:VQ_I 1 "register_operand" "w")))]
9002 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
9003 [(set_attr "type" "crypto_sha3")]
9006 (define_insn "aarch64_rax1qv2di"
9007 [(set (match_operand:V2DI 0 "register_operand" "=w")
9010 (match_operand:V2DI 2 "register_operand" "w")
9012 (match_operand:V2DI 1 "register_operand" "w")))]
9014 "rax1\\t%0.2d, %1.2d, %2.2d"
9015 [(set_attr "type" "crypto_sha3")]
9018 (define_insn "aarch64_xarqv2di"
9019 [(set (match_operand:V2DI 0 "register_operand" "=w")
9022 (match_operand:V2DI 1 "register_operand" "%w")
9023 (match_operand:V2DI 2 "register_operand" "w"))
9024 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
9026 "xar\\t%0.2d, %1.2d, %2.2d, %3"
9027 [(set_attr "type" "crypto_sha3")]
9030 (define_insn "bcaxq<mode>4"
9031 [(set (match_operand:VQ_I 0 "register_operand" "=w")
9034 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
9035 (match_operand:VQ_I 2 "register_operand" "w"))
9036 (match_operand:VQ_I 1 "register_operand" "w")))]
9038 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
9039 [(set_attr "type" "crypto_sha3")]
9044 (define_insn "aarch64_sm3ss1qv4si"
9045 [(set (match_operand:V4SI 0 "register_operand" "=w")
9046 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9047 (match_operand:V4SI 2 "register_operand" "w")
9048 (match_operand:V4SI 3 "register_operand" "w")]
9051 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
9052 [(set_attr "type" "crypto_sm3")]
9056 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9057 [(set (match_operand:V4SI 0 "register_operand" "=w")
9058 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9059 (match_operand:V4SI 2 "register_operand" "w")
9060 (match_operand:V4SI 3 "register_operand" "w")
9061 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9064 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9065 [(set_attr "type" "crypto_sm3")]
9068 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9069 [(set (match_operand:V4SI 0 "register_operand" "=w")
9070 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9071 (match_operand:V4SI 2 "register_operand" "w")
9072 (match_operand:V4SI 3 "register_operand" "w")]
9075 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9076 [(set_attr "type" "crypto_sm3")]
9081 (define_insn "aarch64_sm4eqv4si"
9082 [(set (match_operand:V4SI 0 "register_operand" "=w")
9083 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9084 (match_operand:V4SI 2 "register_operand" "w")]
9087 "sm4e\\t%0.4s, %2.4s"
9088 [(set_attr "type" "crypto_sm4")]
9091 (define_insn "aarch64_sm4ekeyqv4si"
9092 [(set (match_operand:V4SI 0 "register_operand" "=w")
9093 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9094 (match_operand:V4SI 2 "register_operand" "w")]
9097 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9098 [(set_attr "type" "crypto_sm4")]
9103 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9104 [(set (match_operand:VDQSF 0 "register_operand")
9106 [(match_operand:VDQSF 1 "register_operand")
9107 (match_operand:<VFMLA_W> 2 "register_operand")
9108 (match_operand:<VFMLA_W> 3 "register_operand")]
9112 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9113 <nunits> * 2, false);
9114 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9115 <nunits> * 2, false);
9117 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9126 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9127 [(set (match_operand:VDQSF 0 "register_operand")
9129 [(match_operand:VDQSF 1 "register_operand")
9130 (match_operand:<VFMLA_W> 2 "register_operand")
9131 (match_operand:<VFMLA_W> 3 "register_operand")]
9135 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9136 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9138 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9146 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9147 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9150 (vec_select:<VFMLA_SEL_W>
9151 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9152 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9154 (vec_select:<VFMLA_SEL_W>
9155 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9156 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9157 (match_operand:VDQSF 1 "register_operand" "0")))]
9159 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9160 [(set_attr "type" "neon_fp_mul_s")]
9163 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9164 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9168 (vec_select:<VFMLA_SEL_W>
9169 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9170 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9172 (vec_select:<VFMLA_SEL_W>
9173 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9174 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9175 (match_operand:VDQSF 1 "register_operand" "0")))]
9177 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9178 [(set_attr "type" "neon_fp_mul_s")]
9181 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9182 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9185 (vec_select:<VFMLA_SEL_W>
9186 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9187 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9189 (vec_select:<VFMLA_SEL_W>
9190 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9191 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9192 (match_operand:VDQSF 1 "register_operand" "0")))]
9194 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9195 [(set_attr "type" "neon_fp_mul_s")]
9198 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9199 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9203 (vec_select:<VFMLA_SEL_W>
9204 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9205 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9207 (vec_select:<VFMLA_SEL_W>
9208 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9209 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9210 (match_operand:VDQSF 1 "register_operand" "0")))]
9212 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9213 [(set_attr "type" "neon_fp_mul_s")]
9216 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9217 [(set (match_operand:V2SF 0 "register_operand")
9218 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9219 (match_operand:V4HF 2 "register_operand")
9220 (match_operand:V4HF 3 "register_operand")
9221 (match_operand:SI 4 "aarch64_imm2")]
9225 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9226 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9228 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9237 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9238 [(set (match_operand:V2SF 0 "register_operand")
9239 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9240 (match_operand:V4HF 2 "register_operand")
9241 (match_operand:V4HF 3 "register_operand")
9242 (match_operand:SI 4 "aarch64_imm2")]
9246 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9247 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9249 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9257 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9258 [(set (match_operand:V2SF 0 "register_operand" "=w")
9262 (match_operand:V4HF 2 "register_operand" "w")
9263 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9267 (match_operand:V4HF 3 "register_operand" "x")
9268 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9269 (match_operand:V2SF 1 "register_operand" "0")))]
9271 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9272 [(set_attr "type" "neon_fp_mul_s")]
9275 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9276 [(set (match_operand:V2SF 0 "register_operand" "=w")
9281 (match_operand:V4HF 2 "register_operand" "w")
9282 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9286 (match_operand:V4HF 3 "register_operand" "x")
9287 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9288 (match_operand:V2SF 1 "register_operand" "0")))]
9290 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9291 [(set_attr "type" "neon_fp_mul_s")]
9294 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9295 [(set (match_operand:V2SF 0 "register_operand" "=w")
9299 (match_operand:V4HF 2 "register_operand" "w")
9300 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9304 (match_operand:V4HF 3 "register_operand" "x")
9305 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9306 (match_operand:V2SF 1 "register_operand" "0")))]
9308 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9309 [(set_attr "type" "neon_fp_mul_s")]
9312 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9313 [(set (match_operand:V2SF 0 "register_operand" "=w")
9318 (match_operand:V4HF 2 "register_operand" "w")
9319 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9323 (match_operand:V4HF 3 "register_operand" "x")
9324 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9325 (match_operand:V2SF 1 "register_operand" "0")))]
9327 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9328 [(set_attr "type" "neon_fp_mul_s")]
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V8HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V8HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
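;; PMULL/PMULL2 compute a 64x64->128-bit carryless (polynomial) multiply,
;; the operation behind the vmull_p64/vmull_high_p64 intrinsics used by
;; GHASH- and CRC-style kernels.  A small worked example of carryless
;; multiplication, where partial products combine with XOR instead of
;; addition:
;;   0b101 * 0b011 = (x^2 + 1)(x + 1) = x^3 + x^2 + x + 1 = 0b1111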
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn_and_split "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  "&& <CODE> == ZERO_EXTEND
   && aarch64_split_simd_shift_p (insn)"
  [(const_int 0)]
  {
    /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
       provided that the cost of the zero can be amortized over several
       operations.  We'll later recombine the zero and zip if there are
       not enough uses of the zero to make the split worthwhile.  */
    rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
				   <MODE>mode, 0);
    rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
    emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
    DONE;
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
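;; For example (little-endian, and assuming the zero ends up shared with
;; neighbouring extensions), the split turns
;;   uxtl  v0.8h, v1.8b
;; into
;;   movi  v31.16b, #0
;;   zip1  v0.16b, v1.16b, v31.16b
;; interleaving each byte with a zero byte to form zero-extended halfwords.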
(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)
;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; Expander for the intrinsics that take only one mode, unlike the
;; two-mode trunc optab.
(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
  "TARGET_SIMD"
)
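;; XTN keeps the low half of each source element; for example
;;   xtn  v0.8b, v1.8h
;; writes v1.h[i] & 0xff into v0.b[i] for i in 0..7.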
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)
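;; A sketch of the semantics (not the Arm ARM pseudocode): each 32-bit
;; lane accumulates the dot product of one pair of adjacent BF16 elements
;; from each source, i.e. for the V4SF variant, for i in 0..3:
;;   d.s[i] += (float) a.h[2*i]     * (float) b.h[2*i]
;;           + (float) a.h[2*i + 1] * (float) b.h[2*i + 1]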
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)
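;; The lane immediate indexes a *pair* of BF16 elements (the unit BFDOT
;; operates on), hence the endianness correction over nunits / 2 rather
;; than nunits.  For example, with a V8BF operand 3 the valid lanes are
;; 0..3, and lane 2 selects the pair b.h[4], b.h[5].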
;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})
;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)
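;; A sketch of the semantics: the V4SF accumulator is viewed as a 2x2
;; row-major matrix C, operand 2 as a 2x4 BF16 matrix A and operand 3 as
;; a 4x2 BF16 matrix B, and the instruction performs C += A * B with the
;; products widened to single precision:
;;   C[i][j] += sum (k = 0..3) (float) A[i][k] * (float) B[k][j]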
;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:VBF 3 "register_operand" "x")
				 (match_operand:SI 4 "const_int_operand" "n")]
		    UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)
;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)
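;; As with BFMMLA above, a sketch of the semantics: the V4SI accumulator
;; is a 2x2 matrix C, operand 2 a 2x8 matrix A and operand 3 an 8x2
;; matrix B of bytes (signed, unsigned or mixed-sign according to the
;; MATMUL unspec):
;;   C[i][j] += sum (k = 0..7) A[i][k] * B[k][j]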
;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
			    UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)
;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)
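;; The shift trick works because BF16 is the high half of an IEEE binary32:
;; shifting the 16-bit pattern left by 16 yields the bit pattern of the
;; corresponding float.  A worked example:
;;   (bfloat16) 0x3f80 << 16 = 0x3f800000 = 1.0f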