1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this chunk is a sampled extraction of aarch64-simd.md;
;; the fused line numbers show interior lines of most patterns are
;; missing (conditions, braces, closers).  Only comments are added
;; here; all code lines are left byte-identical.

;; Vector move expander for all vector modes (VALL).  If the
;; destination is memory, the source is forced into a register so the
;; resulting insn has at most one memory operand.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
23 (match_operand:VALL 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Misaligned vector move.  May not fail at expand time: when neither
;; operand is a register (e.g. mem := const from the auto-vectorizer),
;; operand 1 is forced into a register.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into all lanes of an integer vector.  Two
;; alternatives: from a general register (dup Vd.T, Wn/Xn) or from
;; lane 0 of a SIMD register.
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]

;; Floating-point variant: duplicate lane 0 of a SIMD register into
;; all lanes (FP values never come from a general register here).
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF 0 "register_operand" "=w")
57 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
59 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
60 [(set_attr "type" "neon_dup<q>")]

;; Duplicate a selected lane into all lanes of the result.  The lane
;; number is remapped with ENDIAN_LANE_N so RTL lane numbering matches
;; the architectural numbering on big-endian.
63 (define_insn "aarch64_dup_lane<mode>"
64 [(set (match_operand:VALL 0 "register_operand" "=w")
67 (match_operand:VALL 1 "register_operand" "w")
68 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
72 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
73 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
75 [(set_attr "type" "neon_dup<q>")]

;; As above, but the source vector has the "swapped width" mode
;; (64-bit <-> 128-bit counterpart), hence the <VSWAP_WIDTH> lane
;; remapping.
78 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
79 [(set (match_operand:VALL 0 "register_operand" "=w")
82 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
83 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
87 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
88 INTVAL (operands[2])));
89 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
91 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move.  Alternatives cover: load, store, SIMD
;; reg-reg (orr), SIMD->GP (umov), GP->SIMD (ins), GP-GP (mov), and
;; immediate (via aarch64_output_simd_mov_immediate).  At least one
;; operand must be a register.
94 (define_insn "*aarch64_simd_mov<mode>"
95 [(set (match_operand:VD 0 "nonimmediate_operand"
96 "=w, m, w, ?r, ?w, ?r, w")
97 (match_operand:VD 1 "general_operand"
98 "m, w, w, w, r, r, Dn"))]
100 && (register_operand (operands[0], <MODE>mode)
101 || register_operand (operands[1], <MODE>mode))"
103 switch (which_alternative)
105 case 0: return "ldr\\t%d0, %1";
106 case 1: return "str\\t%d1, %0";
107 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
108 case 3: return "umov\t%0, %1.d[0]";
109 case 4: return "ins\t%0.d[0], %1";
110 case 5: return "mov\t%0, %1";
112 return aarch64_output_simd_mov_immediate (operands[1],
114 default: gcc_unreachable ();
117 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
118 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
119 mov_reg, neon_move<q>")]

;; 128-bit (VQ) vector move.  GP-involving alternatives need two
;; instructions (length 8) and are split after reload by the splits
;; below; the rest are single instructions.
122 (define_insn "*aarch64_simd_mov<mode>"
123 [(set (match_operand:VQ 0 "nonimmediate_operand"
124 "=w, m, w, ?r, ?w, ?r, w")
125 (match_operand:VQ 1 "general_operand"
126 "m, w, w, w, r, r, Dn"))]
128 && (register_operand (operands[0], <MODE>mode)
129 || register_operand (operands[1], <MODE>mode))"
131 switch (which_alternative)
134 return "ldr\\t%q0, %1";
136 return "str\\t%q1, %0";
138 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
144 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
149 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
150 neon_logic<q>, multiple, multiple, multiple,\
152 (set_attr "length" "4,4,4,8,8,8,4")]

;; After reload, split a 128-bit GP<->GP vector move into two DImode
;; register-register moves.
156 [(set (match_operand:VQ 0 "register_operand" "")
157 (match_operand:VQ 1 "register_operand" ""))]
158 "TARGET_SIMD && reload_completed
159 && GP_REGNUM_P (REGNO (operands[0]))
160 && GP_REGNUM_P (REGNO (operands[1]))"
163 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; After reload, split a 128-bit move between an FP register and a GP
;; register pair via aarch64_split_simd_move.
168 [(set (match_operand:VQ 0 "register_operand" "")
169 (match_operand:VQ 1 "register_operand" ""))]
170 "TARGET_SIMD && reload_completed
171 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
172 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
175 aarch64_split_simd_move (operands[0], operands[1]);
;; Expand a 128-bit FP<->GP move as two half-width operations.
;; GP source: move the low then the high half into the SIMD register.
;; SIMD source: extract the low then the high half into GP registers,
;; using vec_select parallels built for the current endianness.
179 (define_expand "aarch64_split_simd_mov<mode>"
180 [(set (match_operand:VQ 0)
181 (match_operand:VQ 1))]
184 rtx dst = operands[0];
185 rtx src = operands[1];
187 if (GP_REGNUM_P (REGNO (src)))
189 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
190 rtx src_high_part = gen_highpart (<VHALF>mode, src);
193 (gen_move_lo_quad_<mode> (dst, src_low_part));
195 (gen_move_hi_quad_<mode> (dst, src_high_part));
200 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
201 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
202 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
203 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
206 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
208 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

;; Move the low half of a 128-bit SIMD register into a GP register.
;; Only valid after reload (operand constraints are fixed by then).
214 (define_insn "aarch64_simd_mov_from_<mode>low"
215 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
217 (match_operand:VQ 1 "register_operand" "w")
218 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
219 "TARGET_SIMD && reload_completed"
221 [(set_attr "type" "neon_to_gp<q>")
222 (set_attr "length" "4")

;; Move the high half of a 128-bit SIMD register into a GP register.
225 (define_insn "aarch64_simd_mov_from_<mode>high"
226 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
228 (match_operand:VQ 1 "register_operand" "w")
229 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
230 "TARGET_SIMD && reload_completed"
232 [(set_attr "type" "neon_to_gp<q>")
233 (set_attr "length" "4")
;; OR-NOT: op0 = op2 | ~op1 (note the operand swap in the template:
;; ORN's complemented input is its second assembler operand).
236 (define_insn "orn<mode>3"
237 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
238 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
239 (match_operand:VDQ_I 2 "register_operand" "w")))]
241 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
242 [(set_attr "type" "neon_logic<q>")]

;; Bit-clear: op0 = op2 & ~op1 (same operand swap as ORN).
245 (define_insn "bic<mode>3"
246 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
247 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
248 (match_operand:VDQ_I 2 "register_operand" "w")))]
250 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
251 [(set_attr "type" "neon_logic<q>")]

;; Vector integer addition.
254 (define_insn "add<mode>3"
255 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
256 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
257 (match_operand:VDQ_I 2 "register_operand" "w")))]
259 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
260 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtraction.
263 (define_insn "sub<mode>3"
264 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
265 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
266 (match_operand:VDQ_I 2 "register_operand" "w")))]
268 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
269 [(set_attr "type" "neon_sub<q>")]

;; Vector integer multiply (byte/half/single element sizes only;
;; there is no AdvSIMD MUL for 64-bit lanes).
272 (define_insn "mul<mode>3"
273 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
274 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
275 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
277 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
278 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element, using the element-size-dependent REV form.
281 (define_insn "bswap<mode>2"
282 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
283 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
285 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
286 [(set_attr "type" "neon_rev<q>")]

;; Reverse the bits within each byte (RBIT operates on byte vectors).
289 (define_insn "aarch64_rbit<mode>"
290 [(set (match_operand:VB 0 "register_operand" "=w")
291 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
294 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
295 [(set_attr "type" "neon_rbit")]

;; Count trailing zeros: byte-swap each element, bit-reverse every
;; byte (together reversing the whole element), then count leading
;; zeros of the result.
298 (define_expand "ctz<mode>2"
299 [(set (match_operand:VS 0 "register_operand")
300 (ctz:VS (match_operand:VS 1 "register_operand")))]
303 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
304 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
306 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
307 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply a vector by a broadcast lane of another vector, matched by
;; combine ("*" prefix: no gen_* entry point).  Lane numbers are
;; endian-adjusted before printing.
312 (define_insn "*aarch64_mul3_elt<mode>"
313 [(set (match_operand:VMUL 0 "register_operand" "=w")
317 (match_operand:VMUL 1 "register_operand" "<h_con>")
318 (parallel [(match_operand:SI 2 "immediate_operand")])))
319 (match_operand:VMUL 3 "register_operand" "w")))]
322 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
323 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
325 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; As above, with the lane taken from the swapped-width counterpart
;; mode of the destination.
328 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
329 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
330 (mult:VMUL_CHANGE_NLANES
331 (vec_duplicate:VMUL_CHANGE_NLANES
333 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
334 (parallel [(match_operand:SI 2 "immediate_operand")])))
335 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
338 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
339 INTVAL (operands[2])));
340 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
342 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; V2DF multiplied by a scalar DF broadcast to both lanes.
345 (define_insn "*aarch64_mul3_elt_to_128df"
346 [(set (match_operand:V2DF 0 "register_operand" "=w")
349 (match_operand:DF 2 "register_operand" "w"))
350 (match_operand:V2DF 1 "register_operand" "w")))]
352 "fmul\\t%0.2d, %1.2d, %2.d[0]"
353 [(set_attr "type" "neon_fp_mul_d_scalar_q")]

;; Scalar DF multiplied by a selected lane of a V2DF.
356 (define_insn "*aarch64_mul3_elt_to_64v2df"
357 [(set (match_operand:DF 0 "register_operand" "=w")
360 (match_operand:V2DF 1 "register_operand" "w")
361 (parallel [(match_operand:SI 2 "immediate_operand")]))
362 (match_operand:DF 3 "register_operand" "w")))]
365 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
366 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
368 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
371 (define_insn "neg<mode>2"
372 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
373 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
375 "neg\t%0.<Vtype>, %1.<Vtype>"
376 [(set_attr "type" "neon_neg<q>")]

;; Vector integer absolute value (rtl abs form, combinable).
379 (define_insn "abs<mode>2"
380 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
381 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
383 "abs\t%0.<Vtype>, %1.<Vtype>"
384 [(set_attr "type" "neon_abs<q>")]

387 ;; The intrinsic version of integer ABS must not be allowed to
388 ;; combine with any operation with an integrated ABS step, such
;; as SABD/SABA -- hence the unspec form below instead of rtl abs.
390 (define_insn "aarch64_abs<mode>"
391 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
393 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
396 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
397 [(set_attr "type" "neon_abs<q>")]

;; Signed absolute difference: |op1 - op2|.
400 (define_insn "abd<mode>_3"
401 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
402 (abs:VDQ_BHSI (minus:VDQ_BHSI
403 (match_operand:VDQ_BHSI 1 "register_operand" "w")
404 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
406 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
407 [(set_attr "type" "neon_abd<q>")]

;; Signed absolute difference and accumulate: op3 += |op1 - op2|
;; (operand 3 is tied to the output, constraint "0").
410 (define_insn "aba<mode>_3"
411 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
412 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
413 (match_operand:VDQ_BHSI 1 "register_operand" "w")
414 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
415 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
417 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
418 [(set_attr "type" "neon_arith_acc<q>")]

;; Floating-point absolute difference, vector form.
421 (define_insn "fabd<mode>_3"
422 [(set (match_operand:VDQF 0 "register_operand" "=w")
423 (abs:VDQF (minus:VDQF
424 (match_operand:VDQF 1 "register_operand" "w")
425 (match_operand:VDQF 2 "register_operand" "w"))))]
427 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
428 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]

;; Floating-point absolute difference, scalar (GPF) form.
431 (define_insn "*fabd_scalar<mode>3"
432 [(set (match_operand:GPF 0 "register_operand" "=w")
434 (match_operand:GPF 1 "register_operand" "w")
435 (match_operand:GPF 2 "register_operand" "w"))))]
437 "fabd\t%<s>0, %<s>1, %<s>2"
438 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Vector bitwise AND.
441 (define_insn "and<mode>3"
442 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
443 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
444 (match_operand:VDQ_I 2 "register_operand" "w")))]
446 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
447 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise OR.
450 (define_insn "ior<mode>3"
451 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
452 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
453 (match_operand:VDQ_I 2 "register_operand" "w")))]
455 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
456 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise XOR.
459 (define_insn "xor<mode>3"
460 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
461 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
462 (match_operand:VDQ_I 2 "register_operand" "w")))]
464 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
465 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise NOT.
468 (define_insn "one_cmpl<mode>2"
469 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
470 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
472 "not\t%0.<Vbtype>, %1.<Vbtype>"
473 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of an integer vector.  Operand 2 is
;; a one-hot mask: exact_log2 recovers the lane, which is then
;; endian-adjusted.  Alternatives: from GP reg (ins), from lane 0 of
;; a SIMD reg (ins), or directly from memory (ld1 to one lane).
476 (define_insn "aarch64_simd_vec_set<mode>"
477 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
479 (vec_duplicate:VDQ_BHSI
480 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
481 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
482 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
485 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
486 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
487 switch (which_alternative)
490 return "ins\\t%0.<Vetype>[%p2], %w1";
492 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
494 return "ld1\\t{%0.<Vetype>}[%p2], %1";
499 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Logical shift right by an immediate vector (all lanes same count).
502 (define_insn "aarch64_simd_lshr<mode>"
503 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
504 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
505 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
507 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
508 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by an immediate vector.
511 (define_insn "aarch64_simd_ashr<mode>"
512 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
513 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
514 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
516 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
517 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by an immediate vector.
520 (define_insn "aarch64_simd_imm_shl<mode>"
521 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
522 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
523 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
525 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
526 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by per-lane register amounts (SSHL with positive
;; counts; rtl ashift form).
529 (define_insn "aarch64_simd_reg_sshl<mode>"
530 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
531 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
532 (match_operand:VDQ_I 2 "register_operand" "w")))]
534 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
535 [(set_attr "type" "neon_shift_reg<q>")]

;; USHL by register: unspec because negative lane counts shift right,
;; which plain rtl shift codes cannot express.
538 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
539 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
540 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
541 (match_operand:VDQ_I 2 "register_operand" "w")]
542 UNSPEC_ASHIFT_UNSIGNED))]
544 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
545 [(set_attr "type" "neon_shift_reg<q>")]

;; SSHL by register: signed counterpart of the above.
548 (define_insn "aarch64_simd_reg_shl<mode>_signed"
549 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
550 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
551 (match_operand:VDQ_I 2 "register_operand" "w")]
552 UNSPEC_ASHIFT_SIGNED))]
554 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
555 [(set_attr "type" "neon_shift_reg<q>")]
;; Vector shift-left expander with a scalar SI shift count.  A
;; constant count in [0, width-1] maps to the immediate SHL pattern;
;; otherwise the count is forced to a register, broadcast with dup,
;; and SSHL does the work.
558 (define_expand "ashl<mode>3"
559 [(match_operand:VDQ_I 0 "register_operand" "")
560 (match_operand:VDQ_I 1 "register_operand" "")
561 (match_operand:SI 2 "general_operand" "")]
564 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
567 if (CONST_INT_P (operands[2]))
569 shift_amount = INTVAL (operands[2]);
570 if (shift_amount >= 0 && shift_amount < bit_width)
572 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
574 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
581 operands[2] = force_reg (SImode, operands[2]);
584 else if (MEM_P (operands[2]))
586 operands[2] = force_reg (SImode, operands[2]);
589 if (REG_P (operands[2]))
591 rtx tmp = gen_reg_rtx (<MODE>mode);
592 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
593 convert_to_mode (<VEL>mode,
596 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical shift-right expander.  Immediate range is (0, width]
;; (right shifts encode 1..width); the register fallback negates the
;; count and uses USHL, since USHL shifts right for negative counts.
605 (define_expand "lshr<mode>3"
606 [(match_operand:VDQ_I 0 "register_operand" "")
607 (match_operand:VDQ_I 1 "register_operand" "")
608 (match_operand:SI 2 "general_operand" "")]
611 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
614 if (CONST_INT_P (operands[2]))
616 shift_amount = INTVAL (operands[2]);
617 if (shift_amount > 0 && shift_amount <= bit_width)
619 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
621 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
627 operands[2] = force_reg (SImode, operands[2]);
629 else if (MEM_P (operands[2]))
631 operands[2] = force_reg (SImode, operands[2]);
634 if (REG_P (operands[2]))
636 rtx tmp = gen_reg_rtx (SImode);
637 rtx tmp1 = gen_reg_rtx (<MODE>mode);
638 emit_insn (gen_negsi2 (tmp, operands[2]));
639 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
640 convert_to_mode (<VEL>mode,
642 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic shift-right expander: as lshr above but using SSHR for
;; immediates and signed SSHL for the negated-register fallback.
652 (define_expand "ashr<mode>3"
653 [(match_operand:VDQ_I 0 "register_operand" "")
654 (match_operand:VDQ_I 1 "register_operand" "")
655 (match_operand:SI 2 "general_operand" "")]
658 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
661 if (CONST_INT_P (operands[2]))
663 shift_amount = INTVAL (operands[2]);
664 if (shift_amount > 0 && shift_amount <= bit_width)
666 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
668 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
674 operands[2] = force_reg (SImode, operands[2]);
676 else if (MEM_P (operands[2]))
678 operands[2] = force_reg (SImode, operands[2]);
681 if (REG_P (operands[2]))
683 rtx tmp = gen_reg_rtx (SImode);
684 rtx tmp1 = gen_reg_rtx (<MODE>mode);
685 emit_insn (gen_negsi2 (tmp, operands[2]));
686 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
687 convert_to_mode (<VEL>mode,
689 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: straight to SSHL.
699 (define_expand "vashl<mode>3"
700 [(match_operand:VDQ_I 0 "register_operand" "")
701 (match_operand:VDQ_I 1 "register_operand" "")
702 (match_operand:VDQ_I 2 "register_operand" "")]
705 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
710 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
711 ;; Negating individual lanes most certainly offsets the
712 ;; gain from vectorization.
713 (define_expand "vashr<mode>3"
714 [(match_operand:VDQ_BHSI 0 "register_operand" "")
715 (match_operand:VDQ_BHSI 1 "register_operand" "")
716 (match_operand:VDQ_BHSI 2 "register_operand" "")]
719 rtx neg = gen_reg_rtx (<MODE>mode);
720 emit (gen_neg<mode>2 (neg, operands[2]))
721 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode intrinsic arithmetic shift right; a count of 64 is mapped
;; to 63 since asr by 63 already fills the result with the sign bit.
727 (define_expand "aarch64_ashr_simddi"
728 [(match_operand:DI 0 "register_operand" "=w")
729 (match_operand:DI 1 "register_operand" "w")
730 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
733 /* An arithmetic shift right by 64 fills the result with copies of the sign
734 bit, just like asr by 63 - however the standard pattern does not handle
736 if (INTVAL (operands[2]) == 64)
737 operands[2] = GEN_INT (63);
738 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate counts, then USHL.
743 (define_expand "vlshr<mode>3"
744 [(match_operand:VDQ_BHSI 0 "register_operand" "")
745 (match_operand:VDQ_BHSI 1 "register_operand" "")
746 (match_operand:VDQ_BHSI 2 "register_operand" "")]
749 rtx neg = gen_reg_rtx (<MODE>mode);
750 emit (gen_neg<mode>2 (neg, operands[2]))
751 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode intrinsic logical shift right; a count of 64 yields zero.
756 (define_expand "aarch64_lshr_simddi"
757 [(match_operand:DI 0 "register_operand" "=w")
758 (match_operand:DI 1 "register_operand" "w")
759 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
762 if (INTVAL (operands[2]) == 64)
763 emit_move_insn (operands[0], const0_rtx);
765 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; vec_set expander (integer elements): convert the lane index to the
;; one-hot mask expected by aarch64_simd_vec_set<mode>.
770 (define_expand "vec_set<mode>"
771 [(match_operand:VDQ_BHSI 0 "register_operand")
772 (match_operand:<VEL> 1 "register_operand")
773 (match_operand:SI 2 "immediate_operand")]
776 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
777 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
778 GEN_INT (elem), operands[0]));

783 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register shift "right" in element order: on big-endian that
;; is an architectural shift left, hence the shl/ushr split.
784 (define_insn "vec_shr_<mode>"
785 [(set (match_operand:VD 0 "register_operand" "=w")
786 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
787 (match_operand:SI 2 "immediate_operand" "i")]
791 if (BYTES_BIG_ENDIAN)
792 return "shl %d0, %d1, %2";
794 return "ushr %d0, %d1, %2";
796 [(set_attr "type" "neon_shift_imm")]

;; Insert a DI scalar into one lane of a V2DI; one-hot operand 2 as
;; in the generic integer pattern above.
799 (define_insn "aarch64_simd_vec_setv2di"
800 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
803 (match_operand:DI 1 "register_operand" "r,w"))
804 (match_operand:V2DI 3 "register_operand" "0,0")
805 (match_operand:SI 2 "immediate_operand" "i,i")))]
808 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
809 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
810 switch (which_alternative)
813 return "ins\\t%0.d[%p2], %1";
815 return "ins\\t%0.d[%p2], %1.d[0]";
820 [(set_attr "type" "neon_from_gp, neon_ins_q")]

;; vec_set expander, V2DI.
823 (define_expand "vec_setv2di"
824 [(match_operand:V2DI 0 "register_operand")
825 (match_operand:DI 1 "register_operand")
826 (match_operand:SI 2 "immediate_operand")]
829 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
830 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
831 GEN_INT (elem), operands[0]));

;; Insert an FP scalar into one lane of an FP vector (ins from lane 0).
836 (define_insn "aarch64_simd_vec_set<mode>"
837 [(set (match_operand:VDQF 0 "register_operand" "=w")
840 (match_operand:<VEL> 1 "register_operand" "w"))
841 (match_operand:VDQF 3 "register_operand" "0")
842 (match_operand:SI 2 "immediate_operand" "i")))]
845 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
847 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
848 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
850 [(set_attr "type" "neon_ins<q>")]

;; vec_set expander, FP elements.
853 (define_expand "vec_set<mode>"
854 [(match_operand:VDQF 0 "register_operand" "+w")
855 (match_operand:<VEL> 1 "register_operand" "w")
856 (match_operand:SI 2 "immediate_operand" "")]
859 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
860 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
861 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: op0 = op1 + op2 * op3 (accumulator tied, "0").
867 (define_insn "aarch64_mla<mode>"
868 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
869 (plus:VDQ_BHSI (mult:VDQ_BHSI
870 (match_operand:VDQ_BHSI 2 "register_operand" "w")
871 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
872 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
874 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
875 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLA with the multiplier taken from one (endian-adjusted) lane.
878 (define_insn "*aarch64_mla_elt<mode>"
879 [(set (match_operand:VDQHS 0 "register_operand" "=w")
884 (match_operand:VDQHS 1 "register_operand" "<h_con>")
885 (parallel [(match_operand:SI 2 "immediate_operand")])))
886 (match_operand:VDQHS 3 "register_operand" "w"))
887 (match_operand:VDQHS 4 "register_operand" "0")))]
890 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
891 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
893 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Lane MLA with the lane taken from the swapped-width mode.
896 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
897 [(set (match_operand:VDQHS 0 "register_operand" "=w")
902 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
903 (parallel [(match_operand:SI 2 "immediate_operand")])))
904 (match_operand:VDQHS 3 "register_operand" "w"))
905 (match_operand:VDQHS 4 "register_operand" "0")))]
908 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
909 INTVAL (operands[2])));
910 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
912 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Multiply-subtract: op0 = op1 - op2 * op3.
915 (define_insn "aarch64_mls<mode>"
916 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
917 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
918 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
919 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
921 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
922 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLS with the multiplier taken from one lane.
925 (define_insn "*aarch64_mls_elt<mode>"
926 [(set (match_operand:VDQHS 0 "register_operand" "=w")
928 (match_operand:VDQHS 4 "register_operand" "0")
932 (match_operand:VDQHS 1 "register_operand" "<h_con>")
933 (parallel [(match_operand:SI 2 "immediate_operand")])))
934 (match_operand:VDQHS 3 "register_operand" "w"))))]
937 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
938 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
940 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Lane MLS, swapped-width source for the lane.
943 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
944 [(set (match_operand:VDQHS 0 "register_operand" "=w")
946 (match_operand:VDQHS 4 "register_operand" "0")
950 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
951 (parallel [(match_operand:SI 2 "immediate_operand")])))
952 (match_operand:VDQHS 3 "register_operand" "w"))))]
955 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
956 INTVAL (operands[2])));
957 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
959 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
962 ;; Max/Min operations.
;; Signed/unsigned vector max/min for B/H/S element sizes.
963 (define_insn "<su><maxmin><mode>3"
964 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
965 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
966 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
968 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
969 [(set_attr "type" "neon_minmax<q>")]

;; V2DI max/min: no direct AdvSIMD instruction, so expand via a
;; compare and the internal vcond (select) pattern.
972 (define_expand "<su><maxmin>v2di3"
973 [(set (match_operand:V2DI 0 "register_operand" "")
974 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
975 (match_operand:V2DI 2 "register_operand" "")))]
978 enum rtx_code cmp_operator;
999 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1000 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1001 operands[2], cmp_fmt, operands[1], operands[2]));

1005 ;; Pairwise Integer Max/Min operations.
1006 (define_insn "aarch64_<maxmin_uns>p<mode>"
1007 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1008 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1009 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1012 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1013 [(set_attr "type" "neon_minmax<q>")]

1016 ;; Pairwise FP Max/Min operations.
1017 (define_insn "aarch64_<maxmin_uns>p<mode>"
1018 [(set (match_operand:VDQF 0 "register_operand" "=w")
1019 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1020 (match_operand:VDQF 2 "register_operand" "w")]
1023 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024 [(set_attr "type" "neon_minmax<q>")]
1027 ;; vec_concat gives a new vector with the low elements from operand 1, and
1028 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1029 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1030 ;; What that means, is that the RTL descriptions of the below patterns
1031 ;; need to change depending on endianness.

1033 ;; Move to the low architectural bits of the register.
1034 ;; On little-endian this is { operand, zeroes }
1035 ;; On big-endian this is { zeroes, operand }

;; Little-endian low-half move, modes with more than 2 elements.
1037 (define_insn "move_lo_quad_internal_<mode>"
1038 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1040 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1041 (vec_duplicate:<VHALF> (const_int 0))))]
1042 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1047 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1048 (set_attr "simd" "yes,*,yes")
1049 (set_attr "fp" "*,yes,*")
1050 (set_attr "length" "4")]

;; Little-endian low-half move, 2-element modes.
1053 (define_insn "move_lo_quad_internal_<mode>"
1054 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1056 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1058 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1063 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1064 (set_attr "simd" "yes,*,yes")
1065 (set_attr "fp" "*,yes,*")
1066 (set_attr "length" "4")]

;; Big-endian low-half move: zeroes first in the vec_concat.
1069 (define_insn "move_lo_quad_internal_be_<mode>"
1070 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1072 (vec_duplicate:<VHALF> (const_int 0))
1073 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1074 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1079 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1080 (set_attr "simd" "yes,*,yes")
1081 (set_attr "fp" "*,yes,*")
1082 (set_attr "length" "4")]

;; Big-endian low-half move, 2-element modes.
1085 (define_insn "move_lo_quad_internal_be_<mode>"
1086 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1089 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1090 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1095 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1096 (set_attr "simd" "yes,*,yes")
1097 (set_attr "fp" "*,yes,*")
1098 (set_attr "length" "4")]

;; Dispatch to the endian-appropriate internal pattern.
1101 (define_expand "move_lo_quad_<mode>"
1102 [(match_operand:VQ 0 "register_operand")
1103 (match_operand:VQ 1 "register_operand")]
1106 if (BYTES_BIG_ENDIAN)
1107 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1109 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));

1114 ;; Move operand1 to the high architectural bits of the register, keeping
1115 ;; the low architectural bits of operand2.
1116 ;; For little-endian this is { operand2, operand1 }
1117 ;; For big-endian this is { operand1, operand2 }

1119 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1120 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1124 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1125 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1126 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1128 ins\\t%0.d[1], %1.d[0]
1130 [(set_attr "type" "neon_ins")]

1133 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1134 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1136 (match_operand:<VHALF> 1 "register_operand" "w,r")
1139 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1140 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1142 ins\\t%0.d[1], %1.d[0]
1144 [(set_attr "type" "neon_ins")]

;; Dispatch to the endian-appropriate high-half move, passing the
;; low-half lane parallel built here.
1147 (define_expand "move_hi_quad_<mode>"
1148 [(match_operand:VQ 0 "register_operand" "")
1149 (match_operand:<VHALF> 1 "register_operand" "")]
1152 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1153 if (BYTES_BIG_ENDIAN)
1154 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1157 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1162 ;; Narrowing operations.

;; Truncate each element of a 128-bit vector to half width (XTN).
1165 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1166 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1167 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1169 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1170 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Pack two 64-bit vectors: concatenate them into a temporary 128-bit
;; register (operand order chosen by endianness), then XTN.
1173 (define_expand "vec_pack_trunc_<mode>"
1174 [(match_operand:<VNARROWD> 0 "register_operand" "")
1175 (match_operand:VDN 1 "register_operand" "")
1176 (match_operand:VDN 2 "register_operand" "")]
1179 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1183 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1184 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1185 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));

;; Pack two 128-bit vectors with xtn + xtn2; the output is
;; early-clobbered ("=&w") and the xtn order depends on endianness.
1191 (define_insn "vec_pack_trunc_<mode>"
1192 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1193 (vec_concat:<VNARROWQ2>
1194 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1195 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1198 if (BYTES_BIG_ENDIAN)
1199 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1201 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1203 [(set_attr "type" "multiple")
1204 (set_attr "length" "8")]
1207 ;; Widening operations.
;; SSHLL/USHLL #0 sign/zero-extends the low half of a 128-bit vector
;; (ANY_EXTEND iterates over sign_extend and zero_extend via <su>).
1209 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1211 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1212 (match_operand:VQW 1 "register_operand" "w")
1213 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1216 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1217 [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant: SSHLL2/USHLL2.
1220 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1221 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1222 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1223 (match_operand:VQW 1 "register_operand" "w")
1224 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1227 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1228 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo lane PARALLEL (p) and emit the
;; matching insn above.
1231 (define_expand "vec_unpack<su>_hi_<mode>"
1232 [(match_operand:<VWIDE> 0 "register_operand" "")
1233 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1236 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1237 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1243 (define_expand "vec_unpack<su>_lo_<mode>"
1244 [(match_operand:<VWIDE> 0 "register_operand" "")
1245 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1248 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1249 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1255 ;; Widening arithmetic.
;; Combiner-only patterns (leading "*"): widening multiply-accumulate on
;; the low halves of two 128-bit sources (SMLAL/UMLAL). Operand 1 is the
;; accumulator, tied to the destination ("0").
1257 (define_insn "*aarch64_<su>mlal_lo<mode>"
1258 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1261 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1262 (match_operand:VQW 2 "register_operand" "w")
1263 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1264 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1265 (match_operand:VQW 4 "register_operand" "w")
1267 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1269 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1270 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: SMLAL2/UMLAL2.
1273 (define_insn "*aarch64_<su>mlal_hi<mode>"
1274 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1277 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1278 (match_operand:VQW 2 "register_operand" "w")
1279 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1280 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1281 (match_operand:VQW 4 "register_operand" "w")
1283 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1285 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1286 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, low halves (SMLSL/UMLSL); accumulator is
;; operand 1, tied to the destination.
1289 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1290 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1292 (match_operand:<VWIDE> 1 "register_operand" "0")
1294 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1295 (match_operand:VQW 2 "register_operand" "w")
1296 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1297 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1298 (match_operand:VQW 4 "register_operand" "w")
1301 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1302 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: SMLSL2/UMLSL2.
1305 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1308 (match_operand:<VWIDE> 1 "register_operand" "0")
1310 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1311 (match_operand:VQW 2 "register_operand" "w")
1312 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1313 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1314 (match_operand:VQW 4 "register_operand" "w")
1317 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1318 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-source forms: whole D-register operands, no half selection;
;; accumulator is operand 3 here (tied to the destination).
1321 (define_insn "*aarch64_<su>mlal<mode>"
1322 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1326 (match_operand:VD_BHSI 1 "register_operand" "w"))
1328 (match_operand:VD_BHSI 2 "register_operand" "w")))
1329 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1331 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1332 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-source multiply-subtract; accumulator back in operand 1.
1335 (define_insn "*aarch64_<su>mlsl<mode>"
1336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1338 (match_operand:<VWIDE> 1 "register_operand" "0")
1341 (match_operand:VD_BHSI 2 "register_operand" "w"))
1343 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1345 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1346 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
1349 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1350 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1351 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 1 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 2 "register_operand" "w")
1358 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1359 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: builds the lo-half lane PARALLEL (p) and emits
;; the insn above.
1362 (define_expand "vec_widen_<su>mult_lo_<mode>"
1363 [(match_operand:<VWIDE> 0 "register_operand" "")
1364 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1365 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1368 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1369 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; High-half multiply (SMULL2/UMULL2) and its expander.
1376 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1377 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1378 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1379 (match_operand:VQW 1 "register_operand" "w")
1380 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1381 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1382 (match_operand:VQW 2 "register_operand" "w")
1385 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1386 [(set_attr "type" "neon_mul_<Vetype>_long")]
1389 (define_expand "vec_widen_<su>mult_hi_<mode>"
1390 [(match_operand:<VWIDE> 0 "register_operand" "")
1391 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1392 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1396 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1404 ;; FP vector operations.
1405 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1406 ;; double-precision (64-bit) floating-point data types and arithmetic as
1407 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1408 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1410 ;; Floating-point operations can raise an exception. Vectorizing such
1411 ;; operations is safe because of reasons explained below.
1413 ;; ARMv8 permits an extension to enable trapped floating-point
1414 ;; exception handling, however this is an optional feature. In the
1415 ;; event of a floating-point exception being raised by vectorised
1417 ;; 1. If trapped floating-point exceptions are available, then a trap
1418 ;; will be taken when any lane raises an enabled exception. A trap
1419 ;; handler may determine which lane raised the exception.
1420 ;; 2. Alternatively a sticky exception flag is set in the
1421 ;; floating-point status register (FPSR). Software may explicitly
1422 ;; test the exception flags, in which case the tests will either
1423 ;; prevent vectorisation, allowing precise identification of the
1424 ;; failing operation, or if tested outside of vectorisable regions
1425 ;; then the specific operation and lane are not of interest.
1427 ;; FP arithmetic operations.
;; Element-wise FP arithmetic on 64- and 128-bit float vectors (VDQF).
;; Each maps one RTL operation directly to one AdvSIMD instruction.
1429 (define_insn "add<mode>3"
1430 [(set (match_operand:VDQF 0 "register_operand" "=w")
1431 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1432 (match_operand:VDQF 2 "register_operand" "w")))]
1434 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1435 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1438 (define_insn "sub<mode>3"
1439 [(set (match_operand:VDQF 0 "register_operand" "=w")
1440 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1441 (match_operand:VDQF 2 "register_operand" "w")))]
1443 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1444 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1447 (define_insn "mul<mode>3"
1448 [(set (match_operand:VDQF 0 "register_operand" "=w")
1449 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1450 (match_operand:VDQF 2 "register_operand" "w")))]
1452 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1453 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1456 (define_insn "div<mode>3"
1457 [(set (match_operand:VDQF 0 "register_operand" "=w")
1458 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1459 (match_operand:VDQF 2 "register_operand" "w")))]
1461 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1462 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
;; Unary negate and absolute value.
1465 (define_insn "neg<mode>2"
1466 [(set (match_operand:VDQF 0 "register_operand" "=w")
1467 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1469 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1470 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1473 (define_insn "abs<mode>2"
1474 [(set (match_operand:VDQF 0 "register_operand" "=w")
1475 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1477 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1478 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3; accumulator tied to dest.
1481 (define_insn "fma<mode>4"
1482 [(set (match_operand:VDQF 0 "register_operand" "=w")
1483 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1484 (match_operand:VDQF 2 "register_operand" "w")
1485 (match_operand:VDQF 3 "register_operand" "0")))]
1487 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1488 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLA by-element form: operand 1 lane (operand 2, endian-adjusted via
;; ENDIAN_LANE_N) multiplied into each lane of operand 3.
1491 (define_insn "*aarch64_fma4_elt<mode>"
1492 [(set (match_operand:VDQF 0 "register_operand" "=w")
1496 (match_operand:VDQF 1 "register_operand" "<h_con>")
1497 (parallel [(match_operand:SI 2 "immediate_operand")])))
1498 (match_operand:VDQF 3 "register_operand" "w")
1499 (match_operand:VDQF 4 "register_operand" "0")))]
1502 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1503 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1505 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element form where the lane register is the opposite width
;; (VSWAP_WIDTH) of the data vectors.
1508 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1509 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1511 (vec_duplicate:VDQSF
1513 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1514 (parallel [(match_operand:SI 2 "immediate_operand")])))
1515 (match_operand:VDQSF 3 "register_operand" "w")
1516 (match_operand:VDQSF 4 "register_operand" "0")))]
1519 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1520 INTVAL (operands[2])));
1521 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1523 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMA with a duplicated scalar DF operand: uses lane [0] syntax.
1526 (define_insn "*aarch64_fma4_elt_to_128df"
1527 [(set (match_operand:V2DF 0 "register_operand" "=w")
1530 (match_operand:DF 1 "register_operand" "w"))
1531 (match_operand:V2DF 2 "register_operand" "w")
1532 (match_operand:V2DF 3 "register_operand" "0")))]
1534 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1535 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMA taking one factor from a V2DF lane.
1538 (define_insn "*aarch64_fma4_elt_to_64v2df"
1539 [(set (match_operand:DF 0 "register_operand" "=w")
1542 (match_operand:V2DF 1 "register_operand" "w")
1543 (parallel [(match_operand:SI 2 "immediate_operand")]))
1544 (match_operand:DF 3 "register_operand" "w")
1545 (match_operand:DF 4 "register_operand" "0")))]
1548 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1549 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1551 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (FMLS): op0 = -(op1) * op2 + op3, i.e.
;; op3 - op1 * op2; accumulator tied to the destination.
1554 (define_insn "fnma<mode>4"
1555 [(set (match_operand:VDQF 0 "register_operand" "=w")
1557 (match_operand:VDQF 1 "register_operand" "w")
1559 (match_operand:VDQF 2 "register_operand" "w"))
1560 (match_operand:VDQF 3 "register_operand" "0")))]
1562 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1563 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLS by-element form (lane index endian-adjusted via ENDIAN_LANE_N).
1566 (define_insn "*aarch64_fnma4_elt<mode>"
1567 [(set (match_operand:VDQF 0 "register_operand" "=w")
1570 (match_operand:VDQF 3 "register_operand" "w"))
1573 (match_operand:VDQF 1 "register_operand" "<h_con>")
1574 (parallel [(match_operand:SI 2 "immediate_operand")])))
1575 (match_operand:VDQF 4 "register_operand" "0")))]
1578 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1579 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1581 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element form with the lane register of the opposite (swapped) width.
1584 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1585 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1588 (match_operand:VDQSF 3 "register_operand" "w"))
1589 (vec_duplicate:VDQSF
1591 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1592 (parallel [(match_operand:SI 2 "immediate_operand")])))
1593 (match_operand:VDQSF 4 "register_operand" "0")))]
1596 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1597 INTVAL (operands[2])));
1598 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1600 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLS with a duplicated scalar DF operand (lane [0] syntax).
1603 (define_insn "*aarch64_fnma4_elt_to_128df"
1604 [(set (match_operand:V2DF 0 "register_operand" "=w")
1607 (match_operand:V2DF 2 "register_operand" "w"))
1609 (match_operand:DF 1 "register_operand" "w"))
1610 (match_operand:V2DF 3 "register_operand" "0")))]
1612 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1613 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMLS taking one factor from a V2DF lane.
1616 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1617 [(set (match_operand:DF 0 "register_operand" "=w")
1620 (match_operand:V2DF 1 "register_operand" "w")
1621 (parallel [(match_operand:SI 2 "immediate_operand")]))
1623 (match_operand:DF 3 "register_operand" "w"))
1624 (match_operand:DF 4 "register_operand" "0")))]
1627 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1628 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1630 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1633 ;; Vector versions of the floating-point frint patterns.
1634 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Round-to-integral-FP: one FRINT* instruction selected by the unspec
;; the frint_pattern/frint_suffix iterators attach.
1635 (define_insn "<frint_pattern><mode>2"
1636 [(set (match_operand:VDQF 0 "register_operand" "=w")
1637 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1640 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1641 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1644 ;; Vector versions of the fcvt standard patterns.
1645 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP -> integer conversion with explicit rounding (FCVT[ZNMP][SU]);
;; FIXUORS iterates over signed fix and unsigned fix.
1646 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1647 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1648 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1649 [(match_operand:VDQF 1 "register_operand" "w")]
1652 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1653 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns (and ftrunc) onto the insn
;; forms above; bodies/conditions are elided in this listing.
1656 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1657 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1658 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1659 [(match_operand:VDQF 1 "register_operand")]
1664 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1665 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1666 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1667 [(match_operand:VDQF 1 "register_operand")]
1672 (define_expand "ftrunc<VDQF:mode>2"
1673 [(set (match_operand:VDQF 0 "register_operand")
1674 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Integer -> FP conversion (SCVTF/UCVTF).
1679 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1680 [(set (match_operand:VDQF 0 "register_operand" "=w")
1682 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1684 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1685 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1688 ;; Conversions between vectors of floats and doubles.
1689 ;; Contains a mix of patterns to match standard pattern names
1690 ;; and those for intrinsics.
1692 ;; Float widening operations.
;; Widen lanes 0-1 of a V4SF to V2DF (FCVTL reads the low .2s half).
1694 (define_insn "vec_unpacks_lo_v4sf"
1695 [(set (match_operand:V2DF 0 "register_operand" "=w")
1698 (match_operand:V4SF 1 "register_operand" "w")
1699 (parallel [(const_int 0) (const_int 1)])
1702 "fcvtl\\t%0.2d, %1.2s"
1703 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Intrinsic form: extend a whole V2SF to V2DF, same FCVTL encoding.
1706 (define_insn "aarch64_float_extend_lo_v2df"
1707 [(set (match_operand:V2DF 0 "register_operand" "=w")
1709 (match_operand:V2SF 1 "register_operand" "w")))]
1711 "fcvtl\\t%0.2d, %1.2s"
1712 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen lanes 2-3 of a V4SF to V2DF (FCVTL2 reads the high half).
1715 (define_insn "vec_unpacks_hi_v4sf"
1716 [(set (match_operand:V2DF 0 "register_operand" "=w")
1719 (match_operand:V4SF 1 "register_operand" "w")
1720 (parallel [(const_int 2) (const_int 3)])
1723 "fcvtl2\\t%0.2d, %1.4s"
1724 [(set_attr "type" "neon_fp_cvt_widen_s")]
1727 ;; Float narrowing operations.
;; FCVTN: narrow V2DF to V2SF (writes the low half of the destination).
1729 (define_insn "aarch64_float_truncate_lo_v2sf"
1730 [(set (match_operand:V2SF 0 "register_operand" "=w")
1731 (float_truncate:V2SF
1732 (match_operand:V2DF 1 "register_operand" "w")))]
1734 "fcvtn\\t%0.2s, %1.2d"
1735 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2: narrow V2DF into the high half of a V4SF whose low half
;; (operand 1) is preserved — hence the "0" tie to the destination.
1738 (define_insn "aarch64_float_truncate_hi_v4sf"
1739 [(set (match_operand:V4SF 0 "register_operand" "=w")
1741 (match_operand:V2SF 1 "register_operand" "0")
1742 (float_truncate:V2SF
1743 (match_operand:V2DF 2 "register_operand" "w"))))]
1745 "fcvtn2\\t%0.4s, %2.2d"
1746 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Pack two V2DF into one V4SF: FCVTN then FCVTN2 through a V2SF
;; temporary; lo/hi operand choice is swapped for big-endian.
1749 (define_expand "vec_pack_trunc_v2df"
1750 [(set (match_operand:V4SF 0 "register_operand")
1752 (float_truncate:V2SF
1753 (match_operand:V2DF 1 "register_operand"))
1754 (float_truncate:V2SF
1755 (match_operand:V2DF 2 "register_operand"))
1759 rtx tmp = gen_reg_rtx (V2SFmode);
1760 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1761 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1763 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1764 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1765 tmp, operands[hi]));
;; Pack two scalar DFs into a V2SF: build a V2DF via move_lo/hi_quad,
;; then narrow with FCVTN. NOTE(review): tmp is declared V2SFmode here
;; while the move_*_quad_v2df calls take a V2DF — an intervening line
;; appears elided in this listing; verify against the full source.
1770 (define_expand "vec_pack_trunc_df"
1771 [(set (match_operand:V2SF 0 "register_operand")
1774 (match_operand:DF 1 "register_operand"))
1776 (match_operand:DF 2 "register_operand"))
1780 rtx tmp = gen_reg_rtx (V2SFmode);
1781 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1782 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1784 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1785 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1786 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
;; Unfused multiply-subtract intrinsic: op0 = op1 - op2 * op3, emitted
;; as FMLS (accumulator operand 1 tied to the destination).
1791 (define_insn "aarch64_vmls<mode>"
1792 [(set (match_operand:VDQF 0 "register_operand" "=w")
1793 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1794 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1795 (match_operand:VDQF 3 "register_operand" "w"))))]
1797 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1798 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1802 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
;; expression like:
1804 ;; a = (b < c) ? b : c;
1805 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1806 ;; either explicitly or indirectly via -ffast-math.
1808 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1809 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1810 ;; operand will be returned when both operands are zero (i.e. they may not
1811 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1812 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; RTL smax/smin mapped to FMAXNM/FMINNM (NaN-propagation-defined forms).
1815 (define_insn "<su><maxmin><mode>3"
1816 [(set (match_operand:VDQF 0 "register_operand" "=w")
1817 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1818 (match_operand:VDQF 2 "register_operand" "w")))]
1820 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1821 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Unspec-based max/min (intrinsic semantics), instruction chosen by the
;; maxmin_uns iterators.
1824 (define_insn "<maxmin_uns><mode>3"
1825 [(set (match_operand:VDQF 0 "register_operand" "=w")
1826 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1827 (match_operand:VDQF 2 "register_operand" "w")]
1830 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1831 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1834 ;; 'across lanes' add.
;; Integer add-across-lanes: reduce into a scratch vector, then extract
;; lane 0 (endian-adjusted via ENDIAN_LANE_N) as the scalar result.
1836 (define_expand "reduc_plus_scal_<mode>"
1837 [(match_operand:<VEL> 0 "register_operand" "=w")
1838 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1842 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1843 rtx scratch = gen_reg_rtx (<MODE>mode);
1844 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1845 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Same scheme for two-element float vectors (V2F).
1850 (define_expand "reduc_plus_scal_<mode>"
1851 [(match_operand:<VEL> 0 "register_operand" "=w")
1852 (match_operand:V2F 1 "register_operand" "w")]
1855 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1856 rtx scratch = gen_reg_rtx (<MODE>mode);
1857 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1858 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; ADDV/ADDP reduction insn for the integer reduction modes.
1863 (define_insn "aarch64_reduc_plus_internal<mode>"
1864 [(set (match_operand:VDQV 0 "register_operand" "=w")
1865 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1868 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1869 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; one pairwise ADDP suffices for two lanes.
1872 (define_insn "aarch64_reduc_plus_internalv2si"
1873 [(set (match_operand:V2SI 0 "register_operand" "=w")
1874 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1877 "addp\\t%0.2s, %1.2s, %1.2s"
1878 [(set_attr "type" "neon_reduc_add")]
;; Two-element float reduction: scalar pairwise FADDP.
1881 (define_insn "aarch64_reduc_plus_internal<mode>"
1882 [(set (match_operand:V2F 0 "register_operand" "=w")
1883 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1886 "faddp\\t%<Vetype>0, %1.<Vtype>"
1887 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; Vector pairwise FADDP for V4SF; applied twice below to reduce 4 lanes.
1890 (define_insn "aarch64_addpv4sf"
1891 [(set (match_operand:V4SF 0 "register_operand" "=w")
1892 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1895 "faddp\\t%0.4s, %1.4s, %1.4s"
1896 [(set_attr "type" "neon_fp_reduc_add_s_q")]
;; V4SF sum: two FADDP passes leave the total in every lane; extract
;; lane 0 (endian-adjusted).
1899 (define_expand "reduc_plus_scal_v4sf"
1900 [(set (match_operand:SF 0 "register_operand")
1901 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1905 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
1906 rtx scratch = gen_reg_rtx (V4SFmode);
1907 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
1908 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
1909 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per element (CLS).
1913 (define_insn "clrsb<mode>2"
1914 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1915 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1917 "cls\\t%0.<Vtype>, %1.<Vtype>"
1918 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (CLZ).
1921 (define_insn "clz<mode>2"
1922 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1923 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1925 "clz\\t%0.<Vtype>, %1.<Vtype>"
1926 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte (CNT); byte-vector modes (VB) only.
1929 (define_insn "popcount<mode>2"
1930 [(set (match_operand:VB 0 "register_operand" "=w")
1931 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
1933 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
1934 [(set_attr "type" "neon_cnt<q>")]
1937 ;; 'across lanes' max and min ops.
1939 ;; Template for outputting a scalar, so we can create __builtins which can be
1940 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; Float max/min reduction: reduce into a scratch vector, then extract
;; lane 0 (endian-adjusted via ENDIAN_LANE_N) as the scalar result.
1941 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1942 [(match_operand:<VEL> 0 "register_operand")
1943 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1947 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1948 rtx scratch = gen_reg_rtx (<MODE>mode);
1949 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1951 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt))
1956 ;; Likewise for integer cases, signed and unsigned.
1957 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1958 [(match_operand:<VEL> 0 "register_operand")
1959 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
1963 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1964 rtx scratch = gen_reg_rtx (<MODE>mode);
1965 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1967 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes reduction: [SU](MAX|MIN)V.
1972 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1973 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
1974 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
1977 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1978 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI: no across-lanes form; one pairwise [SU](MAX|MIN)P suffices.
1981 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
1982 [(set (match_operand:V2SI 0 "register_operand" "=w")
1983 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1986 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1987 [(set_attr "type" "neon_reduc_minmax")]
;; Float reduction; <vp> selects the pairwise or across-lanes spelling.
1990 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1991 [(set (match_operand:VDQF 0 "register_operand" "=w")
1992 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1995 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
1996 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
1999 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
2001 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; the bits are taken.
2004 ;; Thus our BSL is of the form:
2005 ;; op0 = bsl (mask, op2, op3)
2006 ;; We can use any of:
2009 ;; bsl mask, op1, op2
2010 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2011 ;; bit op0, op2, mask
2012 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2013 ;; bif op0, op1, mask
;; Bit select: each result bit comes from operand 2 where the mask bit is
;; 1, from operand 3 where it is 0. The three alternatives tie a different
;; operand to the destination so the RA can pick BSL, BIT or BIF.
2015 (define_insn "aarch64_simd_bsl<mode>_internal"
2016 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2020 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2021 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0")
2022 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")
2023 (match_dup:<V_cmp_result> 3)
2027 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2028 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2029 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2030 [(set_attr "type" "neon_bsl<q>")]
;; Expander: for FP modes, view all operands through the integer
;; comparison-result mode (gen_lowpart) before using the internal insn,
;; then copy back — operands of differing modes cannot be tied.
2033 (define_expand "aarch64_simd_bsl<mode>"
2034 [(match_operand:VALLDIF 0 "register_operand")
2035 (match_operand:<V_cmp_result> 1 "register_operand")
2036 (match_operand:VALLDIF 2 "register_operand")
2037 (match_operand:VALLDIF 3 "register_operand")]
2040 /* We can't alias operands together if they have different modes.  */
2041 rtx tmp = operands[0];
2042 if (FLOAT_MODE_P (<MODE>mode))
2044 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2045 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2046 tmp = gen_reg_rtx (<V_cmp_result>mode);
2048 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2049 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2053 if (tmp != operands[0])
2054 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Integer vector conditional select: compare operands 4/5 with operator 3,
;; build an all-ones/all-zeros mask, then BSL between operands 1 and 2.
;; NOTE(review): this listing elides interleaved lines (switch heads,
;; else branches, case labels) — see the gaps in the embedded numbering.
2059 (define_expand "aarch64_vcond_internal<mode><mode>"
2060 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2061 (if_then_else:VSDQ_I_DI
2062 (match_operator 3 "comparison_operator"
2063 [(match_operand:VSDQ_I_DI 4 "register_operand")
2064 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2065 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2066 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2069 rtx op1 = operands[1];
2070 rtx op2 = operands[2];
2071 rtx mask = gen_reg_rtx (<MODE>mode);
2072 enum rtx_code code = GET_CODE (operands[3]);
2074 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2075 and desirable for other comparisons if it results in FOO ? -1 : 0
2076 (this allows direct use of the comparison result without a bsl).  */
2079 && op1 == CONST0_RTX (<V_cmp_result>mode)
2080 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2086 case LE: code = GT; break;
2087 case LT: code = GE; break;
2088 case GE: code = LT; break;
2089 case GT: code = LE; break;
2091 case NE: code = EQ; break;
2092 case LTU: code = GEU; break;
2093 case LEU: code = GTU; break;
2094 case GTU: code = LEU; break;
2095 case GEU: code = LTU; break;
2096 default: gcc_unreachable ();
2100 /* Make sure we can handle the last operand.  */
2104 /* Normalized to EQ above.  */
2112 /* These instructions have a form taking an immediate zero.  */
2113 if (operands[5] == CONST0_RTX (<MODE>mode))
2115 /* Fall through, as may need to load into register.  */
2117 if (!REG_P (operands[5]))
2118 operands[5] = force_reg (<MODE>mode, operands[5]);
/* Emit the comparison; reversed codes swap operands 4/5 or use the
   complementary unsigned compare (CMHI/CMHS) instead.  */
2125 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2129 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2133 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2137 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2141 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2145 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2149 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2153 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2156 /* NE has been normalized to EQ above.  */
2158 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2165 /* If we have (a = (b CMP c) ? -1 : 0);
2166 Then we can simply move the generated mask.  */
2168 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2169 && op2 == CONST0_RTX (<V_cmp_result>mode))
2170 emit_move_insn (operands[0], mask);
2174 op1 = force_reg (<MODE>mode, op1);
2176 op2 = force_reg (<MODE>mode, op2);
2177 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Float vector conditional select. FCM* returns false for unordered
;; lanes, so some codes are implemented by emitting the inverse compare
;; and swapping the BSL data operands afterwards (swap_bsl_operands).
;; NOTE(review): this listing elides interleaved lines (switch heads,
;; case labels, else branches) — see the gaps in the embedded numbering.
2184 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2185 [(set (match_operand:VDQF_COND 0 "register_operand")
2187 (match_operator 3 "comparison_operator"
2188 [(match_operand:VDQF 4 "register_operand")
2189 (match_operand:VDQF 5 "nonmemory_operand")])
2190 (match_operand:VDQF_COND 1 "nonmemory_operand")
2191 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2195 int use_zero_form = 0;
2196 int swap_bsl_operands = 0;
2197 rtx op1 = operands[1];
2198 rtx op2 = operands[2];
2199 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2200 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2202 rtx (*base_comparison) (rtx, rtx, rtx);
2203 rtx (*complimentary_comparison) (rtx, rtx, rtx);
2205 switch (GET_CODE (operands[3]))
/* Comparison against immediate zero has dedicated FCM forms.  */
2212 if (operands[5] == CONST0_RTX (<MODE>mode))
2219 if (!REG_P (operands[5]))
2220 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
/* Select the base and complementary comparison generators per code.  */
2223 switch (GET_CODE (operands[3]))
2233 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2234 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2242 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2243 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2248 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2249 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2255 switch (GET_CODE (operands[3]))
2262 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2263 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2269 Note that there also exist direct comparison against 0 forms,
2270 so catch those as a special case.  */
2274 switch (GET_CODE (operands[3]))
2277 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2280 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2283 /* Do nothing, other zero form cases already have the correct
2290 emit_insn (base_comparison (mask, operands[4], operands[5]));
2292 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2299 /* FCM returns false for lanes which are unordered, so if we use
2300 the inverse of the comparison we actually want to emit, then
2301 swap the operands to BSL, we will end up with the correct result.
2302 Note that a NE NaN and NaN NE b are true for all a, b.
2304 Our transformations are:
2309 a NE b -> !(a EQ b) */
2312 emit_insn (base_comparison (mask, operands[4], operands[5]));
2314 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2316 swap_bsl_operands = 1;
2319 /* We check (a > b || b > a). combining these comparisons give us
2320 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2321 will then give us (a == b || a UNORDERED b) as intended.  */
2323 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2324 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2325 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2326 swap_bsl_operands = 1;
2329 /* Operands are ORDERED iff (a > b || b >= a).
2330 Swapping the operands to BSL will give the UNORDERED case.  */
2331 swap_bsl_operands = 1;
2334 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2335 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2336 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2342 if (swap_bsl_operands)
2348 /* If we have (a = (b CMP c) ? -1 : 0);
2349 Then we can simply move the generated mask.  */
2351 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2352 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2353 emit_move_insn (operands[0], mask);
2357 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2359 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2360 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond expander for signed / floating vector modes
;; (VALLDI): delegates everything to aarch64_vcond_internal above.
2367 (define_expand "vcond<mode><mode>"
2368 [(set (match_operand:VALLDI 0 "register_operand")
2369 (if_then_else:VALLDI
2370 (match_operator 3 "comparison_operator"
2371 [(match_operand:VALLDI 4 "register_operand")
2372 (match_operand:VALLDI 5 "nonmemory_operand")])
2373 (match_operand:VALLDI 1 "nonmemory_operand")
2374 (match_operand:VALLDI 2 "nonmemory_operand")))]
2377 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2378 operands[2], operands[3],
2379 operands[4], operands[5]));
;; vcond variant where the comparison is on a float vector (VDQF) but the
;; selected data is the corresponding integer mode (<V_cmp_result>).
2383 (define_expand "vcond<v_cmp_result><mode>"
2384 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2385 (if_then_else:<V_cmp_result>
2386 (match_operator 3 "comparison_operator"
2387 [(match_operand:VDQF 4 "register_operand")
2388 (match_operand:VDQF 5 "nonmemory_operand")])
2389 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2390 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2393 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2394 operands[0], operands[1],
2395 operands[2], operands[3],
2396 operands[4], operands[5]));
;; Unsigned-comparison vcondu for integer vector modes; also routed
;; through the shared aarch64_vcond_internal expander.
2400 (define_expand "vcondu<mode><mode>"
2401 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2402 (if_then_else:VSDQ_I_DI
2403 (match_operator 3 "comparison_operator"
2404 [(match_operand:VSDQ_I_DI 4 "register_operand")
2405 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2406 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2407 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2410 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2411 operands[2], operands[3],
2412 operands[4], operands[5]));
2416 ;; Patterns for AArch64 SIMD Intrinsics.
2418 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: the lane index is flipped for big-endian via ENDIAN_LANE_N
;; before printing, since RTL uses GCC vector-extension lane numbering.
2419 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2420 [(set (match_operand:GPI 0 "register_operand" "=r")
2423 (match_operand:VDQQH 1 "register_operand" "w")
2424 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2427 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2428 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2430 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: lane extraction zero-extended into a 32-bit GP register.
2433 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2434 [(set (match_operand:SI 0 "register_operand" "=r")
2437 (match_operand:VDQQH 1 "register_operand" "w")
2438 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2441 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2442 return "umov\\t%w0, %1.<Vetype>[%2]";
2444 [(set_attr "type" "neon_to_gp<q>")]
2447 ;; Lane extraction of a value, neither sign nor zero extension
2448 ;; is guaranteed so upper bits should be considered undefined.
2449 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: lane -> GP reg (umov), lane -> SIMD reg (dup),
;; lane -> memory (st1 one-lane store).
2450 (define_insn "aarch64_get_lane<mode>"
2451 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2453 (match_operand:VALL 1 "register_operand" "w, w, w")
2454 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2457 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2458 switch (which_alternative)
2461 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2463 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2465 return "st1\\t{%1.<Vetype>}[%2], %0";
2470 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2473 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; vec_concat of a 64-bit value with zero: on little-endian the zero high
;; half is implicit, so a plain 64-bit register move suffices.
2476 (define_insn "*aarch64_combinez<mode>"
2477 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2479 (match_operand:VD_BHSI 1 "register_operand" "w")
2480 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")))]
2481 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2482 "mov\\t%0.8b, %1.8b"
2483 [(set_attr "type" "neon_move<q>")]
;; Big-endian twin of the above: the concat operands appear swapped in
;; RTL but the emitted move is identical.
2486 (define_insn "*aarch64_combinez_be<mode>"
2487 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2489 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")
2490 (match_operand:VD_BHSI 1 "register_operand" "w")))]
2491 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2492 "mov\\t%0.8b, %1.8b"
2493 [(set_attr "type" "neon_move<q>")]
;; Expander: combine two 64-bit vectors into one 128-bit vector; swaps
;; the operand order for big-endian before emitting the internal insn.
;; NOTE(review): listing elided — the op1/op2 setup lines are not visible.
2496 (define_expand "aarch64_combine<mode>"
2497 [(match_operand:<VDBL> 0 "register_operand")
2498 (match_operand:VDC 1 "register_operand")
2499 (match_operand:VDC 2 "register_operand")]
2503 if (BYTES_BIG_ENDIAN)
2513 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as a single insn until reload, then split into the two half moves
;; by aarch64_split_simd_combine (halves swapped on big-endian).
2518 (define_insn_and_split "aarch64_combine_internal<mode>"
2519 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2520 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2521 (match_operand:VDC 2 "register_operand" "w")))]
2524 "&& reload_completed"
2527 if (BYTES_BIG_ENDIAN)
2528 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2530 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2533 [(set_attr "type" "multiple")]
;; Intrinsic-facing combine: writes the low then the high 64-bit half of
;; the destination with explicit move_lo/move_hi quad patterns.
2536 (define_expand "aarch64_simd_combine<mode>"
2537 [(match_operand:<VDBL> 0 "register_operand")
2538 (match_operand:VDC 1 "register_operand")
2539 (match_operand:VDC 2 "register_operand")]
2542 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2543 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2546 [(set_attr "type" "multiple")]
2549 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q registers:
;; [us]addl2 / [us]subl2.
2551 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2552 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2553 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2554 (match_operand:VQW 1 "register_operand" "w")
2555 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2556 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2557 (match_operand:VQW 2 "register_operand" "w")
2560 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2561 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves: [us]addl / [us]subl.
2564 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2565 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2566 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2567 (match_operand:VQW 1 "register_operand" "w")
2568 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2569 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2570 (match_operand:VQW 2 "register_operand" "w")
2573 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2574 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders: build the hi-half lane-selector parallel and
;; forward to the _hi_internal insns above.
2578 (define_expand "aarch64_saddl2<mode>"
2579 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2580 (match_operand:VQW 1 "register_operand" "w")
2581 (match_operand:VQW 2 "register_operand" "w")]
2584 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2585 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2590 (define_expand "aarch64_uaddl2<mode>"
2591 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2592 (match_operand:VQW 1 "register_operand" "w")
2593 (match_operand:VQW 2 "register_operand" "w")]
2596 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2597 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2602 (define_expand "aarch64_ssubl2<mode>"
2603 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2604 (match_operand:VQW 1 "register_operand" "w")
2605 (match_operand:VQW 2 "register_operand" "w")]
2608 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2609 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2614 (define_expand "aarch64_usubl2<mode>"
2615 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2616 (match_operand:VQW 1 "register_operand" "w")
2617 (match_operand:VQW 2 "register_operand" "w")]
2620 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2621 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; 64-bit-vector form: widen both operands and add/sub into the wide mode.
2626 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2627 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2628 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2629 (match_operand:VD_BHSI 1 "register_operand" "w"))
2631 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2633 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2634 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2637 ;; <su><addsub>w<q>.
;; Wide + narrow: first operand is already wide, second is extended:
;; [us]addw / [us]subw.
2639 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2640 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2641 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2643 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2645 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2646 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same with the narrow operand taken from the high half of a Q register:
;; [us]addw2 / [us]subw2.
2649 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2650 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2651 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2654 (match_operand:VQW 2 "register_operand" "w")
2655 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2657 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2658 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Intrinsic expanders: construct the hi-half selector and forward to the
;; *_w2_internal insn above.
2661 (define_expand "aarch64_saddw2<mode>"
2662 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2663 (match_operand:<VWIDE> 1 "register_operand" "w")
2664 (match_operand:VQW 2 "register_operand" "w")]
2667 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2668 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2673 (define_expand "aarch64_uaddw2<mode>"
2674 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2675 (match_operand:<VWIDE> 1 "register_operand" "w")
2676 (match_operand:VQW 2 "register_operand" "w")]
2679 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2680 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2686 (define_expand "aarch64_ssubw2<mode>"
2687 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2688 (match_operand:<VWIDE> 1 "register_operand" "w")
2689 (match_operand:VQW 2 "register_operand" "w")]
2692 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2693 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2698 (define_expand "aarch64_usubw2<mode>"
2699 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2700 (match_operand:<VWIDE> 1 "register_operand" "w")
2701 (match_operand:VQW 2 "register_operand" "w")]
2704 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2705 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2710 ;; <su><r>h<addsub>.
;; Halving add/sub (shadd, uhadd, srhadd, ... via the <sur> iterator),
;; expressed as an unspec on two like-mode integer vectors.
2712 (define_insn "aarch64_<sur>h<addsub><mode>"
2713 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2714 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2715 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2718 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2719 [(set_attr "type" "neon_<addsub>_halve<q>")]
2722 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: result is the narrow mode of a Q input.
2724 (define_insn "aarch64_<sur><addsub>hn<mode>"
2725 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2726 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2727 (match_operand:VQN 2 "register_operand" "w")]
2730 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2731 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "hn2" form: narrows into the high half of the destination; operand 1
;; is tied to the output ("0" constraint) to preserve the low half.
2734 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2735 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2736 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2737 (match_operand:VQN 2 "register_operand" "w")
2738 (match_operand:VQN 3 "register_operand" "w")]
2741 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2742 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply on byte vectors (pmul).
2747 (define_insn "aarch64_pmul<mode>"
2748 [(set (match_operand:VB 0 "register_operand" "=w")
2749 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2750 (match_operand:VB 2 "register_operand" "w")]
2753 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2754 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Saturating add/sub (sqadd, uqadd, sqsub, uqsub via BINQOPS iterator)
;; on integer vectors and scalars.
2759 (define_insn "aarch64_<su_optab><optab><mode>"
2760 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2761 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2762 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2764 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2765 [(set_attr "type" "neon_<optab><q>")]
2768 ;; suqadd and usqadd
;; Accumulating saturating add: operand 1 is tied to the output ("0"),
;; only operand 2 appears in the printed assembly.
2770 (define_insn "aarch64_<sur>qadd<mode>"
2771 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2772 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2773 (match_operand:VSDQ_I 2 "register_operand" "w")]
2776 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2777 [(set_attr "type" "neon_qadd<q>")]
;; Saturating extract-narrow, signed-to-unsigned (sqxtun).
2782 (define_insn "aarch64_sqmovun<mode>"
2783 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2784 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2787 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2788 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2791 ;; sqmovn and uqmovn
;; Saturating extract-narrow keeping signedness (sqxtn / uqxtn).
2793 (define_insn "aarch64_<sur>qmovn<mode>"
2794 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2795 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2798 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2799 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (e.g. sqabs/sqneg via the <optab> iterator).
2804 (define_insn "aarch64_s<optab><mode>"
2805 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2807 (match_operand:VSDQ_I 1 "register_operand" "w")))]
2809 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2810 [(set_attr "type" "neon_<optab><q>")]
;; Saturating doubling multiply high-half, plain vector/scalar form
;; (sqdmulh / sqrdmulh via the <r> iterator).
2815 (define_insn "aarch64_sq<r>dmulh<mode>"
2816 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2818 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2819 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2822 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2823 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Lane forms: operand 2 is a vector whose selected lane supplies the
;; multiplier; lane index flipped for big-endian via ENDIAN_LANE_N.
2828 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2829 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2831 [(match_operand:VDQHS 1 "register_operand" "w")
2833 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2834 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2838 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2839 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2840 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; "laneq": lane taken from a full 128-bit (<VCONQ>) vector.
2843 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2844 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2846 [(match_operand:VDQHS 1 "register_operand" "w")
2848 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2849 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2853 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2854 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2855 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) variants of the two lane forms above.
2858 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2859 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2861 [(match_operand:SD_HSI 1 "register_operand" "w")
2863 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2864 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2868 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2869 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2870 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2873 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2874 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2876 [(match_operand:SD_HSI 1 "register_operand" "w")
2878 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2879 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2883 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2884 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2885 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Saturating doubling multiply-accumulate long (sqdmlal / sqdmlsl via
;; SBINQOPS): widen both multiplicands, double, then add to / subtract
;; from the tied wide accumulator (operand 1, constraint "0").
2890 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2891 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2893 (match_operand:<VWIDE> 1 "register_operand" "0")
2896 (sign_extend:<VWIDE>
2897 (match_operand:VSD_HSI 2 "register_operand" "w"))
2898 (sign_extend:<VWIDE>
2899 (match_operand:VSD_HSI 3 "register_operand" "w")))
2902 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2903 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Lane variant: multiplier is a lane of operand 3, duplicated across the
;; vector; lane index flipped for big-endian via ENDIAN_LANE_N.
2908 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2909 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2911 (match_operand:<VWIDE> 1 "register_operand" "0")
2914 (sign_extend:<VWIDE>
2915 (match_operand:VD_HSI 2 "register_operand" "w"))
2916 (sign_extend:<VWIDE>
2917 (vec_duplicate:VD_HSI
2919 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2920 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2925 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2927 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2929 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "laneq" variant: lane taken from a full 128-bit (<VCONQ>) vector.
2932 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2933 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2935 (match_operand:<VWIDE> 1 "register_operand" "0")
2938 (sign_extend:<VWIDE>
2939 (match_operand:VD_HSI 2 "register_operand" "w"))
2940 (sign_extend:<VWIDE>
2941 (vec_duplicate:VD_HSI
2943 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2944 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2949 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2951 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2953 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane / laneq variants of the same operation.
2956 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2957 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2959 (match_operand:<VWIDE> 1 "register_operand" "0")
2962 (sign_extend:<VWIDE>
2963 (match_operand:SD_HSI 2 "register_operand" "w"))
2964 (sign_extend:<VWIDE>
2966 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2967 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2972 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2974 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2976 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2979 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2980 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2982 (match_operand:<VWIDE> 1 "register_operand" "0")
2985 (sign_extend:<VWIDE>
2986 (match_operand:SD_HSI 2 "register_operand" "w"))
2987 (sign_extend:<VWIDE>
2989 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2990 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2995 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2997 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2999 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "_n" variant: scalar multiplier (operand 3) duplicated across the
;; vector, printed as lane 0 of the scalar's register.
3004 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3005 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3007 (match_operand:<VWIDE> 1 "register_operand" "0")
3010 (sign_extend:<VWIDE>
3011 (match_operand:VD_HSI 2 "register_operand" "w"))
3012 (sign_extend:<VWIDE>
3013 (vec_duplicate:VD_HSI
3014 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3017 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3018 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "l2" form: operates on the high halves of both Q-register inputs
;; (selected by the vect_par_cnst_hi_half parallel, operand 4).
3023 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3024 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3026 (match_operand:<VWIDE> 1 "register_operand" "0")
3029 (sign_extend:<VWIDE>
3031 (match_operand:VQ_HSI 2 "register_operand" "w")
3032 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3033 (sign_extend:<VWIDE>
3035 (match_operand:VQ_HSI 3 "register_operand" "w")
3039 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3040 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expanders: build the hi-half selector and forward to the
;; _internal insn above (add and subtract flavours).
3043 (define_expand "aarch64_sqdmlal2<mode>"
3044 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3045 (match_operand:<VWIDE> 1 "register_operand" "w")
3046 (match_operand:VQ_HSI 2 "register_operand" "w")
3047 (match_operand:VQ_HSI 3 "register_operand" "w")]
3050 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3051 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3052 operands[2], operands[3], p));
3056 (define_expand "aarch64_sqdmlsl2<mode>"
3057 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3058 (match_operand:<VWIDE> 1 "register_operand" "w")
3059 (match_operand:VQ_HSI 2 "register_operand" "w")
3060 (match_operand:VQ_HSI 3 "register_operand" "w")]
3063 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3064 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3065 operands[2], operands[3], p));
;; "l2" + lane: high halves of operand 2 multiplied by a duplicated lane
;; of operand 3; lane index flipped for big-endian via ENDIAN_LANE_N.
3071 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3072 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3074 (match_operand:<VWIDE> 1 "register_operand" "0")
3077 (sign_extend:<VWIDE>
3079 (match_operand:VQ_HSI 2 "register_operand" "w")
3080 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3081 (sign_extend:<VWIDE>
3082 (vec_duplicate:<VHALF>
3084 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3085 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3090 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3092 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3094 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "laneq" variant: lane taken from a full 128-bit (<VCONQ>) vector.
3097 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3098 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3100 (match_operand:<VWIDE> 1 "register_operand" "0")
3103 (sign_extend:<VWIDE>
3105 (match_operand:VQ_HSI 2 "register_operand" "w")
3106 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3107 (sign_extend:<VWIDE>
3108 (vec_duplicate:<VHALF>
3110 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3111 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3116 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3118 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3120 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expanders for the four lane/laneq x add/sub combinations:
;; each builds the hi-half selector and forwards to its _internal insn.
3123 (define_expand "aarch64_sqdmlal2_lane<mode>"
3124 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3125 (match_operand:<VWIDE> 1 "register_operand" "w")
3126 (match_operand:VQ_HSI 2 "register_operand" "w")
3127 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3128 (match_operand:SI 4 "immediate_operand" "i")]
3131 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3132 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3133 operands[2], operands[3],
3138 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3139 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3140 (match_operand:<VWIDE> 1 "register_operand" "w")
3141 (match_operand:VQ_HSI 2 "register_operand" "w")
3142 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3143 (match_operand:SI 4 "immediate_operand" "i")]
3146 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3147 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3148 operands[2], operands[3],
3153 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3154 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3155 (match_operand:<VWIDE> 1 "register_operand" "w")
3156 (match_operand:VQ_HSI 2 "register_operand" "w")
3157 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3158 (match_operand:SI 4 "immediate_operand" "i")]
3161 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3162 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3163 operands[2], operands[3],
3168 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3169 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3170 (match_operand:<VWIDE> 1 "register_operand" "w")
3171 (match_operand:VQ_HSI 2 "register_operand" "w")
3172 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3173 (match_operand:SI 4 "immediate_operand" "i")]
3176 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3177 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3178 operands[2], operands[3],
;; "l2_n": high halves of operand 2 multiplied by a scalar (operand 3)
;; duplicated across the half-vector; printed as lane 0.
3183 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3184 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:<VWIDE> 1 "register_operand" "0")
3189 (sign_extend:<VWIDE>
3191 (match_operand:VQ_HSI 2 "register_operand" "w")
3192 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3193 (sign_extend:<VWIDE>
3194 (vec_duplicate:<VHALF>
3195 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3198 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3199 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Intrinsic expanders: hi-half selector + forward to _n_internal.
3202 (define_expand "aarch64_sqdmlal2_n<mode>"
3203 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (match_operand:<VWIDE> 1 "register_operand" "w")
3205 (match_operand:VQ_HSI 2 "register_operand" "w")
3206 (match_operand:<VEL> 3 "register_operand" "w")]
3209 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3210 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3211 operands[2], operands[3],
3216 (define_expand "aarch64_sqdmlsl2_n<mode>"
3217 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3218 (match_operand:<VWIDE> 1 "register_operand" "w")
3219 (match_operand:VQ_HSI 2 "register_operand" "w")
3220 (match_operand:<VEL> 3 "register_operand" "w")]
3223 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3224 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3225 operands[2], operands[3],
;; Saturating doubling multiply long (sqdmull): widen both operands,
;; multiply and double into the wide mode.
3232 (define_insn "aarch64_sqdmull<mode>"
3233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3236 (sign_extend:<VWIDE>
3237 (match_operand:VSD_HSI 1 "register_operand" "w"))
3238 (sign_extend:<VWIDE>
3239 (match_operand:VSD_HSI 2 "register_operand" "w")))
3242 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3243 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Lane variant: operand 2's selected lane is duplicated as multiplier;
;; lane index flipped for big-endian via ENDIAN_LANE_N.
3248 (define_insn "aarch64_sqdmull_lane<mode>"
3249 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3252 (sign_extend:<VWIDE>
3253 (match_operand:VD_HSI 1 "register_operand" "w"))
3254 (sign_extend:<VWIDE>
3255 (vec_duplicate:VD_HSI
3257 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3258 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3263 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3264 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3266 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; "laneq": lane taken from a full 128-bit (<VCONQ>) vector.
3269 (define_insn "aarch64_sqdmull_laneq<mode>"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3273 (sign_extend:<VWIDE>
3274 (match_operand:VD_HSI 1 "register_operand" "w"))
3275 (sign_extend:<VWIDE>
3276 (vec_duplicate:VD_HSI
3278 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3279 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3284 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3285 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3287 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane / laneq variants.
3290 (define_insn "aarch64_sqdmull_lane<mode>"
3291 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3294 (sign_extend:<VWIDE>
3295 (match_operand:SD_HSI 1 "register_operand" "w"))
3296 (sign_extend:<VWIDE>
3298 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3299 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3304 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3305 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3307 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3310 (define_insn "aarch64_sqdmull_laneq<mode>"
3311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3314 (sign_extend:<VWIDE>
3315 (match_operand:SD_HSI 1 "register_operand" "w"))
3316 (sign_extend:<VWIDE>
3318 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3319 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3324 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3325 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3327 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; "_n": scalar multiplier duplicated across the vector; printed lane 0.
3332 (define_insn "aarch64_sqdmull_n<mode>"
3333 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3336 (sign_extend:<VWIDE>
3337 (match_operand:VD_HSI 1 "register_operand" "w"))
3338 (sign_extend:<VWIDE>
3339 (vec_duplicate:VD_HSI
3340 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3344 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3345 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: saturating doubling multiply long on the high halves of two
;; Q-register inputs (selected via the vect_par_cnst_hi_half parallel).
3352 (define_insn "aarch64_sqdmull2<mode>_internal"
3353 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3356 (sign_extend:<VWIDE>
3358 (match_operand:VQ_HSI 1 "register_operand" "w")
3359 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3360 (sign_extend:<VWIDE>
3362 (match_operand:VQ_HSI 2 "register_operand" "w")
3367 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3368 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Intrinsic expander: hi-half selector + forward to _internal.
3371 (define_expand "aarch64_sqdmull2<mode>"
3372 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373 (match_operand:VQ_HSI 1 "register_operand" "w")
3374 (match_operand:VQ_HSI 2 "register_operand" "w")]
3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3378 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Lane variants: multiplier is a duplicated lane of operand 2; lane
;; index flipped for big-endian via ENDIAN_LANE_N.
3385 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3386 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3389 (sign_extend:<VWIDE>
3391 (match_operand:VQ_HSI 1 "register_operand" "w")
3392 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3393 (sign_extend:<VWIDE>
3394 (vec_duplicate:<VHALF>
3396 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3397 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3402 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3403 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3405 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; "laneq": lane taken from a full 128-bit (<VCONQ>) vector.
3408 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3409 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3412 (sign_extend:<VWIDE>
3414 (match_operand:VQ_HSI 1 "register_operand" "w")
3415 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3416 (sign_extend:<VWIDE>
3417 (vec_duplicate:<VHALF>
3419 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3420 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3425 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3426 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3428 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Intrinsic expanders for the lane / laneq forms.
3431 (define_expand "aarch64_sqdmull2_lane<mode>"
3432 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (match_operand:VQ_HSI 1 "register_operand" "w")
3434 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3435 (match_operand:SI 3 "immediate_operand" "i")]
3438 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3439 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3440 operands[2], operands[3],
3445 (define_expand "aarch64_sqdmull2_laneq<mode>"
3446 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3447 (match_operand:VQ_HSI 1 "register_operand" "w")
3448 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3449 (match_operand:SI 3 "immediate_operand" "i")]
3452 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3453 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3454 operands[2], operands[3],
;; "_n": scalar multiplier duplicated across the half-vector; lane 0.
3461 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3462 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3465 (sign_extend:<VWIDE>
3467 (match_operand:VQ_HSI 1 "register_operand" "w")
3468 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3469 (sign_extend:<VWIDE>
3470 (vec_duplicate:<VHALF>
3471 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3475 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3476 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3479 (define_expand "aarch64_sqdmull2_n<mode>"
3480 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3481 (match_operand:VQ_HSI 1 "register_operand" "w")
3482 (match_operand:<VEL> 2 "register_operand" "w")]
3485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3486 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shift by (signed) register: sshl/ushl/srshl/urshl, selected by
;; the <sur> iterator attribute.  The stray ";" that followed the output
;; template has been removed: in RTL reader syntax ";" starts a comment,
;; so it was dead noise rather than part of the template.
3493 (define_insn "aarch64_<sur>shl<mode>"
3494 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3496 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3497 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3500 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3501 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (and optionally rounding) vector shift by register:
;; sqshl/uqshl/sqrshl/uqrshl via the <sur> and <r> iterator attributes.
;; The stray ";" after the output template has been removed (";" begins
;; an RTL comment, so it contributed nothing).
3507 (define_insn "aarch64_<sur>q<r>shl<mode>"
3508 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3510 [(match_operand:VSDQ_I 1 "register_operand" "w")
3511 (match_operand:VSDQ_I 2 "register_operand" "w")]
3514 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3515 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; -------------------------------------------------------------------
;; Immediate-shift patterns: widening shifts (shll/shll2), narrowing
;; and accumulating shifts, and insert shifts (sli/sri).
;; -------------------------------------------------------------------

;; Shift left long by immediate.  When the shift count equals the
;; element bit width the architecture only provides the plain SHLL
;; form, so the C fragment selects the mnemonic at output time.
3520 (define_insn "aarch64_<sur>shll_n<mode>"
3521 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3522 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3524 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3528 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3529 if (INTVAL (operands[2]) == bit_width)
3531 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3534 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3536 [(set_attr "type" "neon_shift_imm_long")]

;; Same, operating on the high half of a 128-bit source (shll2).
3541 (define_insn "aarch64_<sur>shll2_n<mode>"
3542 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3544 (match_operand:SI 2 "immediate_operand" "i")]
3548 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3549 if (INTVAL (operands[2]) == bit_width)
3551 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3554 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3556 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (rounding variants via <sur>).
3561 (define_insn "aarch64_<sur>shr_n<mode>"
3562 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3563 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3565 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3568 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3569 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift right and accumulate: operand 0 is tied to operand 1 ("0"
;; constraint) because sra reads and writes the destination register.
3574 (define_insn "aarch64_<sur>sra_n<mode>"
3575 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3576 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3577 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3579 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3582 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3583 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (sli/sri); destination tied to operand 1 since the
;; untouched bits of the destination are preserved.
3588 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3589 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3590 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3591 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3593 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3596 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3597 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift left by immediate (sqshl/uqshl/sqshlu via <sur><u>).
3602 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3603 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3604 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3606 "aarch64_simd_shift_imm_<ve_mode>" "i")]
3609 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3610 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift right narrow by immediate.
3616 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3617 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3618 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3620 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3623 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
3624 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3628 ;; cm(eq|ge|gt|lt|le)
3629 ;; Note, we have constraints for Dz and Z as different expanders
3630 ;; have different ideas of what should be passed to this pattern.

;; Integer vector compare.  Alternative 1 compares two registers
;; (cm<n_optab> with operand order fixed up by <cmp_1>/<cmp_2>);
;; alternative 2 compares against immediate zero (cm<optab> ..., #0).
3632 (define_insn "aarch64_cm<optab><mode>"
3633 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3635 (COMPARISONS:<V_cmp_result>
3636 (match_operand:VDQ_I 1 "register_operand" "w,w")
3637 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3641 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3642 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3643 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode scalar compare.  May be allocated to either the FP/SIMD or
;; the general register file, hence the three alternatives and the
;; CC clobber.  When both operands land in general registers the split
;; emits a compare + cstoredi_neg sequence; otherwise it falls through
;; to the CC-clobber-free pattern below.
3646 (define_insn_and_split "aarch64_cm<optab>di"
3647 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3650 (match_operand:DI 1 "register_operand" "w,w,r")
3651 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3653 (clobber (reg:CC CC_REGNUM))]
3657 [(set (match_operand:DI 0 "register_operand")
3660 (match_operand:DI 1 "register_operand")
3661 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3664 /* If we are in the general purpose register file,
3665 we split to a sequence of comparison and store.  */
3666 if (GP_REGNUM_P (REGNO (operands[0]))
3667 && GP_REGNUM_P (REGNO (operands[1])))
3669 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3670 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3671 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3672 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3675 /* Otherwise, we expand to a similar pattern which does not
3676 clobber CC_REGNUM.  */
3678 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload SIMD-register form of the pattern above (no CC clobber).
3681 (define_insn "*aarch64_cm<optab>di"
3682 [(set (match_operand:DI 0 "register_operand" "=w,w")
3685 (match_operand:DI 1 "register_operand" "w,w")
3686 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3688 "TARGET_SIMD && reload_completed"
3690 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3691 cm<optab>\t%d0, %d1, #0"
3692 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned integer vector compare (UCOMPARISONS iterator).  Unlike the
;; signed version there is no compare-against-zero alternative.
3697 (define_insn "aarch64_cm<optab><mode>"
3698 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3700 (UCOMPARISONS:<V_cmp_result>
3701 (match_operand:VDQ_I 1 "register_operand" "w")
3702 (match_operand:VDQ_I 2 "register_operand" "w")
3705 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3706 [(set_attr "type" "neon_compare<q>")]

;; DImode unsigned compare; same general/SIMD register split strategy
;; as the signed aarch64_cm<optab>di above, but always uses CCmode.
3709 (define_insn_and_split "aarch64_cm<optab>di"
3710 [(set (match_operand:DI 0 "register_operand" "=w,r")
3713 (match_operand:DI 1 "register_operand" "w,r")
3714 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3716 (clobber (reg:CC CC_REGNUM))]
3720 [(set (match_operand:DI 0 "register_operand")
3723 (match_operand:DI 1 "register_operand")
3724 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3727 /* If we are in the general purpose register file,
3728 we split to a sequence of comparison and store.  */
3729 if (GP_REGNUM_P (REGNO (operands[0]))
3730 && GP_REGNUM_P (REGNO (operands[1])))
3732 machine_mode mode = CCmode;
3733 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3734 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3735 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3738 /* Otherwise, we expand to a similar pattern which does not
3739 clobber CC_REGNUM.  */
3741 [(set_attr "type" "neon_compare,multiple")]

;; Post-reload SIMD-register form (no CC clobber).
3744 (define_insn "*aarch64_cm<optab>di"
3745 [(set (match_operand:DI 0 "register_operand" "=w")
3748 (match_operand:DI 1 "register_operand" "w")
3749 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
3751 "TARGET_SIMD && reload_completed"
3752 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
3753 [(set_attr "type" "neon_compare")]
3758 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
3759 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
3760 ;; not (neg (eq (and x y) 0))
3761 ;; which is rewritten by simplify_rtx as
3762 ;; plus (eq (and x y) 0) -1.

;; Vector test-bits: matches the simplified plus/eq/and form described
;; above and emits cmtst.
3764 (define_insn "aarch64_cmtst<mode>"
3765 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3766 (plus:<V_cmp_result>
3769 (match_operand:VDQ_I 1 "register_operand" "w")
3770 (match_operand:VDQ_I 2 "register_operand" "w"))
3771 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
3772 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
3775 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3776 [(set_attr "type" "neon_tst<q>")]

;; DImode cmtst; split to and + compare-NE + cstoredi_neg when the
;; operands are allocated to general registers.
3779 (define_insn_and_split "aarch64_cmtstdi"
3780 [(set (match_operand:DI 0 "register_operand" "=w,r")
3784 (match_operand:DI 1 "register_operand" "w,r")
3785 (match_operand:DI 2 "register_operand" "w,r"))
3787 (clobber (reg:CC CC_REGNUM))]
3791 [(set (match_operand:DI 0 "register_operand")
3795 (match_operand:DI 1 "register_operand")
3796 (match_operand:DI 2 "register_operand"))
3799 /* If we are in the general purpose register file,
3800 we split to a sequence of comparison and store.  */
3801 if (GP_REGNUM_P (REGNO (operands[0]))
3802 && GP_REGNUM_P (REGNO (operands[1])))
3804 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3805 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3806 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3807 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3808 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3811 /* Otherwise, we expand to a similar pattern which does not
3812 clobber CC_REGNUM.  */
3814 [(set_attr "type" "neon_tst,multiple")]

;; Post-reload SIMD-register form.
3817 (define_insn "*aarch64_cmtstdi"
3818 [(set (match_operand:DI 0 "register_operand" "=w")
3822 (match_operand:DI 1 "register_operand" "w")
3823 (match_operand:DI 2 "register_operand" "w"))
3826 "cmtst\t%d0, %d1, %d2"
3827 [(set_attr "type" "neon_tst")]
3830 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare.  Alternative 1 compares two registers;
;; alternative 2 compares against zero (YDz constraint).
3832 (define_insn "aarch64_cm<optab><mode>"
3833 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3835 (COMPARISONS:<V_cmp_result>
3836 (match_operand:VALLF 1 "register_operand" "w,w")
3837 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3841 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3842 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3843 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]

3847 ;; Note we can also handle what would be fac(le|lt) by
3848 ;; generating fac(ge|gt).

;; Absolute compare (facge/facgt): both operands wrapped in abs.
3850 (define_insn "*aarch64_fac<optab><mode>"
3851 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3853 (FAC_COMPARISONS:<V_cmp_result>
3854 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3855 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3858 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3859 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; Pairwise addition of two 64-bit integer vectors (addp).
3864 (define_insn "aarch64_addp<mode>"
3865 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3867 [(match_operand:VD_BHSI 1 "register_operand" "w")
3868 (match_operand:VD_BHSI 2 "register_operand" "w")]
3871 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3872 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar pairwise add across a V2DI source.
3875 (define_insn "aarch64_addpdi"
3876 [(set (match_operand:DI 0 "register_operand" "=w")
3878 [(match_operand:V2DI 1 "register_operand" "w")]
3882 [(set_attr "type" "neon_reduc_add")]

;; Vector floating-point square root.
3887 (define_insn "sqrt<mode>2"
3888 [(set (match_operand:VDQF 0 "register_operand" "=w")
3889 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3891 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3892 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3895 ;; Patterns for vector struct loads and stores.

;; Two-register structure load (ld2) into an OImode register pair.
;; The inner VQ unspec only carries the element mode for iteration.
3897 (define_insn "aarch64_simd_ld2<mode>"
3898 [(set (match_operand:OI 0 "register_operand" "=w")
3899 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3900 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3903 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3904 [(set_attr "type" "neon_load2_2reg<q>")]

;; ld2r: load one two-element chunk and replicate to all lanes.
3907 (define_insn "aarch64_simd_ld2r<mode>"
3908 [(set (match_operand:OI 0 "register_operand" "=w")
3909 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3910 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3913 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3914 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; ld2 into a single lane; operand 2 ("0") supplies the untouched lanes.
3917 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
3918 [(set (match_operand:OI 0 "register_operand" "=w")
3919 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3920 (match_operand:OI 2 "register_operand" "0")
3921 (match_operand:SI 3 "immediate_operand" "i")
3922 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3925 "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
3926 [(set_attr "type" "neon_load2_one_lane")]

;; Standard-name expander: on big-endian, ld2 then permute the register
;; list with aarch64_rev_reglist to restore array order.
3929 (define_expand "vec_load_lanesoi<mode>"
3930 [(set (match_operand:OI 0 "register_operand" "=w")
3931 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3932 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3936 if (BYTES_BIG_ENDIAN)
3938 rtx tmp = gen_reg_rtx (OImode);
3939 rtx mask = aarch64_reverse_mask (<MODE>mode);
3940 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
3941 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
3944 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; Two-register structure store (st2).
3948 (define_insn "aarch64_simd_st2<mode>"
3949 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3950 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3951 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3954 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3955 [(set_attr "type" "neon_store2_2reg<q>")]

3958 ;; RTL uses GCC vector extension indices, so flip only for assembly.
3959 (define_insn "vec_store_lanesoi_lane<mode>"
3960 [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3961 (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
3962 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3963 (match_operand:SI 2 "immediate_operand" "i")]
3967 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
3968 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
3970 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-name store expander; mirrors the load expander's
;; big-endian register-list reversal.
3973 (define_expand "vec_store_lanesoi<mode>"
3974 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3975 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3976 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3980 if (BYTES_BIG_ENDIAN)
3982 rtx tmp = gen_reg_rtx (OImode);
3983 rtx mask = aarch64_reverse_mask (<MODE>mode);
3984 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
3985 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
3988 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register structure load/store (ld3/st3) in CImode; structure
;; parallels the two-register ld2/st2 patterns above.
3992 (define_insn "aarch64_simd_ld3<mode>"
3993 [(set (match_operand:CI 0 "register_operand" "=w")
3994 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3995 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3998 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3999 [(set_attr "type" "neon_load3_3reg<q>")]

;; ld3r: load one three-element chunk, replicate to all lanes.
4002 (define_insn "aarch64_simd_ld3r<mode>"
4003 [(set (match_operand:CI 0 "register_operand" "=w")
4004 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4005 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4008 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4009 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; ld3 into a single lane; operand 2 ("0") supplies untouched lanes.
4012 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4013 [(set (match_operand:CI 0 "register_operand" "=w")
4014 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4015 (match_operand:CI 2 "register_operand" "0")
4016 (match_operand:SI 3 "immediate_operand" "i")
4017 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4020 "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
4021 [(set_attr "type" "neon_load3_one_lane")]

;; Standard-name expander with big-endian register-list reversal.
4024 (define_expand "vec_load_lanesci<mode>"
4025 [(set (match_operand:CI 0 "register_operand" "=w")
4026 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4027 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4031 if (BYTES_BIG_ENDIAN)
4033 rtx tmp = gen_reg_rtx (CImode);
4034 rtx mask = aarch64_reverse_mask (<MODE>mode);
4035 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4036 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4039 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

4043 (define_insn "aarch64_simd_st3<mode>"
4044 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4045 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4046 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4049 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4050 [(set_attr "type" "neon_store3_3reg<q>")]

4053 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4054 (define_insn "vec_store_lanesci_lane<mode>"
4055 [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
4056 (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
4057 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4058 (match_operand:SI 2 "immediate_operand" "i")]
4062 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4063 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4065 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-name store expander with big-endian reversal.
4068 (define_expand "vec_store_lanesci<mode>"
4069 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4070 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4071 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4075 if (BYTES_BIG_ENDIAN)
4077 rtx tmp = gen_reg_rtx (CImode);
4078 rtx mask = aarch64_reverse_mask (<MODE>mode);
4079 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4080 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4083 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register structure load/store (ld4/st4) in XImode; same scheme
;; as the ld2/st2 and ld3/st3 groups above.
4087 (define_insn "aarch64_simd_ld4<mode>"
4088 [(set (match_operand:XI 0 "register_operand" "=w")
4089 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4090 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4093 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4094 [(set_attr "type" "neon_load4_4reg<q>")]

;; ld4r: load one four-element chunk, replicate to all lanes.
4097 (define_insn "aarch64_simd_ld4r<mode>"
4098 [(set (match_operand:XI 0 "register_operand" "=w")
4099 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4100 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4103 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4104 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; ld4 into a single lane; operand 2 ("0") supplies untouched lanes.
4107 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4108 [(set (match_operand:XI 0 "register_operand" "=w")
4109 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4110 (match_operand:XI 2 "register_operand" "0")
4111 (match_operand:SI 3 "immediate_operand" "i")
4112 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4115 "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
4116 [(set_attr "type" "neon_load4_one_lane")]

;; Standard-name expander with big-endian register-list reversal.
4119 (define_expand "vec_load_lanesxi<mode>"
4120 [(set (match_operand:XI 0 "register_operand" "=w")
4121 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4122 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4126 if (BYTES_BIG_ENDIAN)
4128 rtx tmp = gen_reg_rtx (XImode);
4129 rtx mask = aarch64_reverse_mask (<MODE>mode);
4130 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4131 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4134 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

4138 (define_insn "aarch64_simd_st4<mode>"
4139 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4140 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4141 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4144 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4145 [(set_attr "type" "neon_store4_4reg<q>")]

4148 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4149 (define_insn "vec_store_lanesxi_lane<mode>"
4150 [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
4151 (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
4152 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4153 (match_operand:SI 2 "immediate_operand" "i")]
4157 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4158 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4160 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Standard-name store expander with big-endian reversal.
4163 (define_expand "vec_store_lanesxi<mode>"
4164 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4165 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4166 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4170 if (BYTES_BIG_ENDIAN)
4172 rtx tmp = gen_reg_rtx (XImode);
4173 rtx mask = aarch64_reverse_mask (<MODE>mode);
4174 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4175 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4178 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the byte order within each register of a struct register
;; list, using operand 2 as the tbl permute mask.  After reload this
;; splits into one tbl.16b per constituent V-register.  Destination is
;; early-clobbered ("&w") since it is written before all inputs are
;; consumed by the split sequence.
4182 (define_insn_and_split "aarch64_rev_reglist<mode>"
4183 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4185 [(match_operand:VSTRUCT 1 "register_operand" "w")
4186 (match_operand:V16QI 2 "register_operand" "w")]
4187 UNSPEC_REV_REGLIST))]
4190 "&& reload_completed"
4194 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4195 for (i = 0; i < nregs; i++)
4197 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4198 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4199 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4203 [(set_attr "type" "neon_tbl1_q")
4204 (set_attr "length" "<insn_count>")]
4207 ;; Reload patterns for AdvSIMD register list operands.

;; Generic move expander for struct modes: before reload, force the
;; source into a register when the destination is not one.
4209 (define_expand "mov<mode>"
4210 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4211 (match_operand:VSTRUCT 1 "general_operand" ""))]
4214 if (can_create_pseudo_p ())
4216 if (GET_CODE (operands[0]) != REG)
4217 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Little-endian struct move: reg-reg (split later), or st1/ld1 of the
;; whole register list.  Length is computed per-alternative by
;; aarch64_simd_attr_length_move.
4221 (define_insn "*aarch64_mov<mode>"
4222 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4223 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4224 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4225 && (register_operand (operands[0], <MODE>mode)
4226 || register_operand (operands[1], <MODE>mode))"
4229 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4230 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4231 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4232 neon_load<nregs>_<nregs>reg_q")
4233 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; Big-endian element-ordering-preserving single-vector load/store.
4236 (define_insn "aarch64_be_ld1<mode>"
4237 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4238 (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
4241 "ld1\\t{%0<Vmtype>}, %1"
4242 [(set_attr "type" "neon_load1_1reg<q>")]

4245 (define_insn "aarch64_be_st1<mode>"
4246 [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
4247 (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
4250 "st1\\t{%1<Vmtype>}, %0"
4251 [(set_attr "type" "neon_store1_1reg<q>")]

;; Big-endian OI/CI/XI moves.  The register-register alternative is
;; split post-reload (see the splits further down); memory alternatives
;; are handled per-mode.
4254 (define_insn "*aarch64_be_movoi"
4255 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4256 (match_operand:OI 1 "general_operand" " w,w,m"))]
4257 "TARGET_SIMD && BYTES_BIG_ENDIAN
4258 && (register_operand (operands[0], OImode)
4259 || register_operand (operands[1], OImode))"
4264 [(set_attr "type" "multiple,neon_store2_2reg_q,neon_load2_2reg_q")
4265 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]

4268 (define_insn "*aarch64_be_movci"
4269 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4270 (match_operand:CI 1 "general_operand" " w,w,o"))]
4271 "TARGET_SIMD && BYTES_BIG_ENDIAN
4272 && (register_operand (operands[0], CImode)
4273 || register_operand (operands[1], CImode))"
4275 [(set_attr "type" "multiple")
4276 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]

4279 (define_insn "*aarch64_be_movxi"
4280 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4281 (match_operand:XI 1 "general_operand" " w,w,o"))]
4282 "TARGET_SIMD && BYTES_BIG_ENDIAN
4283 && (register_operand (operands[0], XImode)
4284 || register_operand (operands[1], XImode))"
4286 [(set_attr "type" "multiple")
4287 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; Post-reload splits for struct-mode moves.  Register-register moves
;; are decomposed into TImode piecewise moves; big-endian CI/XI moves
;; are decomposed into OImode (and TImode-subreg) chunks.
4291 [(set (match_operand:OI 0 "register_operand")
4292 (match_operand:OI 1 "register_operand"))]
4293 "TARGET_SIMD && reload_completed"
4296 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; CI split: reg-reg goes through three TImode moves; big-endian
;; memory moves copy an OImode chunk then the trailing TImode chunk
;; through a V16QI lowpart.
4301 [(set (match_operand:CI 0 "nonimmediate_operand")
4302 (match_operand:CI 1 "general_operand"))]
4303 "TARGET_SIMD && reload_completed"
4306 if (register_operand (operands[0], CImode)
4307 && register_operand (operands[1], CImode))
4309 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4312 else if (BYTES_BIG_ENDIAN)
4314 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4315 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4316 emit_move_insn (gen_lowpart (V16QImode,
4317 simplify_gen_subreg (TImode, operands[0],
4319 gen_lowpart (V16QImode,
4320 simplify_gen_subreg (TImode, operands[1],
4321 ...

;; XI split: four TImode reg-reg moves, or two OImode memory chunks
;; (offsets 0 and 32 bytes) on big-endian.
4329 [(set (match_operand:XI 0 "nonimmediate_operand")
4330 (match_operand:XI 1 "general_operand"))]
4331 "TARGET_SIMD && reload_completed"
4334 if (register_operand (operands[0], XImode)
4335 && register_operand (operands[1], XImode))
4337 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4340 else if (BYTES_BIG_ENDIAN)
4342 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4343 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4344 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4345 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expanders for ldNr: wrap the pointer in operand 1 in a MEM
;; of the N-element mode and emit the matching ldNr insn.
4352 (define_expand "aarch64_ld2r<mode>"
4353 [(match_operand:OI 0 "register_operand" "=w")
4354 (match_operand:DI 1 "register_operand" "w")
4355 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4358 machine_mode mode = <V_TWO_ELEM>mode;
4359 rtx mem = gen_rtx_MEM (mode, operands[1]);
4361 emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));

4365 (define_expand "aarch64_ld3r<mode>"
4366 [(match_operand:CI 0 "register_operand" "=w")
4367 (match_operand:DI 1 "register_operand" "w")
4368 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4371 machine_mode mode = <V_THREE_ELEM>mode;
4372 rtx mem = gen_rtx_MEM (mode, operands[1]);
4374 emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
;; Builtin expander for ld4r: wrap the pointer in operand 1 in a MEM of
;; the four-element mode and emit the ld4r insn.  Formatting fixed to
;; match the ld2r/ld3r siblings (space after the comma in the
;; gen_... argument list); no behavioral change.
4378 (define_expand "aarch64_ld4r<mode>"
4379 [(match_operand:XI 0 "register_operand" "=w")
4380 (match_operand:DI 1 "register_operand" "w")
4381 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4384 machine_mode mode = <V_FOUR_ELEM>mode;
4385 rtx mem = gen_rtx_MEM (mode, operands[1]);
4387 emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0], mem));
;; ldN into D-registers: the loaded D-register values are assembled
;; into the wide struct mode with the upper halves zeroed
;; (vec_duplicate/const_int 0 concatenations).  The DX variants handle
;; 64-bit element modes, where a plain ld1 multi-register load is used.
4391 (define_insn "aarch64_ld2<mode>_dreg"
4392 [(set (match_operand:OI 0 "register_operand" "=w")
4396 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4398 (vec_duplicate:VD (const_int 0)))
4400 (unspec:VD [(match_dup 1)]
4402 (vec_duplicate:VD (const_int 0)))) 0))]
4404 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4405 [(set_attr "type" "neon_load2_2reg<q>")]

;; DX (64-bit element) variant: emitted as ld1 of two 1d registers.
4408 (define_insn "aarch64_ld2<mode>_dreg"
4409 [(set (match_operand:OI 0 "register_operand" "=w")
4413 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4417 (unspec:DX [(match_dup 1)]
4419 (const_int 0))) 0))]
4421 "ld1\\t{%S0.1d - %T0.1d}, %1"
4422 [(set_attr "type" "neon_load1_2reg<q>")]

4425 (define_insn "aarch64_ld3<mode>_dreg"
4426 [(set (match_operand:CI 0 "register_operand" "=w")
4431 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4433 (vec_duplicate:VD (const_int 0)))
4435 (unspec:VD [(match_dup 1)]
4437 (vec_duplicate:VD (const_int 0))))
4439 (unspec:VD [(match_dup 1)]
4441 (vec_duplicate:VD (const_int 0)))) 0))]
4443 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4444 [(set_attr "type" "neon_load3_3reg<q>")]

4447 (define_insn "aarch64_ld3<mode>_dreg"
4448 [(set (match_operand:CI 0 "register_operand" "=w")
4453 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4457 (unspec:DX [(match_dup 1)]
4461 (unspec:DX [(match_dup 1)]
4463 (const_int 0))) 0))]
4465 "ld1\\t{%S0.1d - %U0.1d}, %1"
4466 [(set_attr "type" "neon_load1_3reg<q>")]

4469 (define_insn "aarch64_ld4<mode>_dreg"
4470 [(set (match_operand:XI 0 "register_operand" "=w")
4475 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4477 (vec_duplicate:VD (const_int 0)))
4479 (unspec:VD [(match_dup 1)]
4481 (vec_duplicate:VD (const_int 0))))
4484 (unspec:VD [(match_dup 1)]
4486 (vec_duplicate:VD (const_int 0)))
4488 (unspec:VD [(match_dup 1)]
4490 (vec_duplicate:VD (const_int 0))))) 0))]
4492 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4493 [(set_attr "type" "neon_load4_4reg<q>")]

4496 (define_insn "aarch64_ld4<mode>_dreg"
4497 [(set (match_operand:XI 0 "register_operand" "=w")
4502 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4506 (unspec:DX [(match_dup 1)]
4511 (unspec:DX [(match_dup 1)]
4515 (unspec:DX [(match_dup 1)]
4517 (const_int 0)))) 0))]
4519 "ld1\\t{%S0.1d - %V0.1d}, %1"
4520 [(set_attr "type" "neon_load1_4reg<q>")]
4523 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4524 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4525 (match_operand:DI 1 "register_operand" "r")
4526 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4529 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4530 rtx mem = gen_rtx_MEM (mode, operands[1]);
4532 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4536 (define_expand "aarch64_ld1<VALL:mode>"
4537 [(match_operand:VALL 0 "register_operand")
4538 (match_operand:DI 1 "register_operand")]
4541 machine_mode mode = <VALL:MODE>mode;
4542 rtx mem = gen_rtx_MEM (mode, operands[1]);
4544 if (BYTES_BIG_ENDIAN)
4545 emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
4547 emit_move_insn (operands[0], mem);
4551 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4552 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4553 (match_operand:DI 1 "register_operand" "r")
4554 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4557 machine_mode mode = <VSTRUCT:MODE>mode;
4558 rtx mem = gen_rtx_MEM (mode, operands[1]);
4560 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
;; LD2 single-lane expander: build a MEM of the two-element mode, range-check
;; the lane index (operand 3) against the element count of the containing
;; Q-mode, then emit the lane-load insn.
;; NOTE(review): trailing arguments of both calls appear elided in this copy.
4564 (define_expand "aarch64_ld2_lane<mode>"
4565 [(match_operand:OI 0 "register_operand" "=w")
4566 (match_operand:DI 1 "register_operand" "w")
4567 (match_operand:OI 2 "register_operand" "0")
4568 (match_operand:SI 3 "immediate_operand" "i")
4569 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 machine_mode mode = <V_TWO_ELEM>mode;
4573 rtx mem = gen_rtx_MEM (mode, operands[1]);
4575 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4577 emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
;; LD3 single-lane expander: same shape as ld2_lane but with a CI register
;; tuple and the three-element memory mode.
;; NOTE(review): trailing arguments of both calls appear elided in this copy.
4584 (define_expand "aarch64_ld3_lane<mode>"
4585 [(match_operand:CI 0 "register_operand" "=w")
4586 (match_operand:DI 1 "register_operand" "w")
4587 (match_operand:CI 2 "register_operand" "0")
4588 (match_operand:SI 3 "immediate_operand" "i")
4589 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4592 machine_mode mode = <V_THREE_ELEM>mode;
4593 rtx mem = gen_rtx_MEM (mode, operands[1]);
4595 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4597 emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
;; LD4 single-lane expander: same shape as ld2_lane but with an XI register
;; tuple and the four-element memory mode.
;; NOTE(review): trailing arguments of both calls appear elided in this copy.
4604 (define_expand "aarch64_ld4_lane<mode>"
4605 [(match_operand:XI 0 "register_operand" "=w")
4606 (match_operand:DI 1 "register_operand" "w")
4607 (match_operand:XI 2 "register_operand" "0")
4608 (match_operand:SI 3 "immediate_operand" "i")
4609 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4612 machine_mode mode = <V_FOUR_ELEM>mode;
4613 rtx mem = gen_rtx_MEM (mode, operands[1]);
4615 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4617 emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
4626 ;; Expanders for builtins to extract vector registers from large
4627 ;; opaque integer modes.
;; Extract D-register number `part` (operand 2, a compile-time immediate) from
;; a structure register tuple: take a VDBL-mode subreg at byte offset part*16,
;; then the low half of that as the requested D-reg mode.
4631 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4632 [(match_operand:VDC 0 "register_operand" "=w")
4633 (match_operand:VSTRUCT 1 "register_operand" "w")
4634 (match_operand:SI 2 "immediate_operand" "i")]
4637 int part = INTVAL (operands[2]);
4638 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4639 int offset = part * 16;
4641 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4642 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register number `part` from a structure register tuple as a
;; direct subreg at byte offset part*16 (each Q-reg is 16 bytes).
4648 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4649 [(match_operand:VQ 0 "register_operand" "=w")
4650 (match_operand:VSTRUCT 1 "register_operand" "w")
4651 (match_operand:SI 2 "immediate_operand" "i")]
4654 int part = INTVAL (operands[2]);
4655 int offset = part * 16;
4657 emit_move_insn (operands[0],
4658 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
4662 ;; Permuted-store expanders for neon intrinsics.
4664 ;; Permute instructions
;; Standard-named constant permute expander: delegate to
;; aarch64_expand_vec_perm_const, which picks the best instruction sequence
;; for the given selector.  NOTE(review): the DONE/FAIL tail of this expander
;; appears elided in this copy.
4668 (define_expand "vec_perm_const<mode>"
4669 [(match_operand:VALL 0 "register_operand")
4670 (match_operand:VALL 1 "register_operand")
4671 (match_operand:VALL 2 "register_operand")
4672 (match_operand:<V_cmp_result> 3)]
4675 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
4676 operands[2], operands[3]))
;; Standard-named variable permute expander (byte vectors only): delegate to
;; aarch64_expand_vec_perm, which emits a TBL-based sequence.
4682 (define_expand "vec_perm<mode>"
4683 [(match_operand:VB 0 "register_operand")
4684 (match_operand:VB 1 "register_operand")
4685 (match_operand:VB 2 "register_operand")
4686 (match_operand:VB 3 "register_operand")]
4689 aarch64_expand_vec_perm (operands[0], operands[1],
4690 operands[2], operands[3]);
;; Single-register table lookup: TBL with one V16QI table register and a
;; byte-vector index operand.
4694 (define_insn "aarch64_tbl1<mode>"
4695 [(set (match_operand:VB 0 "register_operand" "=w")
4696 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
4697 (match_operand:VB 2 "register_operand" "w")]
4700 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
4701 [(set_attr "type" "neon_tbl1<q>")]
4704 ;; Two source registers.
;; Two-register table lookup: the OI operand supplies a consecutive pair of
;; table registers (%S1/%T1 print the first and second Q-reg of the tuple).
4706 (define_insn "aarch64_tbl2v16qi"
4707 [(set (match_operand:V16QI 0 "register_operand" "=w")
4708 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
4709 (match_operand:V16QI 2 "register_operand" "w")]
4712 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
4713 [(set_attr "type" "neon_tbl2_q")]
;; Combine two V16QI registers into a consecutive OI register pair (needed as
;; TBL2 input).  Kept as one insn until after reload, then split by
;; aarch64_split_combinev16qi once hard registers are known.
4716 (define_insn_and_split "aarch64_combinev16qi"
4717 [(set (match_operand:OI 0 "register_operand" "=w")
4718 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
4719 (match_operand:V16QI 2 "register_operand" "w")]
4723 "&& reload_completed"
4726 aarch64_split_combinev16qi (operands);
4729 [(set_attr "type" "multiple")]
;; Generic two-source permute insn; the PERMUTE iterator supplies the
;; mnemonic (perm_insn) and the 1/2 half selector (perm_hilo).
4732 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
4733 [(set (match_operand:VALL 0 "register_operand" "=w")
4734 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4735 (match_operand:VALL 2 "register_operand" "w")]
4738 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4739 [(set_attr "type" "neon_permute<q>")]
4742 ;; Note immediate (third) operand is lane index not byte index.
;; EXT (extract from register pair).  Operand 3 arrives as a LANE index; the
;; output code rescales it in place to the BYTE index EXT requires by
;; multiplying by the element size.
4743 (define_insn "aarch64_ext<mode>"
4744 [(set (match_operand:VALL 0 "register_operand" "=w")
4745 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4746 (match_operand:VALL 2 "register_operand" "w")
4747 (match_operand:SI 3 "immediate_operand" "i")]
4751 operands[3] = GEN_INT (INTVAL (operands[3])
4752 * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
4753 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
4755 [(set_attr "type" "neon_ext<q>")]
;; Element-reversal insns; the REVERSE iterator supplies the REV16/REV32/REV64
;; operation suffix.
4758 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
4759 [(set (match_operand:VALL 0 "register_operand" "=w")
4760 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
4763 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
4764 [(set_attr "type" "neon_rev<q>")]
;; ST2 of two D-registers (multi-element VD modes): true interleaving store.
4767 (define_insn "aarch64_st2<mode>_dreg"
4768 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4769 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4770 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4773 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4774 [(set_attr "type" "neon_store2_2reg")]
;; ST2 for 64-bit-element (DX) modes: no interleaving possible with a single
;; element per register, so emit ST1 of two 1d vectors instead.
4777 (define_insn "aarch64_st2<mode>_dreg"
4778 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4779 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4780 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4783 "st1\\t{%S1.1d - %T1.1d}, %0"
4784 [(set_attr "type" "neon_store1_2reg")]
;; ST3 of three D-registers (multi-element VD modes).
4787 (define_insn "aarch64_st3<mode>_dreg"
4788 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4789 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4790 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4793 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4794 [(set_attr "type" "neon_store3_3reg")]
;; ST3 for 64-bit-element (DX) modes: emitted as ST1 of three 1d vectors.
4797 (define_insn "aarch64_st3<mode>_dreg"
4798 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4800 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4803 "st1\\t{%S1.1d - %U1.1d}, %0"
4804 [(set_attr "type" "neon_store1_3reg")]
;; ST4 of four D-registers (multi-element VD modes).
4807 (define_insn "aarch64_st4<mode>_dreg"
4808 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4809 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4810 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4813 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4814 [(set_attr "type" "neon_store4_4reg")]
;; ST4 for 64-bit-element (DX) modes: emitted as ST1 of four 1d vectors.
4817 (define_insn "aarch64_st4<mode>_dreg"
4818 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4819 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4820 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4823 "st1\\t{%S1.1d - %V1.1d}, %0"
4824 [(set_attr "type" "neon_store1_4reg")]
;; Expander for ST2/ST3/ST4 of D-register vectors: wrap the address register
;; (operand 0) in a MEM of the structure's D-reg mode and hand off to the
;; matching "_dreg" insn above.
4827 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
4828 [(match_operand:DI 0 "register_operand" "r")
4829 (match_operand:VSTRUCT 1 "register_operand" "w")
4830 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4833 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4834 rtx mem = gen_rtx_MEM (mode, operands[0]);
4836 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Expander for ST2/ST3/ST4 of Q-register vectors via the generic
;; vec_store_lanes patterns.
4840 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
4841 [(match_operand:DI 0 "register_operand" "r")
4842 (match_operand:VSTRUCT 1 "register_operand" "w")
4843 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4846 machine_mode mode = <VSTRUCT:MODE>mode;
4847 rtx mem = gen_rtx_MEM (mode, operands[0]);
4849 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
;; ST2 single-lane expander: build the two-element MEM and remap the lane
;; number through ENDIAN_LANE_N so big-endian targets store the architectural
;; lane the user asked for.
;; NOTE(review): trailing arguments of the emit call appear elided in this copy.
4853 (define_expand "aarch64_st2_lane<VQ:mode>"
4854 [(match_operand:DI 0 "register_operand" "r")
4855 (match_operand:OI 1 "register_operand" "w")
4856 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4857 (match_operand:SI 2 "immediate_operand")]
4860 machine_mode mode = <V_TWO_ELEM>mode;
4861 rtx mem = gen_rtx_MEM (mode, operands[0]);
4862 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4864 emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
;; ST3 single-lane expander: same shape as st2_lane with a CI tuple and the
;; three-element memory mode.
;; NOTE(review): trailing arguments of the emit call appear elided in this copy.
4870 (define_expand "aarch64_st3_lane<VQ:mode>"
4871 [(match_operand:DI 0 "register_operand" "r")
4872 (match_operand:CI 1 "register_operand" "w")
4873 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4874 (match_operand:SI 2 "immediate_operand")]
4877 machine_mode mode = <V_THREE_ELEM>mode;
4878 rtx mem = gen_rtx_MEM (mode, operands[0]);
4879 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4881 emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
;; ST4 single-lane expander: same shape as st2_lane with an XI tuple and the
;; four-element memory mode.
;; NOTE(review): trailing arguments of the emit call appear elided in this copy.
4887 (define_expand "aarch64_st4_lane<VQ:mode>"
4888 [(match_operand:DI 0 "register_operand" "r")
4889 (match_operand:XI 1 "register_operand" "w")
4890 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4891 (match_operand:SI 2 "immediate_operand")]
4894 machine_mode mode = <V_FOUR_ELEM>mode;
4895 rtx mem = gen_rtx_MEM (mode, operands[0]);
4896 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4898 emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
;; ST1 expander: mirror of aarch64_ld1 — big-endian goes through the dedicated
;; be_st1 pattern, little-endian is a plain store.
;; NOTE(review): the "else" line between the two emits appears elided in this
;; copy (line 4914 missing) — confirm against upstream.
4904 (define_expand "aarch64_st1<VALL:mode>"
4905 [(match_operand:DI 0 "register_operand")
4906 (match_operand:VALL 1 "register_operand")]
4909 machine_mode mode = <VALL:MODE>mode;
4910 rtx mem = gen_rtx_MEM (mode, operands[0]);
4912 if (BYTES_BIG_ENDIAN)
4913 emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
4915 emit_move_insn (mem, operands[1]);
4919 ;; Expander for builtins to insert vector registers into large
4920 ;; opaque integer modes.
4922 ;; Q-register list. We don't need a D-reg inserter as we zero
4923 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert Q-register (operand 2) as part `part` of a structure tuple: copy the
;; whole input tuple (operand 1) to the destination, then overwrite the target
;; Q-reg via a subreg at byte offset part*16.
;; NOTE(review): the second argument of the final emit_move_insn appears
;; elided in this copy.
4925 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4926 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4927 (match_operand:VSTRUCT 1 "register_operand" "0")
4928 (match_operand:VQ 2 "register_operand" "w")
4929 (match_operand:SI 3 "immediate_operand" "i")]
4932 int part = INTVAL (operands[3]);
4933 int offset = part * 16;
4935 emit_move_insn (operands[0], operands[1]);
4936 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4941 ;; Standard pattern name vec_init<mode>.
;; Standard-named vector-initialization expander: all work is done in
;; aarch64_expand_vector_init.
4943 (define_expand "vec_init<mode>"
4944 [(match_operand:VALL 0 "register_operand" "")
4945 (match_operand 1 "" "")]
4948 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load-and-replicate: LD1R loads one element from memory and duplicates it to
;; every lane of the destination.
4952 (define_insn "*aarch64_simd_ld1r<mode>"
4953 [(set (match_operand:VALL 0 "register_operand" "=w")
4955 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
4957 "ld1r\\t{%0.<Vtype>}, %1"
4958 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: floating-point reciprocal estimate, vector form.
4961 (define_insn "aarch64_frecpe<mode>"
4962 [(set (match_operand:VDQF 0 "register_operand" "=w")
4963 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4966 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
4967 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
;; Scalar FRECP<suffix> on SF/DF values; the FRECP iterator supplies the
;; operation suffix.
4970 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
4971 [(set (match_operand:GPF 0 "register_operand" "=w")
4972 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
4975 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
4976 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
;; FRECPS: floating-point reciprocal step (Newton-Raphson refinement helper),
;; vector and scalar forms via VALLF.
4979 (define_insn "aarch64_frecps<mode>"
4980 [(set (match_operand:VALLF 0 "register_operand" "=w")
4981 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
4982 (match_operand:VALLF 2 "register_operand" "w")]
4985 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4986 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
;; URECPE: unsigned integer reciprocal estimate on 32-bit element vectors.
4989 (define_insn "aarch64_urecpe<mode>"
4990 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
4991 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
4994 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
4995 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
4997 ;; Standard pattern name vec_extract<mode>.
;; Standard-named element-extraction expander: dispatch to the get_lane
;; pattern for the mode.
4999 (define_expand "vec_extract<mode>"
5000 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5001 (match_operand:VALL 1 "register_operand" "")
5002 (match_operand:SI 2 "immediate_operand" "")]
5006 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AESE/AESD round: operand 1 ("0" constraint) is the accumulated state,
;; operand 2 the round key; only operand 2 appears in the printed template
;; because the destination doubles as the first source.
5012 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5013 [(set (match_operand:V16QI 0 "register_operand" "=w")
5014 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5015 (match_operand:V16QI 2 "register_operand" "w")]
5017 "TARGET_SIMD && TARGET_CRYPTO"
5018 "aes<aes_op>\\t%0.16b, %2.16b"
5019 [(set_attr "type" "crypto_aese")]
;; AESMC/AESIMC: AES (inverse) mix-columns step.
5022 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5023 [(set (match_operand:V16QI 0 "register_operand" "=w")
5024 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
5026 "TARGET_SIMD && TARGET_CRYPTO"
5027 "aes<aesmc_op>\\t%0.16b, %1.16b"
5028 [(set_attr "type" "crypto_aesmc")]
;; SHA1H: SHA1 fixed-rotate operation on a single SI value.
;; NOTE(review): the output-template line appears elided in this copy
;; (line 5039 missing between condition and set_attr).
5033 (define_insn "aarch64_crypto_sha1hsi"
5034 [(set (match_operand:SI 0 "register_operand" "=w")
5035 (unspec:SI [(match_operand:SI 1
5036 "register_operand" "w")]
5038 "TARGET_SIMD && TARGET_CRYPTO"
5040 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1: SHA1 schedule update, part 2.  Operand 1 is tied to the
;; destination ("0"), so the template only prints operand 2.
5043 (define_insn "aarch64_crypto_sha1su1v4si"
5044 [(set (match_operand:V4SI 0 "register_operand" "=w")
5045 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5046 (match_operand:V4SI 2 "register_operand" "w")]
5048 "TARGET_SIMD && TARGET_CRYPTO"
5049 "sha1su1\\t%0.4s, %2.4s"
5050 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update (sha1_op selects the variant): %q0 is the
;; hash state, %s2 the scalar e-value, operand 3 the schedule words.
5053 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5054 [(set (match_operand:V4SI 0 "register_operand" "=w")
5055 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5056 (match_operand:SI 2 "register_operand" "w")
5057 (match_operand:V4SI 3 "register_operand" "w")]
5059 "TARGET_SIMD && TARGET_CRYPTO"
5060 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5061 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0: SHA1 schedule update, part 1.
5064 (define_insn "aarch64_crypto_sha1su0v4si"
5065 [(set (match_operand:V4SI 0 "register_operand" "=w")
5066 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5067 (match_operand:V4SI 2 "register_operand" "w")
5068 (match_operand:V4SI 3 "register_operand" "w")]
5070 "TARGET_SIMD && TARGET_CRYPTO"
5071 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5072 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash update (sha256_op selects the variant).
5077 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5078 [(set (match_operand:V4SI 0 "register_operand" "=w")
5079 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5080 (match_operand:V4SI 2 "register_operand" "w")
5081 (match_operand:V4SI 3 "register_operand" "w")]
5083 "TARGET_SIMD && TARGET_CRYPTO"
5084 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5085 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0: SHA256 schedule update, part 1.  Operand 1 is tied to the
;; destination ("0"), so the template only prints operand 2.
;; Fix: insn condition normalized from "TARGET_SIMD &&TARGET_CRYPTO" to
;; "TARGET_SIMD && TARGET_CRYPTO" (missing space after "&&") for consistency
;; with every sibling crypto pattern; the generated C is semantically
;; identical.
5088 (define_insn "aarch64_crypto_sha256su0v4si"
5089 [(set (match_operand:V4SI 0 "register_operand" "=w")
5090 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5091 (match_operand:V4SI 2 "register_operand" "w")]
5093 "TARGET_SIMD && TARGET_CRYPTO"
5094 "sha256su0\\t%0.4s, %2.4s"
5095 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1: SHA256 schedule update, part 2.
;; Fix: insn condition normalized from "TARGET_SIMD &&TARGET_CRYPTO" to
;; "TARGET_SIMD && TARGET_CRYPTO" (missing space after "&&") for consistency
;; with every sibling crypto pattern; the generated C is semantically
;; identical.
5098 (define_insn "aarch64_crypto_sha256su1v4si"
5099 [(set (match_operand:V4SI 0 "register_operand" "=w")
5100 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5101 (match_operand:V4SI 2 "register_operand" "w")
5102 (match_operand:V4SI 3 "register_operand" "w")]
5104 "TARGET_SIMD && TARGET_CRYPTO"
5105 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5106 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: 64x64 -> 128-bit carry-less (polynomial) multiply, low halves.
5111 (define_insn "aarch64_crypto_pmulldi"
5112 [(set (match_operand:TI 0 "register_operand" "=w")
5113 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5114 (match_operand:DI 2 "register_operand" "w")]
5116 "TARGET_SIMD && TARGET_CRYPTO"
5117 "pmull\\t%0.1q, %1.1d, %2.1d"
5118 [(set_attr "type" "neon_mul_d_long")]
;; PMULL2: carry-less multiply of the HIGH 64-bit halves of two V2DI inputs.
5121 (define_insn "aarch64_crypto_pmullv2di"
5122 [(set (match_operand:TI 0 "register_operand" "=w")
5123 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5124 (match_operand:V2DI 2 "register_operand" "w")]
5126 "TARGET_SIMD && TARGET_CRYPTO"
5127 "pmull2\\t%0.1q, %1.2d, %2.2d"
5128 [(set_attr "type" "neon_mul_d_long")]