1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
23 (match_operand:VALL 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF 0 "register_operand" "=w")
57 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
59 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
60 [(set_attr "type" "neon_dup<q>")]
63 (define_insn "aarch64_dup_lane<mode>"
64 [(set (match_operand:VALL 0 "register_operand" "=w")
67 (match_operand:VALL 1 "register_operand" "w")
68 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
72 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
73 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
75 [(set_attr "type" "neon_dup<q>")]
78 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
79 [(set (match_operand:VALL 0 "register_operand" "=w")
82 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
83 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
87 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
88 INTVAL (operands[2])));
89 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
91 [(set_attr "type" "neon_dup<q>")]
94 (define_insn "*aarch64_simd_mov<mode>"
95 [(set (match_operand:VD 0 "nonimmediate_operand"
96 "=w, m, w, ?r, ?w, ?r, w")
97 (match_operand:VD 1 "general_operand"
98 "m, w, w, w, r, r, Dn"))]
100 && (register_operand (operands[0], <MODE>mode)
101 || register_operand (operands[1], <MODE>mode))"
103 switch (which_alternative)
105 case 0: return "ldr\\t%d0, %1";
106 case 1: return "str\\t%d1, %0";
107 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
108 case 3: return "umov\t%0, %1.d[0]";
109 case 4: return "ins\t%0.d[0], %1";
110 case 5: return "mov\t%0, %1";
112 return aarch64_output_simd_mov_immediate (operands[1],
114 default: gcc_unreachable ();
117 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
118 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
119 mov_reg, neon_move<q>")]
122 (define_insn "*aarch64_simd_mov<mode>"
123 [(set (match_operand:VQ 0 "nonimmediate_operand"
124 "=w, m, w, ?r, ?w, ?r, w")
125 (match_operand:VQ 1 "general_operand"
126 "m, w, w, w, r, r, Dn"))]
128 && (register_operand (operands[0], <MODE>mode)
129 || register_operand (operands[1], <MODE>mode))"
131 switch (which_alternative)
134 return "ldr\\t%q0, %1";
136 return "str\\t%q1, %0";
138 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
144 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
149 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
150 neon_logic<q>, multiple, multiple, multiple,\
152 (set_attr "length" "4,4,4,8,8,8,4")]
156 [(set (match_operand:VQ 0 "register_operand" "")
157 (match_operand:VQ 1 "register_operand" ""))]
158 "TARGET_SIMD && reload_completed
159 && GP_REGNUM_P (REGNO (operands[0]))
160 && GP_REGNUM_P (REGNO (operands[1]))"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
168 dest[0] = gen_rtx_REG (DImode, rdest);
169 src[0] = gen_rtx_REG (DImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 1);
171 src[1] = gen_rtx_REG (DImode, rsrc + 1);
173 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
177 [(set (match_operand:VQ 0 "register_operand" "")
178 (match_operand:VQ 1 "register_operand" ""))]
179 "TARGET_SIMD && reload_completed
180 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
181 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
184 aarch64_split_simd_move (operands[0], operands[1]);
188 (define_expand "aarch64_split_simd_mov<mode>"
189 [(set (match_operand:VQ 0)
190 (match_operand:VQ 1))]
193 rtx dst = operands[0];
194 rtx src = operands[1];
196 if (GP_REGNUM_P (REGNO (src)))
198 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
199 rtx src_high_part = gen_highpart (<VHALF>mode, src);
202 (gen_move_lo_quad_<mode> (dst, src_low_part));
204 (gen_move_hi_quad_<mode> (dst, src_high_part));
209 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
210 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
211 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
212 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
215 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
217 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
223 (define_insn "aarch64_simd_mov_from_<mode>low"
224 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
226 (match_operand:VQ 1 "register_operand" "w")
227 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
228 "TARGET_SIMD && reload_completed"
230 [(set_attr "type" "neon_to_gp<q>")
231 (set_attr "length" "4")
234 (define_insn "aarch64_simd_mov_from_<mode>high"
235 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
237 (match_operand:VQ 1 "register_operand" "w")
238 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
239 "TARGET_SIMD && reload_completed"
241 [(set_attr "type" "neon_to_gp<q>")
242 (set_attr "length" "4")
245 (define_insn "orn<mode>3"
246 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
247 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
248 (match_operand:VDQ_I 2 "register_operand" "w")))]
250 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
251 [(set_attr "type" "neon_logic<q>")]
254 (define_insn "bic<mode>3"
255 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
256 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
257 (match_operand:VDQ_I 2 "register_operand" "w")))]
259 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
260 [(set_attr "type" "neon_logic<q>")]
263 (define_insn "add<mode>3"
264 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
265 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
266 (match_operand:VDQ_I 2 "register_operand" "w")))]
268 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
269 [(set_attr "type" "neon_add<q>")]
272 (define_insn "sub<mode>3"
273 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
274 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
275 (match_operand:VDQ_I 2 "register_operand" "w")))]
277 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
278 [(set_attr "type" "neon_sub<q>")]
281 (define_insn "mul<mode>3"
282 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
283 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
284 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
286 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
287 [(set_attr "type" "neon_mul_<Vetype><q>")]
290 (define_insn "bswap<mode>2"
291 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
292 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
294 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
295 [(set_attr "type" "neon_rev<q>")]
298 (define_insn "aarch64_rbit<mode>"
299 [(set (match_operand:VB 0 "register_operand" "=w")
300 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
303 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
304 [(set_attr "type" "neon_rbit")]
307 (define_expand "ctz<mode>2"
308 [(set (match_operand:VS 0 "register_operand")
309 (ctz:VS (match_operand:VS 1 "register_operand")))]
312 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
313 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
315 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
316 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
321 (define_insn "*aarch64_mul3_elt<mode>"
322 [(set (match_operand:VMUL 0 "register_operand" "=w")
326 (match_operand:VMUL 1 "register_operand" "<h_con>")
327 (parallel [(match_operand:SI 2 "immediate_operand")])))
328 (match_operand:VMUL 3 "register_operand" "w")))]
331 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
332 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
334 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
337 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
338 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
339 (mult:VMUL_CHANGE_NLANES
340 (vec_duplicate:VMUL_CHANGE_NLANES
342 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
343 (parallel [(match_operand:SI 2 "immediate_operand")])))
344 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
347 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
348 INTVAL (operands[2])));
349 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
351 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
354 (define_insn "*aarch64_mul3_elt_to_128df"
355 [(set (match_operand:V2DF 0 "register_operand" "=w")
358 (match_operand:DF 2 "register_operand" "w"))
359 (match_operand:V2DF 1 "register_operand" "w")))]
361 "fmul\\t%0.2d, %1.2d, %2.d[0]"
362 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
365 (define_insn "*aarch64_mul3_elt_to_64v2df"
366 [(set (match_operand:DF 0 "register_operand" "=w")
369 (match_operand:V2DF 1 "register_operand" "w")
370 (parallel [(match_operand:SI 2 "immediate_operand")]))
371 (match_operand:DF 3 "register_operand" "w")))]
374 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
375 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
377 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
380 (define_insn "neg<mode>2"
381 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
382 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
384 "neg\t%0.<Vtype>, %1.<Vtype>"
385 [(set_attr "type" "neon_neg<q>")]
388 (define_insn "abs<mode>2"
389 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
390 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
392 "abs\t%0.<Vtype>, %1.<Vtype>"
393 [(set_attr "type" "neon_abs<q>")]
396 (define_insn "abd<mode>_3"
397 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
398 (abs:VDQ_BHSI (minus:VDQ_BHSI
399 (match_operand:VDQ_BHSI 1 "register_operand" "w")
400 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
402 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
403 [(set_attr "type" "neon_abd<q>")]
406 (define_insn "aba<mode>_3"
407 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
408 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
409 (match_operand:VDQ_BHSI 1 "register_operand" "w")
410 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
411 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
413 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
414 [(set_attr "type" "neon_arith_acc<q>")]
417 (define_insn "fabd<mode>_3"
418 [(set (match_operand:VDQF 0 "register_operand" "=w")
419 (abs:VDQF (minus:VDQF
420 (match_operand:VDQF 1 "register_operand" "w")
421 (match_operand:VDQF 2 "register_operand" "w"))))]
423 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
424 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
427 (define_insn "*fabd_scalar<mode>3"
428 [(set (match_operand:GPF 0 "register_operand" "=w")
430 (match_operand:GPF 1 "register_operand" "w")
431 (match_operand:GPF 2 "register_operand" "w"))))]
433 "fabd\t%<s>0, %<s>1, %<s>2"
434 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
437 (define_insn "and<mode>3"
438 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
439 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
440 (match_operand:VDQ_I 2 "register_operand" "w")))]
442 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
443 [(set_attr "type" "neon_logic<q>")]
446 (define_insn "ior<mode>3"
447 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
448 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
449 (match_operand:VDQ_I 2 "register_operand" "w")))]
451 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
452 [(set_attr "type" "neon_logic<q>")]
455 (define_insn "xor<mode>3"
456 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
457 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
458 (match_operand:VDQ_I 2 "register_operand" "w")))]
460 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
461 [(set_attr "type" "neon_logic<q>")]
464 (define_insn "one_cmpl<mode>2"
465 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
466 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
468 "not\t%0.<Vbtype>, %1.<Vbtype>"
469 [(set_attr "type" "neon_logic<q>")]
472 (define_insn "aarch64_simd_vec_set<mode>"
473 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
475 (vec_duplicate:VDQ_BHSI
476 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
477 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
478 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
481 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
482 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
483 switch (which_alternative)
486 return "ins\\t%0.<Vetype>[%p2], %w1";
488 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
490 return "ld1\\t{%0.<Vetype>}[%p2], %1";
495 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
498 (define_insn "aarch64_simd_lshr<mode>"
499 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
500 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
501 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
503 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
504 [(set_attr "type" "neon_shift_imm<q>")]
507 (define_insn "aarch64_simd_ashr<mode>"
508 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
509 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
510 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
512 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
513 [(set_attr "type" "neon_shift_imm<q>")]
516 (define_insn "aarch64_simd_imm_shl<mode>"
517 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
518 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
519 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
521 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
522 [(set_attr "type" "neon_shift_imm<q>")]
525 (define_insn "aarch64_simd_reg_sshl<mode>"
526 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
527 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
528 (match_operand:VDQ_I 2 "register_operand" "w")))]
530 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
531 [(set_attr "type" "neon_shift_reg<q>")]
534 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
535 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
536 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
537 (match_operand:VDQ_I 2 "register_operand" "w")]
538 UNSPEC_ASHIFT_UNSIGNED))]
540 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
541 [(set_attr "type" "neon_shift_reg<q>")]
544 (define_insn "aarch64_simd_reg_shl<mode>_signed"
545 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
546 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
547 (match_operand:VDQ_I 2 "register_operand" "w")]
548 UNSPEC_ASHIFT_SIGNED))]
550 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
551 [(set_attr "type" "neon_shift_reg<q>")]
554 (define_expand "ashl<mode>3"
555 [(match_operand:VDQ_I 0 "register_operand" "")
556 (match_operand:VDQ_I 1 "register_operand" "")
557 (match_operand:SI 2 "general_operand" "")]
560 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
563 if (CONST_INT_P (operands[2]))
565 shift_amount = INTVAL (operands[2]);
566 if (shift_amount >= 0 && shift_amount < bit_width)
568 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
570 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
577 operands[2] = force_reg (SImode, operands[2]);
580 else if (MEM_P (operands[2]))
582 operands[2] = force_reg (SImode, operands[2]);
585 if (REG_P (operands[2]))
587 rtx tmp = gen_reg_rtx (<MODE>mode);
588 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
589 convert_to_mode (<VEL>mode,
592 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
601 (define_expand "lshr<mode>3"
602 [(match_operand:VDQ_I 0 "register_operand" "")
603 (match_operand:VDQ_I 1 "register_operand" "")
604 (match_operand:SI 2 "general_operand" "")]
607 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
610 if (CONST_INT_P (operands[2]))
612 shift_amount = INTVAL (operands[2]);
613 if (shift_amount > 0 && shift_amount <= bit_width)
615 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
617 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
623 operands[2] = force_reg (SImode, operands[2]);
625 else if (MEM_P (operands[2]))
627 operands[2] = force_reg (SImode, operands[2]);
630 if (REG_P (operands[2]))
632 rtx tmp = gen_reg_rtx (SImode);
633 rtx tmp1 = gen_reg_rtx (<MODE>mode);
634 emit_insn (gen_negsi2 (tmp, operands[2]));
635 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
636 convert_to_mode (<VEL>mode,
638 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
648 (define_expand "ashr<mode>3"
649 [(match_operand:VDQ_I 0 "register_operand" "")
650 (match_operand:VDQ_I 1 "register_operand" "")
651 (match_operand:SI 2 "general_operand" "")]
654 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
657 if (CONST_INT_P (operands[2]))
659 shift_amount = INTVAL (operands[2]);
660 if (shift_amount > 0 && shift_amount <= bit_width)
662 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
664 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
670 operands[2] = force_reg (SImode, operands[2]);
672 else if (MEM_P (operands[2]))
674 operands[2] = force_reg (SImode, operands[2]);
677 if (REG_P (operands[2]))
679 rtx tmp = gen_reg_rtx (SImode);
680 rtx tmp1 = gen_reg_rtx (<MODE>mode);
681 emit_insn (gen_negsi2 (tmp, operands[2]));
682 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
683 convert_to_mode (<VEL>mode,
685 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
695 (define_expand "vashl<mode>3"
696 [(match_operand:VDQ_I 0 "register_operand" "")
697 (match_operand:VDQ_I 1 "register_operand" "")
698 (match_operand:VDQ_I 2 "register_operand" "")]
701 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
706 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
707 ;; Negating individual lanes most certainly offsets the
708 ;; gain from vectorization.
709 (define_expand "vashr<mode>3"
710 [(match_operand:VDQ_BHSI 0 "register_operand" "")
711 (match_operand:VDQ_BHSI 1 "register_operand" "")
712 (match_operand:VDQ_BHSI 2 "register_operand" "")]
715 rtx neg = gen_reg_rtx (<MODE>mode);
716 emit (gen_neg<mode>2 (neg, operands[2]));
717 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
723 (define_expand "aarch64_ashr_simddi"
724 [(match_operand:DI 0 "register_operand" "=w")
725 (match_operand:DI 1 "register_operand" "w")
726 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
729 if (INTVAL (operands[2]) == 64)
730 emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1]));
732 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
737 ;; SIMD shift by 64. This pattern is a special case as standard pattern does
738 ;; not handle NEON shifts by 64.
739 (define_insn "aarch64_sshr_simddi"
740 [(set (match_operand:DI 0 "register_operand" "=w")
742 [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))]
745 [(set_attr "type" "neon_shift_imm")]
748 (define_expand "vlshr<mode>3"
749 [(match_operand:VDQ_BHSI 0 "register_operand" "")
750 (match_operand:VDQ_BHSI 1 "register_operand" "")
751 (match_operand:VDQ_BHSI 2 "register_operand" "")]
754 rtx neg = gen_reg_rtx (<MODE>mode);
755 emit (gen_neg<mode>2 (neg, operands[2]));
756 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
761 (define_expand "aarch64_lshr_simddi"
762 [(match_operand:DI 0 "register_operand" "=w")
763 (match_operand:DI 1 "register_operand" "w")
764 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
767 if (INTVAL (operands[2]) == 64)
768 emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]));
770 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
775 ;; SIMD shift by 64. This pattern is a special case as standard pattern does
776 ;; not handle NEON shifts by 64.
777 (define_insn "aarch64_ushr_simddi"
778 [(set (match_operand:DI 0 "register_operand" "=w")
780 [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
783 [(set_attr "type" "neon_shift_imm")]
786 (define_expand "vec_set<mode>"
787 [(match_operand:VDQ_BHSI 0 "register_operand")
788 (match_operand:<VEL> 1 "register_operand")
789 (match_operand:SI 2 "immediate_operand")]
792 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
793 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
794 GEN_INT (elem), operands[0]));
799 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
800 (define_insn "vec_shr_<mode>"
801 [(set (match_operand:VD 0 "register_operand" "=w")
802 (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
803 (match_operand:SI 2 "immediate_operand" "i")))]
806 if (BYTES_BIG_ENDIAN)
807 return "ushl %d0, %d1, %2";
809 return "ushr %d0, %d1, %2";
811 [(set_attr "type" "neon_shift_imm")]
814 (define_insn "aarch64_simd_vec_setv2di"
815 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
818 (match_operand:DI 1 "register_operand" "r,w"))
819 (match_operand:V2DI 3 "register_operand" "0,0")
820 (match_operand:SI 2 "immediate_operand" "i,i")))]
823 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
825 switch (which_alternative)
828 return "ins\\t%0.d[%p2], %1";
830 return "ins\\t%0.d[%p2], %1.d[0]";
835 [(set_attr "type" "neon_from_gp, neon_ins_q")]
838 (define_expand "vec_setv2di"
839 [(match_operand:V2DI 0 "register_operand")
840 (match_operand:DI 1 "register_operand")
841 (match_operand:SI 2 "immediate_operand")]
844 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
845 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
846 GEN_INT (elem), operands[0]));
851 (define_insn "aarch64_simd_vec_set<mode>"
852 [(set (match_operand:VDQF 0 "register_operand" "=w")
855 (match_operand:<VEL> 1 "register_operand" "w"))
856 (match_operand:VDQF 3 "register_operand" "0")
857 (match_operand:SI 2 "immediate_operand" "i")))]
860 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
862 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
863 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
865 [(set_attr "type" "neon_ins<q>")]
868 (define_expand "vec_set<mode>"
869 [(match_operand:VDQF 0 "register_operand" "+w")
870 (match_operand:<VEL> 1 "register_operand" "w")
871 (match_operand:SI 2 "immediate_operand" "")]
874 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
875 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
876 GEN_INT (elem), operands[0]));
882 (define_insn "aarch64_mla<mode>"
883 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
884 (plus:VDQ_BHSI (mult:VDQ_BHSI
885 (match_operand:VDQ_BHSI 2 "register_operand" "w")
886 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
887 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
889 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
890 [(set_attr "type" "neon_mla_<Vetype><q>")]
893 (define_insn "*aarch64_mla_elt<mode>"
894 [(set (match_operand:VDQHS 0 "register_operand" "=w")
899 (match_operand:VDQHS 1 "register_operand" "<h_con>")
900 (parallel [(match_operand:SI 2 "immediate_operand")])))
901 (match_operand:VDQHS 3 "register_operand" "w"))
902 (match_operand:VDQHS 4 "register_operand" "0")))]
905 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
906 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
908 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
911 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
912 [(set (match_operand:VDQHS 0 "register_operand" "=w")
917 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
918 (parallel [(match_operand:SI 2 "immediate_operand")])))
919 (match_operand:VDQHS 3 "register_operand" "w"))
920 (match_operand:VDQHS 4 "register_operand" "0")))]
923 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
924 INTVAL (operands[2])));
925 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
927 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
930 (define_insn "aarch64_mls<mode>"
931 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
932 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
933 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
934 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
936 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
937 [(set_attr "type" "neon_mla_<Vetype><q>")]
940 (define_insn "*aarch64_mls_elt<mode>"
941 [(set (match_operand:VDQHS 0 "register_operand" "=w")
943 (match_operand:VDQHS 4 "register_operand" "0")
947 (match_operand:VDQHS 1 "register_operand" "<h_con>")
948 (parallel [(match_operand:SI 2 "immediate_operand")])))
949 (match_operand:VDQHS 3 "register_operand" "w"))))]
952 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
953 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
955 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
958 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
959 [(set (match_operand:VDQHS 0 "register_operand" "=w")
961 (match_operand:VDQHS 4 "register_operand" "0")
965 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
966 (parallel [(match_operand:SI 2 "immediate_operand")])))
967 (match_operand:VDQHS 3 "register_operand" "w"))))]
970 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
971 INTVAL (operands[2])));
972 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
974 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
977 ;; Max/Min operations.
978 (define_insn "<su><maxmin><mode>3"
979 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
980 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
981 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
983 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
984 [(set_attr "type" "neon_minmax<q>")]
987 (define_expand "<su><maxmin>v2di3"
988 [(set (match_operand:V2DI 0 "register_operand" "")
989 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
990 (match_operand:V2DI 2 "register_operand" "")))]
993 enum rtx_code cmp_operator;
1014 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1015 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1016 operands[2], cmp_fmt, operands[1], operands[2]));
1020 ;; vec_concat gives a new vector with the low elements from operand 1, and
1021 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1022 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1023 ;; What that means, is that the RTL descriptions of the below patterns
1024 ;; need to change depending on endianness.
1026 ;; Move to the low architectural bits of the register.
1027 ;; On little-endian this is { operand, zeroes }
1028 ;; On big-endian this is { zeroes, operand }
1030 (define_insn "move_lo_quad_internal_<mode>"
1031 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1033 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1034 (vec_duplicate:<VHALF> (const_int 0))))]
1035 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1040 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1041 (set_attr "simd" "yes,*,yes")
1042 (set_attr "fp" "*,yes,*")
1043 (set_attr "length" "4")]
1046 (define_insn "move_lo_quad_internal_<mode>"
1047 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1049 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1051 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1056 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1057 (set_attr "simd" "yes,*,yes")
1058 (set_attr "fp" "*,yes,*")
1059 (set_attr "length" "4")]
1062 (define_insn "move_lo_quad_internal_be_<mode>"
1063 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1065 (vec_duplicate:<VHALF> (const_int 0))
1066 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1067 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1072 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1073 (set_attr "simd" "yes,*,yes")
1074 (set_attr "fp" "*,yes,*")
1075 (set_attr "length" "4")]
1078 (define_insn "move_lo_quad_internal_be_<mode>"
1079 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1082 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1083 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1088 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1089 (set_attr "simd" "yes,*,yes")
1090 (set_attr "fp" "*,yes,*")
1091 (set_attr "length" "4")]
1094 (define_expand "move_lo_quad_<mode>"
1095 [(match_operand:VQ 0 "register_operand")
1096 (match_operand:VQ 1 "register_operand")]
1099 if (BYTES_BIG_ENDIAN)
1100 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1102 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1107 ;; Move operand1 to the high architectural bits of the register, keeping
1108 ;; the low architectural bits of operand2.
1109 ;; For little-endian this is { operand2, operand1 }
1110 ;; For big-endian this is { operand1, operand2 }
1112 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1113 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1117 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1118 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1119 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1121 ins\\t%0.d[1], %1.d[0]
1123 [(set_attr "type" "neon_ins")]
1126 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1127 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1129 (match_operand:<VHALF> 1 "register_operand" "w,r")
1132 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1133 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1135 ins\\t%0.d[1], %1.d[0]
1137 [(set_attr "type" "neon_ins")]
1140 (define_expand "move_hi_quad_<mode>"
1141 [(match_operand:VQ 0 "register_operand" "")
1142 (match_operand:<VHALF> 1 "register_operand" "")]
1145 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1146 if (BYTES_BIG_ENDIAN)
1147 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1150 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1155 ;; Narrowing operations.
1158 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1159 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1160 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1162 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1163 [(set_attr "type" "neon_shift_imm_narrow_q")]
1166 (define_expand "vec_pack_trunc_<mode>"
1167 [(match_operand:<VNARROWD> 0 "register_operand" "")
1168 (match_operand:VDN 1 "register_operand" "")
1169 (match_operand:VDN 2 "register_operand" "")]
1172 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1173 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1174 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1176 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1177 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1178 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1184 (define_insn "vec_pack_trunc_<mode>"
1185 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1186 (vec_concat:<VNARROWQ2>
1187 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1188 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1191 if (BYTES_BIG_ENDIAN)
1192 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1194 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1196 [(set_attr "type" "multiple")
1197 (set_attr "length" "8")]
1200 ;; Widening operations.
1202 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1203 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1204 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1205 (match_operand:VQW 1 "register_operand" "w")
1206 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1209 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1210 [(set_attr "type" "neon_shift_imm_long")]
1213 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1214 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1215 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1216 (match_operand:VQW 1 "register_operand" "w")
1217 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1220 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1221 [(set_attr "type" "neon_shift_imm_long")]
1224 (define_expand "vec_unpack<su>_hi_<mode>"
1225 [(match_operand:<VWIDE> 0 "register_operand" "")
1226 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1229 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1230 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1236 (define_expand "vec_unpack<su>_lo_<mode>"
1237 [(match_operand:<VWIDE> 0 "register_operand" "")
1238 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1242 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1248 ;; Widening arithmetic.
1250 (define_insn "*aarch64_<su>mlal_lo<mode>"
1251 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1254 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1255 (match_operand:VQW 2 "register_operand" "w")
1256 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1257 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1258 (match_operand:VQW 4 "register_operand" "w")
1260 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1262 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1263 [(set_attr "type" "neon_mla_<Vetype>_long")]
1266 (define_insn "*aarch64_<su>mlal_hi<mode>"
1267 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1270 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1271 (match_operand:VQW 2 "register_operand" "w")
1272 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1273 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1274 (match_operand:VQW 4 "register_operand" "w")
1276 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1278 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1279 [(set_attr "type" "neon_mla_<Vetype>_long")]
1282 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1283 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1285 (match_operand:<VWIDE> 1 "register_operand" "0")
1287 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1288 (match_operand:VQW 2 "register_operand" "w")
1289 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1290 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1291 (match_operand:VQW 4 "register_operand" "w")
1294 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1295 [(set_attr "type" "neon_mla_<Vetype>_long")]
1298 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1299 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (match_operand:<VWIDE> 1 "register_operand" "0")
1303 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1304 (match_operand:VQW 2 "register_operand" "w")
1305 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1306 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1307 (match_operand:VQW 4 "register_operand" "w")
1310 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1311 [(set_attr "type" "neon_mla_<Vetype>_long")]
1314 (define_insn "*aarch64_<su>mlal<mode>"
1315 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1319 (match_operand:VD_BHSI 1 "register_operand" "w"))
1321 (match_operand:VD_BHSI 2 "register_operand" "w")))
1322 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1324 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1325 [(set_attr "type" "neon_mla_<Vetype>_long")]
1328 (define_insn "*aarch64_<su>mlsl<mode>"
1329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1331 (match_operand:<VWIDE> 1 "register_operand" "0")
1334 (match_operand:VD_BHSI 2 "register_operand" "w"))
1336 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1338 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1339 [(set_attr "type" "neon_mla_<Vetype>_long")]
1342 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1343 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1344 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1345 (match_operand:VQW 1 "register_operand" "w")
1346 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1347 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1348 (match_operand:VQW 2 "register_operand" "w")
1351 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1352 [(set_attr "type" "neon_mul_<Vetype>_long")]
1355 (define_expand "vec_widen_<su>mult_lo_<mode>"
1356 [(match_operand:<VWIDE> 0 "register_operand" "")
1357 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1358 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1361 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1362 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1369 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1371 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1372 (match_operand:VQW 1 "register_operand" "w")
1373 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1374 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1375 (match_operand:VQW 2 "register_operand" "w")
1378 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1379 [(set_attr "type" "neon_mul_<Vetype>_long")]
1382 (define_expand "vec_widen_<su>mult_hi_<mode>"
1383 [(match_operand:<VWIDE> 0 "register_operand" "")
1384 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1385 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1388 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1389 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1397 ;; FP vector operations.
1398 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1399 ;; double-precision (64-bit) floating-point data types and arithmetic as
1400 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1401 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1403 ;; Floating-point operations can raise an exception. Vectorizing such
1404 ;; operations are safe because of reasons explained below.
1406 ;; ARMv8 permits an extension to enable trapped floating-point
1407 ;; exception handling, however this is an optional feature. In the
1408 ;; event of a floating-point exception being raised by vectorised
1410 ;; 1. If trapped floating-point exceptions are available, then a trap
1411 ;; will be taken when any lane raises an enabled exception. A trap
1412 ;; handler may determine which lane raised the exception.
1413 ;; 2. Alternatively a sticky exception flag is set in the
1414 ;; floating-point status register (FPSR). Software may explicitly
1415 ;; test the exception flags, in which case the tests will either
1416 ;; prevent vectorisation, allowing precise identification of the
1417 ;; failing operation, or if tested outside of vectorisable regions
1418 ;; then the specific operation and lane are not of interest.
1420 ;; FP arithmetic operations.
1422 (define_insn "add<mode>3"
1423 [(set (match_operand:VDQF 0 "register_operand" "=w")
1424 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1425 (match_operand:VDQF 2 "register_operand" "w")))]
1427 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1428 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1431 (define_insn "sub<mode>3"
1432 [(set (match_operand:VDQF 0 "register_operand" "=w")
1433 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1434 (match_operand:VDQF 2 "register_operand" "w")))]
1436 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1440 (define_insn "mul<mode>3"
1441 [(set (match_operand:VDQF 0 "register_operand" "=w")
1442 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1443 (match_operand:VDQF 2 "register_operand" "w")))]
1445 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1446 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1449 (define_insn "div<mode>3"
1450 [(set (match_operand:VDQF 0 "register_operand" "=w")
1451 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1452 (match_operand:VDQF 2 "register_operand" "w")))]
1454 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1455 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1458 (define_insn "neg<mode>2"
1459 [(set (match_operand:VDQF 0 "register_operand" "=w")
1460 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1462 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1463 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1466 (define_insn "abs<mode>2"
1467 [(set (match_operand:VDQF 0 "register_operand" "=w")
1468 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1470 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1471 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
1474 (define_insn "fma<mode>4"
1475 [(set (match_operand:VDQF 0 "register_operand" "=w")
1476 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1477 (match_operand:VDQF 2 "register_operand" "w")
1478 (match_operand:VDQF 3 "register_operand" "0")))]
1480 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1481 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1484 (define_insn "*aarch64_fma4_elt<mode>"
1485 [(set (match_operand:VDQF 0 "register_operand" "=w")
1489 (match_operand:VDQF 1 "register_operand" "<h_con>")
1490 (parallel [(match_operand:SI 2 "immediate_operand")])))
1491 (match_operand:VDQF 3 "register_operand" "w")
1492 (match_operand:VDQF 4 "register_operand" "0")))]
1495 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1496 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1498 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1501 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1502 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1504 (vec_duplicate:VDQSF
1506 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1507 (parallel [(match_operand:SI 2 "immediate_operand")])))
1508 (match_operand:VDQSF 3 "register_operand" "w")
1509 (match_operand:VDQSF 4 "register_operand" "0")))]
1512 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1513 INTVAL (operands[2])));
1514 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1516 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1519 (define_insn "*aarch64_fma4_elt_to_128df"
1520 [(set (match_operand:V2DF 0 "register_operand" "=w")
1523 (match_operand:DF 1 "register_operand" "w"))
1524 (match_operand:V2DF 2 "register_operand" "w")
1525 (match_operand:V2DF 3 "register_operand" "0")))]
1527 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1528 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1531 (define_insn "*aarch64_fma4_elt_to_64v2df"
1532 [(set (match_operand:DF 0 "register_operand" "=w")
1535 (match_operand:V2DF 1 "register_operand" "w")
1536 (parallel [(match_operand:SI 2 "immediate_operand")]))
1537 (match_operand:DF 3 "register_operand" "w")
1538 (match_operand:DF 4 "register_operand" "0")))]
1541 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1542 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1544 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1547 (define_insn "fnma<mode>4"
1548 [(set (match_operand:VDQF 0 "register_operand" "=w")
1550 (match_operand:VDQF 1 "register_operand" "w")
1552 (match_operand:VDQF 2 "register_operand" "w"))
1553 (match_operand:VDQF 3 "register_operand" "0")))]
1555 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1556 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1559 (define_insn "*aarch64_fnma4_elt<mode>"
1560 [(set (match_operand:VDQF 0 "register_operand" "=w")
1563 (match_operand:VDQF 3 "register_operand" "w"))
1566 (match_operand:VDQF 1 "register_operand" "<h_con>")
1567 (parallel [(match_operand:SI 2 "immediate_operand")])))
1568 (match_operand:VDQF 4 "register_operand" "0")))]
1571 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1572 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1574 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1577 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1578 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1581 (match_operand:VDQSF 3 "register_operand" "w"))
1582 (vec_duplicate:VDQSF
1584 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1585 (parallel [(match_operand:SI 2 "immediate_operand")])))
1586 (match_operand:VDQSF 4 "register_operand" "0")))]
1589 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1590 INTVAL (operands[2])));
1591 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1593 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1596 (define_insn "*aarch64_fnma4_elt_to_128df"
1597 [(set (match_operand:V2DF 0 "register_operand" "=w")
1600 (match_operand:V2DF 2 "register_operand" "w"))
1602 (match_operand:DF 1 "register_operand" "w"))
1603 (match_operand:V2DF 3 "register_operand" "0")))]
1605 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1606 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1609 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1610 [(set (match_operand:DF 0 "register_operand" "=w")
1613 (match_operand:V2DF 1 "register_operand" "w")
1614 (parallel [(match_operand:SI 2 "immediate_operand")]))
1616 (match_operand:DF 3 "register_operand" "w"))
1617 (match_operand:DF 4 "register_operand" "0")))]
1620 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1621 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1623 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1626 ;; Vector versions of the floating-point frint patterns.
1627 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1628 (define_insn "<frint_pattern><mode>2"
1629 [(set (match_operand:VDQF 0 "register_operand" "=w")
1630 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1633 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1634 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1637 ;; Vector versions of the fcvt standard patterns.
1638 ;; Expands to lbtrunc, lround, lceil, lfloor
1639 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1640 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1641 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1642 [(match_operand:VDQF 1 "register_operand" "w")]
1645 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1646 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1649 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1650 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1651 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1652 [(match_operand:VDQF 1 "register_operand")]
1657 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1658 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1659 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1660 [(match_operand:VDQF 1 "register_operand")]
1665 (define_expand "ftrunc<VDQF:mode>2"
1666 [(set (match_operand:VDQF 0 "register_operand")
1667 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1672 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1673 [(set (match_operand:VDQF 0 "register_operand" "=w")
1675 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1677 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1678 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1681 ;; Conversions between vectors of floats and doubles.
1682 ;; Contains a mix of patterns to match standard pattern names
1683 ;; and those for intrinsics.
1685 ;; Float widening operations.
1687 (define_insn "vec_unpacks_lo_v4sf"
1688 [(set (match_operand:V2DF 0 "register_operand" "=w")
1691 (match_operand:V4SF 1 "register_operand" "w")
1692 (parallel [(const_int 0) (const_int 1)])
1695 "fcvtl\\t%0.2d, %1.2s"
1696 [(set_attr "type" "neon_fp_cvt_widen_s")]
1699 (define_insn "aarch64_float_extend_lo_v2df"
1700 [(set (match_operand:V2DF 0 "register_operand" "=w")
1702 (match_operand:V2SF 1 "register_operand" "w")))]
1704 "fcvtl\\t%0.2d, %1.2s"
1705 [(set_attr "type" "neon_fp_cvt_widen_s")]
1708 (define_insn "vec_unpacks_hi_v4sf"
1709 [(set (match_operand:V2DF 0 "register_operand" "=w")
1712 (match_operand:V4SF 1 "register_operand" "w")
1713 (parallel [(const_int 2) (const_int 3)])
1716 "fcvtl2\\t%0.2d, %1.4s"
1717 [(set_attr "type" "neon_fp_cvt_widen_s")]
1720 ;; Float narrowing operations.
1722 (define_insn "aarch64_float_truncate_lo_v2sf"
1723 [(set (match_operand:V2SF 0 "register_operand" "=w")
1724 (float_truncate:V2SF
1725 (match_operand:V2DF 1 "register_operand" "w")))]
1727 "fcvtn\\t%0.2s, %1.2d"
1728 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1731 (define_insn "aarch64_float_truncate_hi_v4sf"
1732 [(set (match_operand:V4SF 0 "register_operand" "=w")
1734 (match_operand:V2SF 1 "register_operand" "0")
1735 (float_truncate:V2SF
1736 (match_operand:V2DF 2 "register_operand" "w"))))]
1738 "fcvtn2\\t%0.4s, %2.2d"
1739 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1742 (define_expand "vec_pack_trunc_v2df"
1743 [(set (match_operand:V4SF 0 "register_operand")
1745 (float_truncate:V2SF
1746 (match_operand:V2DF 1 "register_operand"))
1747 (float_truncate:V2SF
1748 (match_operand:V2DF 2 "register_operand"))
1752 rtx tmp = gen_reg_rtx (V2SFmode);
1753 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1754 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1756 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1757 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1758 tmp, operands[hi]));
1763 (define_expand "vec_pack_trunc_df"
1764 [(set (match_operand:V2SF 0 "register_operand")
1767 (match_operand:DF 1 "register_operand"))
1769 (match_operand:DF 2 "register_operand"))
1773 rtx tmp = gen_reg_rtx (V2SFmode);
1774 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1775 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1777 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1778 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1779 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1784 (define_insn "aarch64_vmls<mode>"
1785 [(set (match_operand:VDQF 0 "register_operand" "=w")
1786 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1787 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1788 (match_operand:VDQF 3 "register_operand" "w"))))]
1790 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1791 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1795 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1797 ;; a = (b < c) ? b : c;
1798 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1799 ;; either explicitly or indirectly via -ffast-math.
1801 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1802 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1803 ;; operand will be returned when both operands are zero (i.e. they may not
1804 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1805 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
1808 (define_insn "<su><maxmin><mode>3"
1809 [(set (match_operand:VDQF 0 "register_operand" "=w")
1810 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1811 (match_operand:VDQF 2 "register_operand" "w")))]
1813 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1814 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1817 (define_insn "<maxmin_uns><mode>3"
1818 [(set (match_operand:VDQF 0 "register_operand" "=w")
1819 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1820 (match_operand:VDQF 2 "register_operand" "w")]
1823 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1824 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1827 ;; 'across lanes' add.
1829 (define_expand "reduc_plus_scal_<mode>"
1830 [(match_operand:<VEL> 0 "register_operand" "=w")
1831 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1835 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1836 rtx scratch = gen_reg_rtx (<MODE>mode);
1837 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1838 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1843 (define_expand "reduc_plus_scal_<mode>"
1844 [(match_operand:<VEL> 0 "register_operand" "=w")
1845 (match_operand:V2F 1 "register_operand" "w")]
1848 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1849 rtx scratch = gen_reg_rtx (<MODE>mode);
1850 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1851 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1856 (define_insn "aarch64_reduc_plus_internal<mode>"
1857 [(set (match_operand:VDQV 0 "register_operand" "=w")
1858 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1861 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1862 [(set_attr "type" "neon_reduc_add<q>")]
1865 (define_insn "aarch64_reduc_plus_internalv2si"
1866 [(set (match_operand:V2SI 0 "register_operand" "=w")
1867 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1870 "addp\\t%0.2s, %1.2s, %1.2s"
1871 [(set_attr "type" "neon_reduc_add")]
1874 (define_insn "aarch64_reduc_plus_internal<mode>"
1875 [(set (match_operand:V2F 0 "register_operand" "=w")
1876 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1879 "faddp\\t%<Vetype>0, %1.<Vtype>"
1880 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
1883 (define_insn "aarch64_addpv4sf"
1884 [(set (match_operand:V4SF 0 "register_operand" "=w")
1885 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1888 "faddp\\t%0.4s, %1.4s, %1.4s"
1889 [(set_attr "type" "neon_fp_reduc_add_s_q")]
1892 (define_expand "reduc_plus_scal_v4sf"
1893 [(set (match_operand:SF 0 "register_operand")
1894 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1898 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
1899 rtx scratch = gen_reg_rtx (V4SFmode);
1900 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
1901 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
1902 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
1906 (define_insn "clrsb<mode>2"
1907 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1908 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1910 "cls\\t%0.<Vtype>, %1.<Vtype>"
1911 [(set_attr "type" "neon_cls<q>")]
1914 (define_insn "clz<mode>2"
1915 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1916 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1918 "clz\\t%0.<Vtype>, %1.<Vtype>"
1919 [(set_attr "type" "neon_cls<q>")]
1922 (define_insn "popcount<mode>2"
1923 [(set (match_operand:VB 0 "register_operand" "=w")
1924 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
1926 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
1927 [(set_attr "type" "neon_cnt<q>")]
1930 ;; 'across lanes' max and min ops.
1932 ;; Template for outputting a scalar, so we can create __builtins which can be
1933 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
1934 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1935 [(match_operand:<VEL> 0 "register_operand")
1936 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1940 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1941 rtx scratch = gen_reg_rtx (<MODE>mode);
1942 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1944 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1949 ;; Likewise for integer cases, signed and unsigned.
1950 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1951 [(match_operand:<VEL> 0 "register_operand")
1952 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
1956 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1957 rtx scratch = gen_reg_rtx (<MODE>mode);
1958 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1960 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1965 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1966 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
1967 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
1970 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1971 [(set_attr "type" "neon_reduc_minmax<q>")]
1974 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
1975 [(set (match_operand:V2SI 0 "register_operand" "=w")
1976 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1979 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1980 [(set_attr "type" "neon_reduc_minmax")]
1983 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1984 [(set (match_operand:VDQF 0 "register_operand" "=w")
1985 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1988 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
1989 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
1992 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1994 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1997 ;; Thus our BSL is of the form:
1998 ;; op0 = bsl (mask, op2, op3)
1999 ;; We can use any of:
2002 ;; bsl mask, op1, op2
2003 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2004 ;; bit op0, op2, mask
2005 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2006 ;; bif op0, op1, mask
2008 (define_insn "aarch64_simd_bsl<mode>_internal"
2009 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2013 (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w"))
2014 (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w"))
2016 (match_dup:<V_cmp_result> 1)
2017 (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0"))
2021 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2022 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2023 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2024 [(set_attr "type" "neon_bsl<q>")]
2027 (define_expand "aarch64_simd_bsl<mode>"
2028 [(match_operand:VALLDIF 0 "register_operand")
2029 (match_operand:<V_cmp_result> 1 "register_operand")
2030 (match_operand:VALLDIF 2 "register_operand")
2031 (match_operand:VALLDIF 3 "register_operand")]
2034 /* We can't alias operands together if they have different modes. */
2035 rtx tmp = operands[0];
2036 if (FLOAT_MODE_P (<MODE>mode))
2038 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2039 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2040 tmp = gen_reg_rtx (<V_cmp_result>mode);
2042 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2043 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2047 if (tmp != operands[0])
2048 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2053 (define_expand "aarch64_vcond_internal<mode><mode>"
2054 [(set (match_operand:VDQ_I 0 "register_operand")
2056 (match_operator 3 "comparison_operator"
2057 [(match_operand:VDQ_I 4 "register_operand")
2058 (match_operand:VDQ_I 5 "nonmemory_operand")])
2059 (match_operand:VDQ_I 1 "nonmemory_operand")
2060 (match_operand:VDQ_I 2 "nonmemory_operand")))]
2063 rtx op1 = operands[1];
2064 rtx op2 = operands[2];
2065 rtx mask = gen_reg_rtx (<MODE>mode);
2066 enum rtx_code code = GET_CODE (operands[3]);
2068 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2069 and desirable for other comparisons if it results in FOO ? -1 : 0
2070 (this allows direct use of the comparison result without a bsl). */
2073 && op1 == CONST0_RTX (<V_cmp_result>mode)
2074 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2080 case LE: code = GT; break;
2081 case LT: code = GE; break;
2082 case GE: code = LT; break;
2083 case GT: code = LE; break;
2085 case NE: code = EQ; break;
2086 case LTU: code = GEU; break;
2087 case LEU: code = GTU; break;
2088 case GTU: code = LEU; break;
2089 case GEU: code = LTU; break;
2090 default: gcc_unreachable ();
2094 /* Make sure we can handle the last operand. */
2098 /* Normalized to EQ above. */
2106 /* These instructions have a form taking an immediate zero. */
2107 if (operands[5] == CONST0_RTX (<MODE>mode))
2109 /* Fall through, as may need to load into register. */
2111 if (!REG_P (operands[5]))
2112 operands[5] = force_reg (<MODE>mode, operands[5]);
2119 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2123 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2127 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2131 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2135 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2139 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2143 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2147 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2150 /* NE has been normalized to EQ above. */
2152 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2159 /* If we have (a = (b CMP c) ? -1 : 0);
2160 Then we can simply move the generated mask. */
2162 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2163 && op2 == CONST0_RTX (<V_cmp_result>mode))
2164 emit_move_insn (operands[0], mask);
2168 op1 = force_reg (<MODE>mode, op1);
2170 op2 = force_reg (<MODE>mode, op2);
2171 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
2178 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2179 [(set (match_operand:VDQF_COND 0 "register_operand")
2181 (match_operator 3 "comparison_operator"
2182 [(match_operand:VDQF 4 "register_operand")
2183 (match_operand:VDQF 5 "nonmemory_operand")])
2184 (match_operand:VDQF_COND 1 "nonmemory_operand")
2185 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2189 int use_zero_form = 0;
2190 int swap_bsl_operands = 0;
2191 rtx op1 = operands[1];
2192 rtx op2 = operands[2];
2193 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2194 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2196 rtx (*base_comparison) (rtx, rtx, rtx);
2197 rtx (*complimentary_comparison) (rtx, rtx, rtx);
2199 switch (GET_CODE (operands[3]))
2206 if (operands[5] == CONST0_RTX (<MODE>mode))
2213 if (!REG_P (operands[5]))
2214 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2217 switch (GET_CODE (operands[3]))
2227 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2228 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2236 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2237 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2242 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2243 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2249 switch (GET_CODE (operands[3]))
2256 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2257 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2263 Note that there also exist direct comparison against 0 forms,
2264 so catch those as a special case. */
2268 switch (GET_CODE (operands[3]))
2271 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2274 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2277 /* Do nothing, other zero form cases already have the correct
2284 emit_insn (base_comparison (mask, operands[4], operands[5]));
2286 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2293 /* FCM returns false for lanes which are unordered, so if we use
2294 the inverse of the comparison we actually want to emit, then
2295 swap the operands to BSL, we will end up with the correct result.
2296 Note that a NE NaN and NaN NE b are true for all a, b.
2298 Our transformations are:
2303 a NE b -> !(a EQ b) */
2306 emit_insn (base_comparison (mask, operands[4], operands[5]));
2308 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2310 swap_bsl_operands = 1;
2313 /* We check (a > b || b > a). combining these comparisons give us
2314 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2315 will then give us (a == b || a UNORDERED b) as intended. */
2317 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2318 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2319 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2320 swap_bsl_operands = 1;
2323 /* Operands are ORDERED iff (a > b || b >= a).
2324 Swapping the operands to BSL will give the UNORDERED case. */
2325 swap_bsl_operands = 1;
2328 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2329 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2330 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2336 if (swap_bsl_operands)
2342 /* If we have (a = (b CMP c) ? -1 : 0);
2343 Then we can simply move the generated mask. */
2345 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2346 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2347 emit_move_insn (operands[0], mask);
2351 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2353 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2354 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
2361 (define_expand "vcond<mode><mode>"
2362 [(set (match_operand:VALL 0 "register_operand")
2364 (match_operator 3 "comparison_operator"
2365 [(match_operand:VALL 4 "register_operand")
2366 (match_operand:VALL 5 "nonmemory_operand")])
2367 (match_operand:VALL 1 "nonmemory_operand")
2368 (match_operand:VALL 2 "nonmemory_operand")))]
2371 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2372 operands[2], operands[3],
2373 operands[4], operands[5]));
2377 (define_expand "vcond<v_cmp_result><mode>"
2378 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2379 (if_then_else:<V_cmp_result>
2380 (match_operator 3 "comparison_operator"
2381 [(match_operand:VDQF 4 "register_operand")
2382 (match_operand:VDQF 5 "nonmemory_operand")])
2383 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2384 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2387 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2388 operands[0], operands[1],
2389 operands[2], operands[3],
2390 operands[4], operands[5]));
2394 (define_expand "vcondu<mode><mode>"
2395 [(set (match_operand:VDQ_I 0 "register_operand")
2397 (match_operator 3 "comparison_operator"
2398 [(match_operand:VDQ_I 4 "register_operand")
2399 (match_operand:VDQ_I 5 "nonmemory_operand")])
2400 (match_operand:VDQ_I 1 "nonmemory_operand")
2401 (match_operand:VDQ_I 2 "nonmemory_operand")))]
2404 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2405 operands[2], operands[3],
2406 operands[4], operands[5]));
2410 ;; Patterns for AArch64 SIMD Intrinsics.
2412 ;; Lane extraction with sign extension to general purpose register.
2413 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2414 [(set (match_operand:GPI 0 "register_operand" "=r")
2417 (match_operand:VDQQH 1 "register_operand" "w")
2418 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2421 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2422 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2424 [(set_attr "type" "neon_to_gp<q>")]
2427 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2428 [(set (match_operand:SI 0 "register_operand" "=r")
2431 (match_operand:VDQQH 1 "register_operand" "w")
2432 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2435 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2436 return "umov\\t%w0, %1.<Vetype>[%2]";
2438 [(set_attr "type" "neon_to_gp<q>")]
2441 ;; Lane extraction of a value, neither sign nor zero extension
2442 ;; is guaranteed so upper bits should be considered undefined.
2443 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2444 (define_insn "aarch64_get_lane<mode>"
2445 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2447 (match_operand:VALL 1 "register_operand" "w, w, w")
2448 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2451 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2452 switch (which_alternative)
2455 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2457 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2459 return "st1\\t{%1.<Vetype>}[%2], %0";
2464 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2467 (define_expand "aarch64_get_lanedi"
2468 [(match_operand:DI 0 "register_operand")
2469 (match_operand:DI 1 "register_operand")
2470 (match_operand:SI 2 "immediate_operand")]
2473 aarch64_simd_lane_bounds (operands[2], 0, 1, NULL);
2474 emit_move_insn (operands[0], operands[1]);
2478 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2481 (define_insn "*aarch64_combinez<mode>"
2482 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2484 (match_operand:VD_BHSI 1 "register_operand" "w")
2485 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")))]
2486 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2487 "mov\\t%0.8b, %1.8b"
2488 [(set_attr "type" "neon_move<q>")]
2491 (define_insn "*aarch64_combinez_be<mode>"
2492 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2494 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")
2495 (match_operand:VD_BHSI 1 "register_operand" "w")))]
2496 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2497 "mov\\t%0.8b, %1.8b"
2498 [(set_attr "type" "neon_move<q>")]
2501 (define_expand "aarch64_combine<mode>"
2502 [(match_operand:<VDBL> 0 "register_operand")
2503 (match_operand:VDC 1 "register_operand")
2504 (match_operand:VDC 2 "register_operand")]
2508 if (BYTES_BIG_ENDIAN)
2518 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
2523 (define_insn_and_split "aarch64_combine_internal<mode>"
2524 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2525 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2526 (match_operand:VDC 2 "register_operand" "w")))]
2529 "&& reload_completed"
2532 if (BYTES_BIG_ENDIAN)
2533 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2535 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2538 [(set_attr "type" "multiple")]
2541 (define_expand "aarch64_simd_combine<mode>"
2542 [(match_operand:<VDBL> 0 "register_operand")
2543 (match_operand:VDC 1 "register_operand")
2544 (match_operand:VDC 2 "register_operand")]
2547 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2548 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2551 [(set_attr "type" "multiple")]
2554 ;; <su><addsub>l<q>.
2556 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2557 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2558 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2559 (match_operand:VQW 1 "register_operand" "w")
2560 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2561 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2562 (match_operand:VQW 2 "register_operand" "w")
2565 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2566 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2569 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2570 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2571 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2572 (match_operand:VQW 1 "register_operand" "w")
2573 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2574 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2575 (match_operand:VQW 2 "register_operand" "w")
2578 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2579 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2583 (define_expand "aarch64_saddl2<mode>"
2584 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2585 (match_operand:VQW 1 "register_operand" "w")
2586 (match_operand:VQW 2 "register_operand" "w")]
2589 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2590 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2595 (define_expand "aarch64_uaddl2<mode>"
2596 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2597 (match_operand:VQW 1 "register_operand" "w")
2598 (match_operand:VQW 2 "register_operand" "w")]
2601 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2602 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2607 (define_expand "aarch64_ssubl2<mode>"
2608 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2609 (match_operand:VQW 1 "register_operand" "w")
2610 (match_operand:VQW 2 "register_operand" "w")]
2613 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2614 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2619 (define_expand "aarch64_usubl2<mode>"
2620 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2621 (match_operand:VQW 1 "register_operand" "w")
2622 (match_operand:VQW 2 "register_operand" "w")]
2625 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2626 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2631 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2632 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2633 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2634 (match_operand:VD_BHSI 1 "register_operand" "w"))
2636 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2638 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2639 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2642 ;; <su><addsub>w<q>.
2644 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2645 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2646 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2648 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2650 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2651 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2654 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2655 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2656 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2659 (match_operand:VQW 2 "register_operand" "w")
2660 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2662 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2663 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2666 (define_expand "aarch64_saddw2<mode>"
2667 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2668 (match_operand:<VWIDE> 1 "register_operand" "w")
2669 (match_operand:VQW 2 "register_operand" "w")]
2672 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2673 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2678 (define_expand "aarch64_uaddw2<mode>"
2679 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2680 (match_operand:<VWIDE> 1 "register_operand" "w")
2681 (match_operand:VQW 2 "register_operand" "w")]
2684 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2685 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2691 (define_expand "aarch64_ssubw2<mode>"
2692 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2693 (match_operand:<VWIDE> 1 "register_operand" "w")
2694 (match_operand:VQW 2 "register_operand" "w")]
2697 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2698 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2703 (define_expand "aarch64_usubw2<mode>"
2704 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2705 (match_operand:<VWIDE> 1 "register_operand" "w")
2706 (match_operand:VQW 2 "register_operand" "w")]
2709 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2710 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2715 ;; <su><r>h<addsub>.
2717 (define_insn "aarch64_<sur>h<addsub><mode>"
2718 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2719 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2720 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2723 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2724 [(set_attr "type" "neon_<addsub>_halve<q>")]
2727 ;; <r><addsub>hn<q>.
2729 (define_insn "aarch64_<sur><addsub>hn<mode>"
2730 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2731 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2732 (match_operand:VQN 2 "register_operand" "w")]
2735 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2736 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2739 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2740 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2741 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2742 (match_operand:VQN 2 "register_operand" "w")
2743 (match_operand:VQN 3 "register_operand" "w")]
2746 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2747 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2752 (define_insn "aarch64_pmul<mode>"
2753 [(set (match_operand:VB 0 "register_operand" "=w")
2754 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2755 (match_operand:VB 2 "register_operand" "w")]
2758 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2759 [(set_attr "type" "neon_mul_<Vetype><q>")]
2764 (define_insn "aarch64_<su_optab><optab><mode>"
2765 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2766 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2767 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2769 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2770 [(set_attr "type" "neon_<optab><q>")]
2773 ;; suqadd and usqadd
2775 (define_insn "aarch64_<sur>qadd<mode>"
2776 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2777 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2778 (match_operand:VSDQ_I 2 "register_operand" "w")]
2781 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2782 [(set_attr "type" "neon_qadd<q>")]
2787 (define_insn "aarch64_sqmovun<mode>"
2788 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2789 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2792 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2793 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2796 ;; sqmovn and uqmovn
2798 (define_insn "aarch64_<sur>qmovn<mode>"
2799 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2800 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2803 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2804 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2809 (define_insn "aarch64_s<optab><mode>"
2810 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2812 (match_operand:VSDQ_I 1 "register_operand" "w")))]
2814 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2815 [(set_attr "type" "neon_<optab><q>")]
2820 (define_insn "aarch64_sq<r>dmulh<mode>"
2821 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2823 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2824 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2827 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2828 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
2833 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2834 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2836 [(match_operand:VDQHS 1 "register_operand" "w")
2838 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2839 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2843 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2844 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2845 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2848 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2849 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2851 [(match_operand:VDQHS 1 "register_operand" "w")
2853 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2854 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2858 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2859 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2860 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2863 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2864 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2866 [(match_operand:SD_HSI 1 "register_operand" "w")
2868 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2869 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2873 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2874 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2875 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2878 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2879 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2881 [(match_operand:SD_HSI 1 "register_operand" "w")
2883 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2884 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2888 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2889 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2890 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2895 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2896 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2898 (match_operand:<VWIDE> 1 "register_operand" "0")
2901 (sign_extend:<VWIDE>
2902 (match_operand:VSD_HSI 2 "register_operand" "w"))
2903 (sign_extend:<VWIDE>
2904 (match_operand:VSD_HSI 3 "register_operand" "w")))
2907 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2908 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
2913 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2914 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2916 (match_operand:<VWIDE> 1 "register_operand" "0")
2919 (sign_extend:<VWIDE>
2920 (match_operand:VD_HSI 2 "register_operand" "w"))
2921 (sign_extend:<VWIDE>
2922 (vec_duplicate:VD_HSI
2924 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2925 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2930 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2932 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2934 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2937 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2938 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2940 (match_operand:<VWIDE> 1 "register_operand" "0")
2943 (sign_extend:<VWIDE>
2944 (match_operand:VD_HSI 2 "register_operand" "w"))
2945 (sign_extend:<VWIDE>
2946 (vec_duplicate:VD_HSI
2948 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2949 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2954 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2956 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2958 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2961 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2962 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2964 (match_operand:<VWIDE> 1 "register_operand" "0")
2967 (sign_extend:<VWIDE>
2968 (match_operand:SD_HSI 2 "register_operand" "w"))
2969 (sign_extend:<VWIDE>
2971 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2972 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2977 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2979 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2981 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2984 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2985 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2987 (match_operand:<VWIDE> 1 "register_operand" "0")
2990 (sign_extend:<VWIDE>
2991 (match_operand:SD_HSI 2 "register_operand" "w"))
2992 (sign_extend:<VWIDE>
2994 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2995 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3000 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3002 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3004 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3009 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3012 (match_operand:<VWIDE> 1 "register_operand" "0")
3015 (sign_extend:<VWIDE>
3016 (match_operand:VD_HSI 2 "register_operand" "w"))
3017 (sign_extend:<VWIDE>
3018 (vec_duplicate:VD_HSI
3019 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3022 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3023 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3028 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3031 (match_operand:<VWIDE> 1 "register_operand" "0")
3034 (sign_extend:<VWIDE>
3036 (match_operand:VQ_HSI 2 "register_operand" "w")
3037 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3038 (sign_extend:<VWIDE>
3040 (match_operand:VQ_HSI 3 "register_operand" "w")
3044 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3045 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3048 (define_expand "aarch64_sqdmlal2<mode>"
3049 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3050 (match_operand:<VWIDE> 1 "register_operand" "w")
3051 (match_operand:VQ_HSI 2 "register_operand" "w")
3052 (match_operand:VQ_HSI 3 "register_operand" "w")]
3055 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3056 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3057 operands[2], operands[3], p));
3061 (define_expand "aarch64_sqdmlsl2<mode>"
3062 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3063 (match_operand:<VWIDE> 1 "register_operand" "w")
3064 (match_operand:VQ_HSI 2 "register_operand" "w")
3065 (match_operand:VQ_HSI 3 "register_operand" "w")]
3068 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3069 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3070 operands[2], operands[3], p));
3076 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3077 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3079 (match_operand:<VWIDE> 1 "register_operand" "0")
3082 (sign_extend:<VWIDE>
3084 (match_operand:VQ_HSI 2 "register_operand" "w")
3085 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3086 (sign_extend:<VWIDE>
3087 (vec_duplicate:<VHALF>
3089 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3090 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3095 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3097 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3099 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3102 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3103 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3105 (match_operand:<VWIDE> 1 "register_operand" "0")
3108 (sign_extend:<VWIDE>
3110 (match_operand:VQ_HSI 2 "register_operand" "w")
3111 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3112 (sign_extend:<VWIDE>
3113 (vec_duplicate:<VHALF>
3115 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3116 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3121 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3123 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3125 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3128 (define_expand "aarch64_sqdmlal2_lane<mode>"
3129 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3130 (match_operand:<VWIDE> 1 "register_operand" "w")
3131 (match_operand:VQ_HSI 2 "register_operand" "w")
3132 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3133 (match_operand:SI 4 "immediate_operand" "i")]
3136 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3137 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3138 operands[2], operands[3],
3143 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3144 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3145 (match_operand:<VWIDE> 1 "register_operand" "w")
3146 (match_operand:VQ_HSI 2 "register_operand" "w")
3147 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3148 (match_operand:SI 4 "immediate_operand" "i")]
3151 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3152 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3153 operands[2], operands[3],
3158 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3159 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3160 (match_operand:<VWIDE> 1 "register_operand" "w")
3161 (match_operand:VQ_HSI 2 "register_operand" "w")
3162 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3163 (match_operand:SI 4 "immediate_operand" "i")]
3166 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3167 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3168 operands[2], operands[3],
3173 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3174 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3175 (match_operand:<VWIDE> 1 "register_operand" "w")
3176 (match_operand:VQ_HSI 2 "register_operand" "w")
3177 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3178 (match_operand:SI 4 "immediate_operand" "i")]
3181 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3182 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3183 operands[2], operands[3],
3188 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3189 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3191 (match_operand:<VWIDE> 1 "register_operand" "0")
3194 (sign_extend:<VWIDE>
3196 (match_operand:VQ_HSI 2 "register_operand" "w")
3197 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3198 (sign_extend:<VWIDE>
3199 (vec_duplicate:<VHALF>
3200 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3203 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3204 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3207 (define_expand "aarch64_sqdmlal2_n<mode>"
3208 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3209 (match_operand:<VWIDE> 1 "register_operand" "w")
3210 (match_operand:VQ_HSI 2 "register_operand" "w")
3211 (match_operand:<VEL> 3 "register_operand" "w")]
3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3215 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3216 operands[2], operands[3],
3221 (define_expand "aarch64_sqdmlsl2_n<mode>"
3222 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3223 (match_operand:<VWIDE> 1 "register_operand" "w")
3224 (match_operand:VQ_HSI 2 "register_operand" "w")
3225 (match_operand:<VEL> 3 "register_operand" "w")]
3228 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3229 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3230 operands[2], operands[3],
3237 (define_insn "aarch64_sqdmull<mode>"
3238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3241 (sign_extend:<VWIDE>
3242 (match_operand:VSD_HSI 1 "register_operand" "w"))
3243 (sign_extend:<VWIDE>
3244 (match_operand:VSD_HSI 2 "register_operand" "w")))
3247 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3248 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3253 (define_insn "aarch64_sqdmull_lane<mode>"
3254 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3257 (sign_extend:<VWIDE>
3258 (match_operand:VD_HSI 1 "register_operand" "w"))
3259 (sign_extend:<VWIDE>
3260 (vec_duplicate:VD_HSI
3262 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3263 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3268 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3269 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3271 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3274 (define_insn "aarch64_sqdmull_laneq<mode>"
3275 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3278 (sign_extend:<VWIDE>
3279 (match_operand:VD_HSI 1 "register_operand" "w"))
3280 (sign_extend:<VWIDE>
3281 (vec_duplicate:VD_HSI
3283 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3284 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3289 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3290 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3292 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3295 (define_insn "aarch64_sqdmull_lane<mode>"
3296 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3299 (sign_extend:<VWIDE>
3300 (match_operand:SD_HSI 1 "register_operand" "w"))
3301 (sign_extend:<VWIDE>
3303 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3304 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3309 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3310 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3312 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3315 (define_insn "aarch64_sqdmull_laneq<mode>"
3316 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3319 (sign_extend:<VWIDE>
3320 (match_operand:SD_HSI 1 "register_operand" "w"))
3321 (sign_extend:<VWIDE>
3323 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3324 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3329 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3330 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3332 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3337 (define_insn "aarch64_sqdmull_n<mode>"
3338 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3341 (sign_extend:<VWIDE>
3342 (match_operand:VD_HSI 1 "register_operand" "w"))
3343 (sign_extend:<VWIDE>
3344 (vec_duplicate:VD_HSI
3345 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3349 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3350 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3357 (define_insn "aarch64_sqdmull2<mode>_internal"
3358 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3361 (sign_extend:<VWIDE>
3363 (match_operand:VQ_HSI 1 "register_operand" "w")
3364 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3365 (sign_extend:<VWIDE>
3367 (match_operand:VQ_HSI 2 "register_operand" "w")
3372 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3373 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3376 (define_expand "aarch64_sqdmull2<mode>"
3377 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3378 (match_operand:VQ_HSI 1 "register_operand" "w")
3379 (match_operand:VQ_HSI 2 "register_operand" "w")]
3382 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3383 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3390 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3391 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3394 (sign_extend:<VWIDE>
3396 (match_operand:VQ_HSI 1 "register_operand" "w")
3397 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3398 (sign_extend:<VWIDE>
3399 (vec_duplicate:<VHALF>
3401 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3402 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3407 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3408 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3410 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3413 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3414 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3417 (sign_extend:<VWIDE>
3419 (match_operand:VQ_HSI 1 "register_operand" "w")
3420 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3421 (sign_extend:<VWIDE>
3422 (vec_duplicate:<VHALF>
3424 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3425 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3430 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3431 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3433 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3436 (define_expand "aarch64_sqdmull2_lane<mode>"
3437 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3438 (match_operand:VQ_HSI 1 "register_operand" "w")
3439 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3440 (match_operand:SI 3 "immediate_operand" "i")]
3443 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3444 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3445 operands[2], operands[3],
3450 (define_expand "aarch64_sqdmull2_laneq<mode>"
3451 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3452 (match_operand:VQ_HSI 1 "register_operand" "w")
3453 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3454 (match_operand:SI 3 "immediate_operand" "i")]
3457 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3458 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3459 operands[2], operands[3],
3466 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3470 (sign_extend:<VWIDE>
3472 (match_operand:VQ_HSI 1 "register_operand" "w")
3473 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3474 (sign_extend:<VWIDE>
3475 (vec_duplicate:<VHALF>
3476 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3480 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3481 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3484 (define_expand "aarch64_sqdmull2_n<mode>"
3485 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3486 (match_operand:VQ_HSI 1 "register_operand" "w")
3487 (match_operand:<VEL> 2 "register_operand" "w")]
3490 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3491 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
3498 (define_insn "aarch64_<sur>shl<mode>"
3499 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3501 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3502 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3505 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3506 [(set_attr "type" "neon_shift_reg<q>")]
3512 (define_insn "aarch64_<sur>q<r>shl<mode>"
3513 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3515 [(match_operand:VSDQ_I 1 "register_operand" "w")
3516 (match_operand:VSDQ_I 2 "register_operand" "w")]
3519 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3520 [(set_attr "type" "neon_sat_shift_reg<q>")]
3525 (define_insn "aarch64_<sur>shll_n<mode>"
3526 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3527 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3529 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3533 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3534 if (INTVAL (operands[2]) == bit_width)
3536 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3539 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3541 [(set_attr "type" "neon_shift_imm_long")]
3546 (define_insn "aarch64_<sur>shll2_n<mode>"
3547 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3548 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3549 (match_operand:SI 2 "immediate_operand" "i")]
3553 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3554 if (INTVAL (operands[2]) == bit_width)
3556 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3559 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3561 [(set_attr "type" "neon_shift_imm_long")]
3566 (define_insn "aarch64_<sur>shr_n<mode>"
3567 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3568 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3570 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3573 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3574 [(set_attr "type" "neon_sat_shift_imm<q>")]
3579 (define_insn "aarch64_<sur>sra_n<mode>"
3580 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3581 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3582 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3584 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3587 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3588 [(set_attr "type" "neon_shift_acc<q>")]
3593 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3594 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3595 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3596 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3598 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3601 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3602 [(set_attr "type" "neon_shift_imm<q>")]
3607 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3608 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3609 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3611 "aarch64_simd_shift_imm_<ve_mode>" "i")]
3614 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3615 [(set_attr "type" "neon_sat_shift_imm<q>")]
3621 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3622 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3623 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3625 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3628 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
3629 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3633 ;; cm(eq|ge|gt|lt|le)
3634 ;; Note, we have constraints for Dz and Z as different expanders
3635 ;; have different ideas of what should be passed to this pattern.
3637 (define_insn "aarch64_cm<optab><mode>"
3638 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3640 (COMPARISONS:<V_cmp_result>
3641 (match_operand:VDQ_I 1 "register_operand" "w,w")
3642 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3646 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3647 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3648 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
3651 (define_insn_and_split "aarch64_cm<optab>di"
3652 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3655 (match_operand:DI 1 "register_operand" "w,w,r")
3656 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3658 (clobber (reg:CC CC_REGNUM))]
3662 [(set (match_operand:DI 0 "register_operand")
3665 (match_operand:DI 1 "register_operand")
3666 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3669 /* If we are in the general purpose register file,
3670 we split to a sequence of comparison and store. */
3671 if (GP_REGNUM_P (REGNO (operands[0]))
3672 && GP_REGNUM_P (REGNO (operands[1])))
3674 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3675 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3676 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3677 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3680 /* Otherwise, we expand to a similar pattern which does not
3681 clobber CC_REGNUM. */
3683 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
3686 (define_insn "*aarch64_cm<optab>di"
3687 [(set (match_operand:DI 0 "register_operand" "=w,w")
3690 (match_operand:DI 1 "register_operand" "w,w")
3691 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3693 "TARGET_SIMD && reload_completed"
3695 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3696 cm<optab>\t%d0, %d1, #0"
3697 [(set_attr "type" "neon_compare, neon_compare_zero")]
3702 (define_insn "aarch64_cm<optab><mode>"
3703 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3705 (UCOMPARISONS:<V_cmp_result>
3706 (match_operand:VDQ_I 1 "register_operand" "w")
3707 (match_operand:VDQ_I 2 "register_operand" "w")
3710 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3711 [(set_attr "type" "neon_compare<q>")]
3714 (define_insn_and_split "aarch64_cm<optab>di"
3715 [(set (match_operand:DI 0 "register_operand" "=w,r")
3718 (match_operand:DI 1 "register_operand" "w,r")
3719 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3721 (clobber (reg:CC CC_REGNUM))]
3725 [(set (match_operand:DI 0 "register_operand")
3728 (match_operand:DI 1 "register_operand")
3729 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3732 /* If we are in the general purpose register file,
3733 we split to a sequence of comparison and store. */
3734 if (GP_REGNUM_P (REGNO (operands[0]))
3735 && GP_REGNUM_P (REGNO (operands[1])))
3737 machine_mode mode = CCmode;
3738 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3739 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3740 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3743 /* Otherwise, we expand to a similar pattern which does not
3744 clobber CC_REGNUM. */
3746 [(set_attr "type" "neon_compare,multiple")]
3749 (define_insn "*aarch64_cm<optab>di"
3750 [(set (match_operand:DI 0 "register_operand" "=w")
3753 (match_operand:DI 1 "register_operand" "w")
3754 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
3756 "TARGET_SIMD && reload_completed"
3757 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
3758 [(set_attr "type" "neon_compare")]
3763 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
3764 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
3765 ;; not (neg (eq (and x y) 0))
3766 ;; which is rewritten by simplify_rtx as
3767 ;; plus (eq (and x y) 0) -1.
3769 (define_insn "aarch64_cmtst<mode>"
3770 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3771 (plus:<V_cmp_result>
3774 (match_operand:VDQ_I 1 "register_operand" "w")
3775 (match_operand:VDQ_I 2 "register_operand" "w"))
3776 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
3777 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
3780 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3781 [(set_attr "type" "neon_tst<q>")]
3784 (define_insn_and_split "aarch64_cmtstdi"
3785 [(set (match_operand:DI 0 "register_operand" "=w,r")
3789 (match_operand:DI 1 "register_operand" "w,r")
3790 (match_operand:DI 2 "register_operand" "w,r"))
3792 (clobber (reg:CC CC_REGNUM))]
3796 [(set (match_operand:DI 0 "register_operand")
3800 (match_operand:DI 1 "register_operand")
3801 (match_operand:DI 2 "register_operand"))
3804 /* If we are in the general purpose register file,
3805 we split to a sequence of comparison and store. */
3806 if (GP_REGNUM_P (REGNO (operands[0]))
3807 && GP_REGNUM_P (REGNO (operands[1])))
3809 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3810 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3811 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3812 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3813 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3816 /* Otherwise, we expand to a similar pattern which does not
3817 clobber CC_REGNUM. */
3819 [(set_attr "type" "neon_tst,multiple")]
3822 (define_insn "*aarch64_cmtstdi"
3823 [(set (match_operand:DI 0 "register_operand" "=w")
3827 (match_operand:DI 1 "register_operand" "w")
3828 (match_operand:DI 2 "register_operand" "w"))
3831 "cmtst\t%d0, %d1, %d2"
3832 [(set_attr "type" "neon_tst")]
3835 ;; fcm(eq|ge|gt|le|lt)
3837 (define_insn "aarch64_cm<optab><mode>"
3838 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3840 (COMPARISONS:<V_cmp_result>
3841 (match_operand:VALLF 1 "register_operand" "w,w")
3842 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3846 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3847 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3848 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3852 ;; Note we can also handle what would be fac(le|lt) by
3853 ;; generating fac(ge|gt).
3855 (define_insn "*aarch64_fac<optab><mode>"
3856 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3858 (FAC_COMPARISONS:<V_cmp_result>
3859 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3860 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3863 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3864 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3869 (define_insn "aarch64_addp<mode>"
3870 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3872 [(match_operand:VD_BHSI 1 "register_operand" "w")
3873 (match_operand:VD_BHSI 2 "register_operand" "w")]
3876 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3877 [(set_attr "type" "neon_reduc_add<q>")]
3880 (define_insn "aarch64_addpdi"
3881 [(set (match_operand:DI 0 "register_operand" "=w")
3883 [(match_operand:V2DI 1 "register_operand" "w")]
3887 [(set_attr "type" "neon_reduc_add")]
3892 (define_insn "sqrt<mode>2"
3893 [(set (match_operand:VDQF 0 "register_operand" "=w")
3894 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3896 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3897 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3900 ;; Patterns for vector struct loads and stores.
3902 (define_insn "vec_load_lanesoi<mode>"
3903 [(set (match_operand:OI 0 "register_operand" "=w")
3904 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3905 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3908 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3909 [(set_attr "type" "neon_load2_2reg<q>")]
3912 (define_insn "aarch64_simd_ld2r<mode>"
3913 [(set (match_operand:OI 0 "register_operand" "=w")
3914 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3915 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3918 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3919 [(set_attr "type" "neon_load2_all_lanes<q>")]
3922 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
3923 [(set (match_operand:OI 0 "register_operand" "=w")
3924 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3925 (match_operand:OI 2 "register_operand" "0")
3926 (match_operand:SI 3 "immediate_operand" "i")
3927 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3930 "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
3931 [(set_attr "type" "neon_load2_one_lane")]
3934 (define_insn "vec_store_lanesoi<mode>"
3935 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3936 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3937 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3940 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3941 [(set_attr "type" "neon_store2_2reg<q>")]
3944 (define_insn "vec_store_lanesoi_lane<mode>"
3945 [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3946 (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
3947 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3948 (match_operand:SI 2 "immediate_operand" "i")]
3951 "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
3952 [(set_attr "type" "neon_store3_one_lane<q>")]
3955 (define_insn "vec_load_lanesci<mode>"
3956 [(set (match_operand:CI 0 "register_operand" "=w")
3957 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3958 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3961 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3962 [(set_attr "type" "neon_load3_3reg<q>")]
3965 (define_insn "aarch64_simd_ld3r<mode>"
3966 [(set (match_operand:CI 0 "register_operand" "=w")
3967 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3971 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3972 [(set_attr "type" "neon_load3_all_lanes<q>")]
3975 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
3976 [(set (match_operand:CI 0 "register_operand" "=w")
3977 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3978 (match_operand:CI 2 "register_operand" "0")
3979 (match_operand:SI 3 "immediate_operand" "i")
3980 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3983 "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
3984 [(set_attr "type" "neon_load3_one_lane")]
3987 (define_insn "vec_store_lanesci<mode>"
3988 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3989 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3990 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3993 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3994 [(set_attr "type" "neon_store3_3reg<q>")]
3997 (define_insn "vec_store_lanesci_lane<mode>"
3998 [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3999 (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
4000 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4001 (match_operand:SI 2 "immediate_operand" "i")]
4004 "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
4005 [(set_attr "type" "neon_store3_one_lane<q>")]
4008 (define_insn "vec_load_lanesxi<mode>"
4009 [(set (match_operand:XI 0 "register_operand" "=w")
4010 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4011 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4014 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4015 [(set_attr "type" "neon_load4_4reg<q>")]
4018 (define_insn "aarch64_simd_ld4r<mode>"
4019 [(set (match_operand:XI 0 "register_operand" "=w")
4020 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4021 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4024 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4025 [(set_attr "type" "neon_load4_all_lanes<q>")]
4028 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4029 [(set (match_operand:XI 0 "register_operand" "=w")
4030 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4031 (match_operand:XI 2 "register_operand" "0")
4032 (match_operand:SI 3 "immediate_operand" "i")
4033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4036 "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
4037 [(set_attr "type" "neon_load4_one_lane")]
4040 (define_insn "vec_store_lanesxi<mode>"
4041 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4042 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4043 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4046 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4047 [(set_attr "type" "neon_store4_4reg<q>")]
4050 (define_insn "vec_store_lanesxi_lane<mode>"
4051 [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
4052 (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
4053 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4054 (match_operand:SI 2 "immediate_operand" "i")]
4057 "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
4058 [(set_attr "type" "neon_store4_one_lane<q>")]
4061 ;; Reload patterns for AdvSIMD register list operands.
4063 (define_expand "mov<mode>"
4064 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
4065 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
4068 if (can_create_pseudo_p ())
4070 if (GET_CODE (operands[0]) != REG)
4071 operands[1] = force_reg (<MODE>mode, operands[1]);
4075 (define_insn "*aarch64_mov<mode>"
4076 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4077 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4079 && (register_operand (operands[0], <MODE>mode)
4080 || register_operand (operands[1], <MODE>mode))"
4083 switch (which_alternative)
4086 case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
4087 case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
4088 default: gcc_unreachable ();
4091 [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
4092 neon_load<nregs>_<nregs>reg_q")
4093 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4096 (define_insn "aarch64_be_ld1<mode>"
4097 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4098 (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
4101 "ld1\\t{%0<Vmtype>}, %1"
4102 [(set_attr "type" "neon_load1_1reg<q>")]
4105 (define_insn "aarch64_be_st1<mode>"
4106 [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
4107 (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
4110 "st1\\t{%1<Vmtype>}, %0"
4111 [(set_attr "type" "neon_store1_1reg<q>")]
4115 [(set (match_operand:OI 0 "register_operand" "")
4116 (match_operand:OI 1 "register_operand" ""))]
4117 "TARGET_SIMD && reload_completed"
4118 [(set (match_dup 0) (match_dup 1))
4119 (set (match_dup 2) (match_dup 3))]
4121 int rdest = REGNO (operands[0]);
4122 int rsrc = REGNO (operands[1]);
4123 rtx dest[2], src[2];
4125 dest[0] = gen_rtx_REG (TFmode, rdest);
4126 src[0] = gen_rtx_REG (TFmode, rsrc);
4127 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4128 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4130 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
4134 [(set (match_operand:CI 0 "register_operand" "")
4135 (match_operand:CI 1 "register_operand" ""))]
4136 "TARGET_SIMD && reload_completed"
4137 [(set (match_dup 0) (match_dup 1))
4138 (set (match_dup 2) (match_dup 3))
4139 (set (match_dup 4) (match_dup 5))]
4141 int rdest = REGNO (operands[0]);
4142 int rsrc = REGNO (operands[1]);
4143 rtx dest[3], src[3];
4145 dest[0] = gen_rtx_REG (TFmode, rdest);
4146 src[0] = gen_rtx_REG (TFmode, rsrc);
4147 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4148 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4149 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
4150 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
4152 aarch64_simd_disambiguate_copy (operands, dest, src, 3);
4156 [(set (match_operand:XI 0 "register_operand" "")
4157 (match_operand:XI 1 "register_operand" ""))]
4158 "TARGET_SIMD && reload_completed"
4159 [(set (match_dup 0) (match_dup 1))
4160 (set (match_dup 2) (match_dup 3))
4161 (set (match_dup 4) (match_dup 5))
4162 (set (match_dup 6) (match_dup 7))]
4164 int rdest = REGNO (operands[0]);
4165 int rsrc = REGNO (operands[1]);
4166 rtx dest[4], src[4];
4168 dest[0] = gen_rtx_REG (TFmode, rdest);
4169 src[0] = gen_rtx_REG (TFmode, rsrc);
4170 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4171 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4172 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
4173 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
4174 dest[3] = gen_rtx_REG (TFmode, rdest + 3);
4175 src[3] = gen_rtx_REG (TFmode, rsrc + 3);
4177 aarch64_simd_disambiguate_copy (operands, dest, src, 4);
4180 (define_expand "aarch64_ld2r<mode>"
4181 [(match_operand:OI 0 "register_operand" "=w")
4182 (match_operand:DI 1 "register_operand" "w")
4183 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4186 machine_mode mode = <V_TWO_ELEM>mode;
4187 rtx mem = gen_rtx_MEM (mode, operands[1]);
4189 emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
4193 (define_expand "aarch64_ld3r<mode>"
4194 [(match_operand:CI 0 "register_operand" "=w")
4195 (match_operand:DI 1 "register_operand" "w")
4196 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4199 machine_mode mode = <V_THREE_ELEM>mode;
4200 rtx mem = gen_rtx_MEM (mode, operands[1]);
4202 emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
4206 (define_expand "aarch64_ld4r<mode>"
4207 [(match_operand:XI 0 "register_operand" "=w")
4208 (match_operand:DI 1 "register_operand" "w")
4209 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4212 machine_mode mode = <V_FOUR_ELEM>mode;
4213 rtx mem = gen_rtx_MEM (mode, operands[1]);
4215 emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0],mem));
4219 (define_insn "aarch64_ld2<mode>_dreg"
4220 [(set (match_operand:OI 0 "register_operand" "=w")
4224 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4226 (vec_duplicate:VD (const_int 0)))
4228 (unspec:VD [(match_dup 1)]
4230 (vec_duplicate:VD (const_int 0)))) 0))]
4232 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4233 [(set_attr "type" "neon_load2_2reg<q>")]
4236 (define_insn "aarch64_ld2<mode>_dreg"
4237 [(set (match_operand:OI 0 "register_operand" "=w")
4241 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4245 (unspec:DX [(match_dup 1)]
4247 (const_int 0))) 0))]
4249 "ld1\\t{%S0.1d - %T0.1d}, %1"
4250 [(set_attr "type" "neon_load1_2reg<q>")]
4253 (define_insn "aarch64_ld3<mode>_dreg"
4254 [(set (match_operand:CI 0 "register_operand" "=w")
4259 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4261 (vec_duplicate:VD (const_int 0)))
4263 (unspec:VD [(match_dup 1)]
4265 (vec_duplicate:VD (const_int 0))))
4267 (unspec:VD [(match_dup 1)]
4269 (vec_duplicate:VD (const_int 0)))) 0))]
4271 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4272 [(set_attr "type" "neon_load3_3reg<q>")]
4275 (define_insn "aarch64_ld3<mode>_dreg"
4276 [(set (match_operand:CI 0 "register_operand" "=w")
4281 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4285 (unspec:DX [(match_dup 1)]
4289 (unspec:DX [(match_dup 1)]
4291 (const_int 0))) 0))]
4293 "ld1\\t{%S0.1d - %U0.1d}, %1"
4294 [(set_attr "type" "neon_load1_3reg<q>")]
4297 (define_insn "aarch64_ld4<mode>_dreg"
4298 [(set (match_operand:XI 0 "register_operand" "=w")
4303 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4305 (vec_duplicate:VD (const_int 0)))
4307 (unspec:VD [(match_dup 1)]
4309 (vec_duplicate:VD (const_int 0))))
4312 (unspec:VD [(match_dup 1)]
4314 (vec_duplicate:VD (const_int 0)))
4316 (unspec:VD [(match_dup 1)]
4318 (vec_duplicate:VD (const_int 0))))) 0))]
4320 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4321 [(set_attr "type" "neon_load4_4reg<q>")]
4324 (define_insn "aarch64_ld4<mode>_dreg"
4325 [(set (match_operand:XI 0 "register_operand" "=w")
4330 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4334 (unspec:DX [(match_dup 1)]
4339 (unspec:DX [(match_dup 1)]
4343 (unspec:DX [(match_dup 1)]
4345 (const_int 0)))) 0))]
4347 "ld1\\t{%S0.1d - %V0.1d}, %1"
4348 [(set_attr "type" "neon_load1_4reg<q>")]
4351 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4352 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4353 (match_operand:DI 1 "register_operand" "r")
4354 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4357 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4358 rtx mem = gen_rtx_MEM (mode, operands[1]);
4360 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4364 (define_expand "aarch64_ld1<VALL:mode>"
4365 [(match_operand:VALL 0 "register_operand")
4366 (match_operand:DI 1 "register_operand")]
4369 machine_mode mode = <VALL:MODE>mode;
4370 rtx mem = gen_rtx_MEM (mode, operands[1]);
4372 if (BYTES_BIG_ENDIAN)
4373 emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
4375 emit_move_insn (operands[0], mem);
4379 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4380 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4381 (match_operand:DI 1 "register_operand" "r")
4382 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4385 machine_mode mode = <VSTRUCT:MODE>mode;
4386 rtx mem = gen_rtx_MEM (mode, operands[1]);
4388 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
4392 (define_expand "aarch64_ld2_lane<mode>"
4393 [(match_operand:OI 0 "register_operand" "=w")
4394 (match_operand:DI 1 "register_operand" "w")
4395 (match_operand:OI 2 "register_operand" "0")
4396 (match_operand:SI 3 "immediate_operand" "i")
4397 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4400 machine_mode mode = <V_TWO_ELEM>mode;
4401 rtx mem = gen_rtx_MEM (mode, operands[1]);
4403 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4405 emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
4412 (define_expand "aarch64_ld3_lane<mode>"
4413 [(match_operand:CI 0 "register_operand" "=w")
4414 (match_operand:DI 1 "register_operand" "w")
4415 (match_operand:CI 2 "register_operand" "0")
4416 (match_operand:SI 3 "immediate_operand" "i")
4417 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4420 machine_mode mode = <V_THREE_ELEM>mode;
4421 rtx mem = gen_rtx_MEM (mode, operands[1]);
4423 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4425 emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
4432 (define_expand "aarch64_ld4_lane<mode>"
4433 [(match_operand:XI 0 "register_operand" "=w")
4434 (match_operand:DI 1 "register_operand" "w")
4435 (match_operand:XI 2 "register_operand" "0")
4436 (match_operand:SI 3 "immediate_operand" "i")
4437 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4440 machine_mode mode = <V_FOUR_ELEM>mode;
4441 rtx mem = gen_rtx_MEM (mode, operands[1]);
4443 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4445 emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
4454 ;; Expanders for builtins to extract vector registers from large
4455 ;; opaque integer modes.
4459 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4460 [(match_operand:VDC 0 "register_operand" "=w")
4461 (match_operand:VSTRUCT 1 "register_operand" "w")
4462 (match_operand:SI 2 "immediate_operand" "i")]
4465 int part = INTVAL (operands[2]);
4466 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4467 int offset = part * 16;
4469 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4470 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
4476 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4477 [(match_operand:VQ 0 "register_operand" "=w")
4478 (match_operand:VSTRUCT 1 "register_operand" "w")
4479 (match_operand:SI 2 "immediate_operand" "i")]
4482 int part = INTVAL (operands[2]);
4483 int offset = part * 16;
4485 emit_move_insn (operands[0],
4486 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
4490 ;; Permuted-store expanders for neon intrinsics.
4492 ;; Permute instructions
4496 (define_expand "vec_perm_const<mode>"
4497 [(match_operand:VALL 0 "register_operand")
4498 (match_operand:VALL 1 "register_operand")
4499 (match_operand:VALL 2 "register_operand")
4500 (match_operand:<V_cmp_result> 3)]
4503 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
4504 operands[2], operands[3]))
4510 (define_expand "vec_perm<mode>"
4511 [(match_operand:VB 0 "register_operand")
4512 (match_operand:VB 1 "register_operand")
4513 (match_operand:VB 2 "register_operand")
4514 (match_operand:VB 3 "register_operand")]
4517 aarch64_expand_vec_perm (operands[0], operands[1],
4518 operands[2], operands[3]);
4522 (define_insn "aarch64_tbl1<mode>"
4523 [(set (match_operand:VB 0 "register_operand" "=w")
4524 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
4525 (match_operand:VB 2 "register_operand" "w")]
4528 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
4529 [(set_attr "type" "neon_tbl1<q>")]
4532 ;; Two source registers.
4534 (define_insn "aarch64_tbl2v16qi"
4535 [(set (match_operand:V16QI 0 "register_operand" "=w")
4536 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
4537 (match_operand:V16QI 2 "register_operand" "w")]
4540 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
4541 [(set_attr "type" "neon_tbl2_q")]
4544 (define_insn_and_split "aarch64_combinev16qi"
4545 [(set (match_operand:OI 0 "register_operand" "=w")
4546 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
4547 (match_operand:V16QI 2 "register_operand" "w")]
4551 "&& reload_completed"
4554 aarch64_split_combinev16qi (operands);
4557 [(set_attr "type" "multiple")]
4560 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
4561 [(set (match_operand:VALL 0 "register_operand" "=w")
4562 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4563 (match_operand:VALL 2 "register_operand" "w")]
4566 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4567 [(set_attr "type" "neon_permute<q>")]
4570 ;; Note immediate (third) operand is lane index not byte index.
4571 (define_insn "aarch64_ext<mode>"
4572 [(set (match_operand:VALL 0 "register_operand" "=w")
4573 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4574 (match_operand:VALL 2 "register_operand" "w")
4575 (match_operand:SI 3 "immediate_operand" "i")]
4579 operands[3] = GEN_INT (INTVAL (operands[3])
4580 * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
4581 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
4583 [(set_attr "type" "neon_ext<q>")]
4586 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
4587 [(set (match_operand:VALL 0 "register_operand" "=w")
4588 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
4591 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
4592 [(set_attr "type" "neon_rev<q>")]
4595 (define_insn "aarch64_st2<mode>_dreg"
4596 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4597 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4598 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4601 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4602 [(set_attr "type" "neon_store2_2reg")]
4605 (define_insn "aarch64_st2<mode>_dreg"
4606 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4607 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4608 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4611 "st1\\t{%S1.1d - %T1.1d}, %0"
4612 [(set_attr "type" "neon_store1_2reg")]
4615 (define_insn "aarch64_st3<mode>_dreg"
4616 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4617 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4618 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4621 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4622 [(set_attr "type" "neon_store3_3reg")]
4625 (define_insn "aarch64_st3<mode>_dreg"
4626 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4627 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4628 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4631 "st1\\t{%S1.1d - %U1.1d}, %0"
4632 [(set_attr "type" "neon_store1_3reg")]
4635 (define_insn "aarch64_st4<mode>_dreg"
4636 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4637 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4638 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4641 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4642 [(set_attr "type" "neon_store4_4reg")]
4645 (define_insn "aarch64_st4<mode>_dreg"
4646 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4647 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4648 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4651 "st1\\t{%S1.1d - %V1.1d}, %0"
4652 [(set_attr "type" "neon_store1_4reg")]
4655 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
4656 [(match_operand:DI 0 "register_operand" "r")
4657 (match_operand:VSTRUCT 1 "register_operand" "w")
4658 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4661 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4662 rtx mem = gen_rtx_MEM (mode, operands[0]);
4664 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
4668 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
4669 [(match_operand:DI 0 "register_operand" "r")
4670 (match_operand:VSTRUCT 1 "register_operand" "w")
4671 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4674 machine_mode mode = <VSTRUCT:MODE>mode;
4675 rtx mem = gen_rtx_MEM (mode, operands[0]);
4677 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
4681 (define_expand "aarch64_st2_lane<VQ:mode>"
4682 [(match_operand:DI 0 "register_operand" "r")
4683 (match_operand:OI 1 "register_operand" "w")
4684 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4685 (match_operand:SI 2 "immediate_operand")]
4688 machine_mode mode = <V_TWO_ELEM>mode;
4689 rtx mem = gen_rtx_MEM (mode, operands[0]);
4690 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4692 emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
4698 (define_expand "aarch64_st3_lane<VQ:mode>"
4699 [(match_operand:DI 0 "register_operand" "r")
4700 (match_operand:CI 1 "register_operand" "w")
4701 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4702 (match_operand:SI 2 "immediate_operand")]
4705 machine_mode mode = <V_THREE_ELEM>mode;
4706 rtx mem = gen_rtx_MEM (mode, operands[0]);
4707 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4709 emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
4715 (define_expand "aarch64_st4_lane<VQ:mode>"
4716 [(match_operand:DI 0 "register_operand" "r")
4717 (match_operand:XI 1 "register_operand" "w")
4718 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4719 (match_operand:SI 2 "immediate_operand")]
4722 machine_mode mode = <V_FOUR_ELEM>mode;
4723 rtx mem = gen_rtx_MEM (mode, operands[0]);
4724 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4726 emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
4732 (define_expand "aarch64_st1<VALL:mode>"
4733 [(match_operand:DI 0 "register_operand")
4734 (match_operand:VALL 1 "register_operand")]
4737 machine_mode mode = <VALL:MODE>mode;
4738 rtx mem = gen_rtx_MEM (mode, operands[0]);
4740 if (BYTES_BIG_ENDIAN)
4741 emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
4743 emit_move_insn (mem, operands[1]);
4747 ;; Expander for builtins to insert vector registers into large
4748 ;; opaque integer modes.
4750 ;; Q-register list. We don't need a D-reg inserter as we zero
4751 ;; extend them in arm_neon.h and insert the resulting Q-regs.
4753 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4754 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4755 (match_operand:VSTRUCT 1 "register_operand" "0")
4756 (match_operand:VQ 2 "register_operand" "w")
4757 (match_operand:SI 3 "immediate_operand" "i")]
4760 int part = INTVAL (operands[3]);
4761 int offset = part * 16;
4763 emit_move_insn (operands[0], operands[1]);
4764 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4769 ;; Standard pattern name vec_init<mode>.
4771 (define_expand "vec_init<mode>"
4772 [(match_operand:VALL 0 "register_operand" "")
4773 (match_operand 1 "" "")]
4776 aarch64_expand_vector_init (operands[0], operands[1]);
4780 (define_insn "*aarch64_simd_ld1r<mode>"
4781 [(set (match_operand:VALL 0 "register_operand" "=w")
4783 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
4785 "ld1r\\t{%0.<Vtype>}, %1"
4786 [(set_attr "type" "neon_load1_all_lanes")]
4789 (define_insn "aarch64_frecpe<mode>"
4790 [(set (match_operand:VDQF 0 "register_operand" "=w")
4791 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4794 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
4795 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
4798 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
4799 [(set (match_operand:GPF 0 "register_operand" "=w")
4800 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
4803 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
4804 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
4807 (define_insn "aarch64_frecps<mode>"
4808 [(set (match_operand:VALLF 0 "register_operand" "=w")
4809 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
4810 (match_operand:VALLF 2 "register_operand" "w")]
4813 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4814 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
4817 (define_insn "aarch64_urecpe<mode>"
4818 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
4819 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
4822 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
4823 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
4825 ;; Standard pattern name vec_extract<mode>.
4827 (define_expand "vec_extract<mode>"
4828 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
4829 (match_operand:VALL 1 "register_operand" "")
4830 (match_operand:SI 2 "immediate_operand" "")]
4834 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
4840 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
4841 [(set (match_operand:V16QI 0 "register_operand" "=w")
4842 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4843 (match_operand:V16QI 2 "register_operand" "w")]
4845 "TARGET_SIMD && TARGET_CRYPTO"
4846 "aes<aes_op>\\t%0.16b, %2.16b"
4847 [(set_attr "type" "crypto_aese")]
4850 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
4851 [(set (match_operand:V16QI 0 "register_operand" "=w")
4852 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
4854 "TARGET_SIMD && TARGET_CRYPTO"
4855 "aes<aesmc_op>\\t%0.16b, %1.16b"
4856 [(set_attr "type" "crypto_aesmc")]
4861 (define_insn "aarch64_crypto_sha1hsi"
4862 [(set (match_operand:SI 0 "register_operand" "=w")
4863 (unspec:SI [(match_operand:SI 1
4864 "register_operand" "w")]
4866 "TARGET_SIMD && TARGET_CRYPTO"
4868 [(set_attr "type" "crypto_sha1_fast")]
4871 (define_insn "aarch64_crypto_sha1su1v4si"
4872 [(set (match_operand:V4SI 0 "register_operand" "=w")
4873 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4874 (match_operand:V4SI 2 "register_operand" "w")]
4876 "TARGET_SIMD && TARGET_CRYPTO"
4877 "sha1su1\\t%0.4s, %2.4s"
4878 [(set_attr "type" "crypto_sha1_fast")]
4881 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
4882 [(set (match_operand:V4SI 0 "register_operand" "=w")
4883 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4884 (match_operand:SI 2 "register_operand" "w")
4885 (match_operand:V4SI 3 "register_operand" "w")]
4887 "TARGET_SIMD && TARGET_CRYPTO"
4888 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
4889 [(set_attr "type" "crypto_sha1_slow")]
4892 (define_insn "aarch64_crypto_sha1su0v4si"
4893 [(set (match_operand:V4SI 0 "register_operand" "=w")
4894 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4895 (match_operand:V4SI 2 "register_operand" "w")
4896 (match_operand:V4SI 3 "register_operand" "w")]
4898 "TARGET_SIMD && TARGET_CRYPTO"
4899 "sha1su0\\t%0.4s, %2.4s, %3.4s"
4900 [(set_attr "type" "crypto_sha1_xor")]
4905 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
4906 [(set (match_operand:V4SI 0 "register_operand" "=w")
4907 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4908 (match_operand:V4SI 2 "register_operand" "w")
4909 (match_operand:V4SI 3 "register_operand" "w")]
4911 "TARGET_SIMD && TARGET_CRYPTO"
4912 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
4913 [(set_attr "type" "crypto_sha256_slow")]
4916 (define_insn "aarch64_crypto_sha256su0v4si"
4917 [(set (match_operand:V4SI 0 "register_operand" "=w")
4918 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4919 (match_operand:V4SI 2 "register_operand" "w")]
4921 "TARGET_SIMD &&TARGET_CRYPTO"
4922 "sha256su0\\t%0.4s, %2.4s"
4923 [(set_attr "type" "crypto_sha256_fast")]
4926 (define_insn "aarch64_crypto_sha256su1v4si"
4927 [(set (match_operand:V4SI 0 "register_operand" "=w")
4928 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4929 (match_operand:V4SI 2 "register_operand" "w")
4930 (match_operand:V4SI 3 "register_operand" "w")]
4932 "TARGET_SIMD &&TARGET_CRYPTO"
4933 "sha256su1\\t%0.4s, %2.4s, %3.4s"
4934 [(set_attr "type" "crypto_sha256_slow")]
4939 (define_insn "aarch64_crypto_pmulldi"
4940 [(set (match_operand:TI 0 "register_operand" "=w")
4941 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
4942 (match_operand:DI 2 "register_operand" "w")]
4944 "TARGET_SIMD && TARGET_CRYPTO"
4945 "pmull\\t%0.1q, %1.1d, %2.1d"
4946 [(set_attr "type" "neon_mul_d_long")]
4949 (define_insn "aarch64_crypto_pmullv2di"
4950 [(set (match_operand:TI 0 "register_operand" "=w")
4951 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
4952 (match_operand:V2DI 2 "register_operand" "w")]
4954 "TARGET_SIMD && TARGET_CRYPTO"
4955 "pmull2\\t%0.1q, %1.2d, %2.2d"
4956 [(set_attr "type" "neon_mul_d_long")]