1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
23 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
33 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ 0 "register_operand" "=w, w")
46 (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
49 dup\\t%0.<Vtype>, %<vw>1
50 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
51 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQF 0 "register_operand" "=w")
56 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
58 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
59 [(set_attr "type" "neon_dup<q>")]
62 (define_insn "aarch64_dup_lane<mode>"
63 [(set (match_operand:VALL 0 "register_operand" "=w")
66 (match_operand:VALL 1 "register_operand" "w")
67 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
71 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
72 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
74 [(set_attr "type" "neon_dup<q>")]
77 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
78 [(set (match_operand:VALL 0 "register_operand" "=w")
81 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
82 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
86 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
87 INTVAL (operands[2])));
88 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
90 [(set_attr "type" "neon_dup<q>")]
93 (define_insn "*aarch64_simd_mov<mode>"
94 [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
95 "=w, m, w, ?r, ?w, ?r, w")
96 (match_operand:VD 1 "aarch64_simd_general_operand"
97 "m, w, w, w, r, r, Dn"))]
99 && (register_operand (operands[0], <MODE>mode)
100 || register_operand (operands[1], <MODE>mode))"
102 switch (which_alternative)
104 case 0: return "ldr\\t%d0, %1";
105 case 1: return "str\\t%d1, %0";
106 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
107 case 3: return "umov\t%0, %1.d[0]";
108 case 4: return "ins\t%0.d[0], %1";
109 case 5: return "mov\t%0, %1";
111 return aarch64_output_simd_mov_immediate (operands[1],
113 default: gcc_unreachable ();
116 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
117 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
118 mov_reg, neon_move<q>")]
121 (define_insn "*aarch64_simd_mov<mode>"
122 [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
123 "=w, m, w, ?r, ?w, ?r, w")
124 (match_operand:VQ 1 "aarch64_simd_general_operand"
125 "m, w, w, w, r, r, Dn"))]
127 && (register_operand (operands[0], <MODE>mode)
128 || register_operand (operands[1], <MODE>mode))"
130 switch (which_alternative)
133 return "ldr\\t%q0, %1";
135 return "str\\t%q1, %0";
137 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
143 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
148 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
149 neon_logic<q>, multiple, multiple, multiple,\
151 (set_attr "length" "4,4,4,8,8,8,4")]
155 [(set (match_operand:VQ 0 "register_operand" "")
156 (match_operand:VQ 1 "register_operand" ""))]
157 "TARGET_SIMD && reload_completed
158 && GP_REGNUM_P (REGNO (operands[0]))
159 && GP_REGNUM_P (REGNO (operands[1]))"
160 [(set (match_dup 0) (match_dup 1))
161 (set (match_dup 2) (match_dup 3))]
163 int rdest = REGNO (operands[0]);
164 int rsrc = REGNO (operands[1]);
167 dest[0] = gen_rtx_REG (DImode, rdest);
168 src[0] = gen_rtx_REG (DImode, rsrc);
169 dest[1] = gen_rtx_REG (DImode, rdest + 1);
170 src[1] = gen_rtx_REG (DImode, rsrc + 1);
172 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
176 [(set (match_operand:VQ 0 "register_operand" "")
177 (match_operand:VQ 1 "register_operand" ""))]
178 "TARGET_SIMD && reload_completed
179 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
180 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
183 aarch64_split_simd_move (operands[0], operands[1]);
187 (define_expand "aarch64_split_simd_mov<mode>"
188 [(set (match_operand:VQ 0)
189 (match_operand:VQ 1))]
192 rtx dst = operands[0];
193 rtx src = operands[1];
195 if (GP_REGNUM_P (REGNO (src)))
197 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
198 rtx src_high_part = gen_highpart (<VHALF>mode, src);
201 (gen_move_lo_quad_<mode> (dst, src_low_part));
203 (gen_move_hi_quad_<mode> (dst, src_high_part));
208 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
209 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
210 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
211 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
214 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
216 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
222 (define_insn "aarch64_simd_mov_from_<mode>low"
223 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
225 (match_operand:VQ 1 "register_operand" "w")
226 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
227 "TARGET_SIMD && reload_completed"
229 [(set_attr "type" "neon_to_gp<q>")
230 (set_attr "length" "4")
233 (define_insn "aarch64_simd_mov_from_<mode>high"
234 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
236 (match_operand:VQ 1 "register_operand" "w")
237 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
238 "TARGET_SIMD && reload_completed"
240 [(set_attr "type" "neon_to_gp<q>")
241 (set_attr "length" "4")
244 (define_insn "orn<mode>3"
245 [(set (match_operand:VDQ 0 "register_operand" "=w")
246 (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
247 (match_operand:VDQ 2 "register_operand" "w")))]
249 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
250 [(set_attr "type" "neon_logic<q>")]
253 (define_insn "bic<mode>3"
254 [(set (match_operand:VDQ 0 "register_operand" "=w")
255 (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
256 (match_operand:VDQ 2 "register_operand" "w")))]
258 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
259 [(set_attr "type" "neon_logic<q>")]
262 (define_insn "add<mode>3"
263 [(set (match_operand:VDQ 0 "register_operand" "=w")
264 (plus:VDQ (match_operand:VDQ 1 "register_operand" "w")
265 (match_operand:VDQ 2 "register_operand" "w")))]
267 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
268 [(set_attr "type" "neon_add<q>")]
271 (define_insn "sub<mode>3"
272 [(set (match_operand:VDQ 0 "register_operand" "=w")
273 (minus:VDQ (match_operand:VDQ 1 "register_operand" "w")
274 (match_operand:VDQ 2 "register_operand" "w")))]
276 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
277 [(set_attr "type" "neon_sub<q>")]
280 (define_insn "mul<mode>3"
281 [(set (match_operand:VDQM 0 "register_operand" "=w")
282 (mult:VDQM (match_operand:VDQM 1 "register_operand" "w")
283 (match_operand:VDQM 2 "register_operand" "w")))]
285 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
286 [(set_attr "type" "neon_mul_<Vetype><q>")]
289 (define_insn "*aarch64_mul3_elt<mode>"
290 [(set (match_operand:VMUL 0 "register_operand" "=w")
294 (match_operand:VMUL 1 "register_operand" "<h_con>")
295 (parallel [(match_operand:SI 2 "immediate_operand")])))
296 (match_operand:VMUL 3 "register_operand" "w")))]
299 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
300 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
302 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
305 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
306 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
307 (mult:VMUL_CHANGE_NLANES
308 (vec_duplicate:VMUL_CHANGE_NLANES
310 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
311 (parallel [(match_operand:SI 2 "immediate_operand")])))
312 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
315 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
316 INTVAL (operands[2])));
317 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
319 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
322 (define_insn "*aarch64_mul3_elt_to_128df"
323 [(set (match_operand:V2DF 0 "register_operand" "=w")
326 (match_operand:DF 2 "register_operand" "w"))
327 (match_operand:V2DF 1 "register_operand" "w")))]
329 "fmul\\t%0.2d, %1.2d, %2.d[0]"
330 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
333 (define_insn "*aarch64_mul3_elt_to_64v2df"
334 [(set (match_operand:DF 0 "register_operand" "=w")
337 (match_operand:V2DF 1 "register_operand" "w")
338 (parallel [(match_operand:SI 2 "immediate_operand")]))
339 (match_operand:DF 3 "register_operand" "w")))]
342 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
343 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
345 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
348 (define_insn "neg<mode>2"
349 [(set (match_operand:VDQ 0 "register_operand" "=w")
350 (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
352 "neg\t%0.<Vtype>, %1.<Vtype>"
353 [(set_attr "type" "neon_neg<q>")]
356 (define_insn "abs<mode>2"
357 [(set (match_operand:VDQ 0 "register_operand" "=w")
358 (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
360 "abs\t%0.<Vtype>, %1.<Vtype>"
361 [(set_attr "type" "neon_abs<q>")]
364 (define_insn "abd<mode>_3"
365 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
366 (abs:VDQ_BHSI (minus:VDQ_BHSI
367 (match_operand:VDQ_BHSI 1 "register_operand" "w")
368 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
370 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
371 [(set_attr "type" "neon_abd<q>")]
374 (define_insn "aba<mode>_3"
375 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
376 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
377 (match_operand:VDQ_BHSI 1 "register_operand" "w")
378 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
379 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
381 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
382 [(set_attr "type" "neon_arith_acc<q>")]
385 (define_insn "fabd<mode>_3"
386 [(set (match_operand:VDQF 0 "register_operand" "=w")
387 (abs:VDQF (minus:VDQF
388 (match_operand:VDQF 1 "register_operand" "w")
389 (match_operand:VDQF 2 "register_operand" "w"))))]
391 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
392 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
395 (define_insn "*fabd_scalar<mode>3"
396 [(set (match_operand:GPF 0 "register_operand" "=w")
398 (match_operand:GPF 1 "register_operand" "w")
399 (match_operand:GPF 2 "register_operand" "w"))))]
401 "fabd\t%<s>0, %<s>1, %<s>2"
402 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
405 (define_insn "and<mode>3"
406 [(set (match_operand:VDQ 0 "register_operand" "=w")
407 (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
408 (match_operand:VDQ 2 "register_operand" "w")))]
410 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
411 [(set_attr "type" "neon_logic<q>")]
414 (define_insn "ior<mode>3"
415 [(set (match_operand:VDQ 0 "register_operand" "=w")
416 (ior:VDQ (match_operand:VDQ 1 "register_operand" "w")
417 (match_operand:VDQ 2 "register_operand" "w")))]
419 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
420 [(set_attr "type" "neon_logic<q>")]
423 (define_insn "xor<mode>3"
424 [(set (match_operand:VDQ 0 "register_operand" "=w")
425 (xor:VDQ (match_operand:VDQ 1 "register_operand" "w")
426 (match_operand:VDQ 2 "register_operand" "w")))]
428 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
429 [(set_attr "type" "neon_logic<q>")]
432 (define_insn "one_cmpl<mode>2"
433 [(set (match_operand:VDQ 0 "register_operand" "=w")
434 (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
436 "not\t%0.<Vbtype>, %1.<Vbtype>"
437 [(set_attr "type" "neon_logic<q>")]
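;; Editorial note on the vec_set patterns that follow (not part of the
;; original description): operand 2 of aarch64_simd_vec_set<mode> is the
;; one-hot lane mask built by the vec_set<mode> expanders further down,
;; i.e. GEN_INT (1 << lane).  The insn recovers the lane index with
;; exact_log2 and remaps it for big-endian with ENDIAN_LANE_N.  For example,
;; writing lane 2 of a V4SI value is expanded with the immediate 4 (1 << 2)
;; and emits an "ins" into that lane, either from a general register or from
;; element 0 of another vector register.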
440 (define_insn "aarch64_simd_vec_set<mode>"
441 [(set (match_operand:VQ_S 0 "register_operand" "=w,w")
444 (match_operand:<VEL> 1 "register_operand" "r,w"))
445 (match_operand:VQ_S 3 "register_operand" "0,0")
446 (match_operand:SI 2 "immediate_operand" "i,i")))]
449 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
450 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
451 switch (which_alternative)
454 return "ins\\t%0.<Vetype>[%p2], %w1";
456 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
461 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")]
464 (define_insn "aarch64_simd_lshr<mode>"
465 [(set (match_operand:VDQ 0 "register_operand" "=w")
466 (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
467 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
469 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
470 [(set_attr "type" "neon_shift_imm<q>")]
473 (define_insn "aarch64_simd_ashr<mode>"
474 [(set (match_operand:VDQ 0 "register_operand" "=w")
475 (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
476 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
478 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
479 [(set_attr "type" "neon_shift_imm<q>")]
482 (define_insn "aarch64_simd_imm_shl<mode>"
483 [(set (match_operand:VDQ 0 "register_operand" "=w")
484 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
485 (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))]
487 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
488 [(set_attr "type" "neon_shift_imm<q>")]
491 (define_insn "aarch64_simd_reg_sshl<mode>"
492 [(set (match_operand:VDQ 0 "register_operand" "=w")
493 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
494 (match_operand:VDQ 2 "register_operand" "w")))]
496 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
497 [(set_attr "type" "neon_shift_reg<q>")]
500 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
501 [(set (match_operand:VDQ 0 "register_operand" "=w")
502 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
503 (match_operand:VDQ 2 "register_operand" "w")]
504 UNSPEC_ASHIFT_UNSIGNED))]
506 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
507 [(set_attr "type" "neon_shift_reg<q>")]
510 (define_insn "aarch64_simd_reg_shl<mode>_signed"
511 [(set (match_operand:VDQ 0 "register_operand" "=w")
512 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
513 (match_operand:VDQ 2 "register_operand" "w")]
514 UNSPEC_ASHIFT_SIGNED))]
516 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
517 [(set_attr "type" "neon_shift_reg<q>")]
520 (define_expand "ashl<mode>3"
521 [(match_operand:VDQ 0 "register_operand" "")
522 (match_operand:VDQ 1 "register_operand" "")
523 (match_operand:SI 2 "general_operand" "")]
526 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
529 if (CONST_INT_P (operands[2]))
531 shift_amount = INTVAL (operands[2]);
532 if (shift_amount >= 0 && shift_amount < bit_width)
534 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
536 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
543 operands[2] = force_reg (SImode, operands[2]);
546 else if (MEM_P (operands[2]))
548 operands[2] = force_reg (SImode, operands[2]);
551 if (REG_P (operands[2]))
553 rtx tmp = gen_reg_rtx (<MODE>mode);
554 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
555 convert_to_mode (<VEL>mode,
558 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
567 (define_expand "lshr<mode>3"
568 [(match_operand:VDQ 0 "register_operand" "")
569 (match_operand:VDQ 1 "register_operand" "")
570 (match_operand:SI 2 "general_operand" "")]
573 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
576 if (CONST_INT_P (operands[2]))
578 shift_amount = INTVAL (operands[2]);
579 if (shift_amount > 0 && shift_amount <= bit_width)
581 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
583 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
589 operands[2] = force_reg (SImode, operands[2]);
591 else if (MEM_P (operands[2]))
593 operands[2] = force_reg (SImode, operands[2]);
596 if (REG_P (operands[2]))
598 rtx tmp = gen_reg_rtx (SImode);
599 rtx tmp1 = gen_reg_rtx (<MODE>mode);
600 emit_insn (gen_negsi2 (tmp, operands[2]));
601 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
602 convert_to_mode (<VEL>mode,
604 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
614 (define_expand "ashr<mode>3"
615 [(match_operand:VDQ 0 "register_operand" "")
616 (match_operand:VDQ 1 "register_operand" "")
617 (match_operand:SI 2 "general_operand" "")]
620 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
623 if (CONST_INT_P (operands[2]))
625 shift_amount = INTVAL (operands[2]);
626 if (shift_amount > 0 && shift_amount <= bit_width)
628 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
630 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
636 operands[2] = force_reg (SImode, operands[2]);
638 else if (MEM_P (operands[2]))
640 operands[2] = force_reg (SImode, operands[2]);
643 if (REG_P (operands[2]))
645 rtx tmp = gen_reg_rtx (SImode);
646 rtx tmp1 = gen_reg_rtx (<MODE>mode);
647 emit_insn (gen_negsi2 (tmp, operands[2]));
648 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
649 convert_to_mode (<VEL>mode,
651 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
661 (define_expand "vashl<mode>3"
662 [(match_operand:VDQ 0 "register_operand" "")
663 (match_operand:VDQ 1 "register_operand" "")
664 (match_operand:VDQ 2 "register_operand" "")]
667 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
672 ;; Using mode VQ_S as there is no V2DImode neg!
673 ;; Negating individual lanes most certainly offsets the
674 ;; gain from vectorization.
675 (define_expand "vashr<mode>3"
676 [(match_operand:VQ_S 0 "register_operand" "")
677 (match_operand:VQ_S 1 "register_operand" "")
678 (match_operand:VQ_S 2 "register_operand" "")]
681 rtx neg = gen_reg_rtx (<MODE>mode);
682 emit (gen_neg<mode>2 (neg, operands[2]));
683 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
689 (define_expand "aarch64_ashr_simddi"
690 [(match_operand:DI 0 "register_operand" "=w")
691 (match_operand:DI 1 "register_operand" "w")
692 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
695 if (INTVAL (operands[2]) == 64)
696 emit_insn (gen_aarch64_sshr_simddi (operands[0], operands[1]));
698 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
703 ;; SIMD shift by 64. This pattern is a special case, as the standard
704 ;; pattern does not handle NEON shifts by 64.
705 (define_insn "aarch64_sshr_simddi"
706 [(set (match_operand:DI 0 "register_operand" "=w")
708 [(match_operand:DI 1 "register_operand" "w")] UNSPEC_SSHR64))]
711 [(set_attr "type" "neon_shift_imm")]
714 (define_expand "vlshr<mode>3"
715 [(match_operand:VQ_S 0 "register_operand" "")
716 (match_operand:VQ_S 1 "register_operand" "")
717 (match_operand:VQ_S 2 "register_operand" "")]
720 rtx neg = gen_reg_rtx (<MODE>mode);
721 emit (gen_neg<mode>2 (neg, operands[2]));
722 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
727 (define_expand "aarch64_lshr_simddi"
728 [(match_operand:DI 0 "register_operand" "=w")
729 (match_operand:DI 1 "register_operand" "w")
730 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
733 if (INTVAL (operands[2]) == 64)
734 emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]));
736 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
741 ;; SIMD shift by 64. This pattern is a special case, as the standard
742 ;; pattern does not handle NEON shifts by 64.
743 (define_insn "aarch64_ushr_simddi"
744 [(set (match_operand:DI 0 "register_operand" "=w")
746 [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
749 [(set_attr "type" "neon_shift_imm")]
752 (define_expand "vec_set<mode>"
753 [(match_operand:VQ_S 0 "register_operand")
754 (match_operand:<VEL> 1 "register_operand")
755 (match_operand:SI 2 "immediate_operand")]
758 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
759 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
760 GEN_INT (elem), operands[0]));
765 (define_insn "aarch64_simd_vec_setv2di"
766 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
769 (match_operand:DI 1 "register_operand" "r,w"))
770 (match_operand:V2DI 3 "register_operand" "0,0")
771 (match_operand:SI 2 "immediate_operand" "i,i")))]
774 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
775 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
776 switch (which_alternative)
779 return "ins\\t%0.d[%p2], %1";
781 return "ins\\t%0.d[%p2], %1.d[0]";
786 [(set_attr "type" "neon_from_gp, neon_ins_q")]
789 (define_expand "vec_setv2di"
790 [(match_operand:V2DI 0 "register_operand")
791 (match_operand:DI 1 "register_operand")
792 (match_operand:SI 2 "immediate_operand")]
795 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
796 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
797 GEN_INT (elem), operands[0]));
802 (define_insn "aarch64_simd_vec_set<mode>"
803 [(set (match_operand:VDQF 0 "register_operand" "=w")
806 (match_operand:<VEL> 1 "register_operand" "w"))
807 (match_operand:VDQF 3 "register_operand" "0")
808 (match_operand:SI 2 "immediate_operand" "i")))]
811 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
813 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
814 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
816 [(set_attr "type" "neon_ins<q>")]
819 (define_expand "vec_set<mode>"
820 [(match_operand:VDQF 0 "register_operand" "+w")
821 (match_operand:<VEL> 1 "register_operand" "w")
822 (match_operand:SI 2 "immediate_operand" "")]
825 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
826 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
827 GEN_INT (elem), operands[0]));
833 (define_insn "aarch64_mla<mode>"
834 [(set (match_operand:VQ_S 0 "register_operand" "=w")
835 (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
836 (match_operand:VQ_S 3 "register_operand" "w"))
837 (match_operand:VQ_S 1 "register_operand" "0")))]
839 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
840 [(set_attr "type" "neon_mla_<Vetype><q>")]
843 (define_insn "*aarch64_mla_elt<mode>"
844 [(set (match_operand:VDQHS 0 "register_operand" "=w")
849 (match_operand:VDQHS 1 "register_operand" "<h_con>")
850 (parallel [(match_operand:SI 2 "immediate_operand")])))
851 (match_operand:VDQHS 3 "register_operand" "w"))
852 (match_operand:VDQHS 4 "register_operand" "0")))]
855 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
856 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
858 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
861 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
862 [(set (match_operand:VDQHS 0 "register_operand" "=w")
867 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
868 (parallel [(match_operand:SI 2 "immediate_operand")])))
869 (match_operand:VDQHS 3 "register_operand" "w"))
870 (match_operand:VDQHS 4 "register_operand" "0")))]
873 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
874 INTVAL (operands[2])));
875 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
877 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
880 (define_insn "aarch64_mls<mode>"
881 [(set (match_operand:VQ_S 0 "register_operand" "=w")
882 (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0")
883 (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
884 (match_operand:VQ_S 3 "register_operand" "w"))))]
886 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
887 [(set_attr "type" "neon_mla_<Vetype><q>")]
890 (define_insn "*aarch64_mls_elt<mode>"
891 [(set (match_operand:VDQHS 0 "register_operand" "=w")
893 (match_operand:VDQHS 4 "register_operand" "0")
897 (match_operand:VDQHS 1 "register_operand" "<h_con>")
898 (parallel [(match_operand:SI 2 "immediate_operand")])))
899 (match_operand:VDQHS 3 "register_operand" "w"))))]
902 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
903 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
905 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
908 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
909 [(set (match_operand:VDQHS 0 "register_operand" "=w")
911 (match_operand:VDQHS 4 "register_operand" "0")
915 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
916 (parallel [(match_operand:SI 2 "immediate_operand")])))
917 (match_operand:VDQHS 3 "register_operand" "w"))))]
920 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
921 INTVAL (operands[2])));
922 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
924 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
927 ;; Max/Min operations.
928 (define_insn "<su><maxmin><mode>3"
929 [(set (match_operand:VQ_S 0 "register_operand" "=w")
930 (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w")
931 (match_operand:VQ_S 2 "register_operand" "w")))]
933 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
934 [(set_attr "type" "neon_minmax<q>")]
937 ;; Move into low-half clearing high half to 0.
939 (define_insn "move_lo_quad_<mode>"
940 [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
942 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
943 (vec_duplicate:<VHALF> (const_int 0))))]
949 [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>")
950 (set_attr "simd" "yes,*,yes")
951 (set_attr "fp" "*,yes,*")
952 (set_attr "length" "4")]
955 ;; Move into high-half.
957 (define_insn "aarch64_simd_move_hi_quad_<mode>"
958 [(set (match_operand:VQ 0 "register_operand" "+w,w")
962 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
963 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
966 ins\\t%0.d[1], %1.d[0]
968 [(set_attr "type" "neon_ins")
969 (set_attr "length" "4")]
972 (define_expand "move_hi_quad_<mode>"
973 [(match_operand:VQ 0 "register_operand" "")
974 (match_operand:<VHALF> 1 "register_operand" "")]
977 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
978 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
983 ;; Narrowing operations.
986 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
987 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
988 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
990 "xtn\\t%0.<Vntype>, %1.<Vtype>"
991 [(set_attr "type" "neon_shift_imm_narrow_q")]
994 (define_expand "vec_pack_trunc_<mode>"
995 [(match_operand:<VNARROWD> 0 "register_operand" "")
996 (match_operand:VDN 1 "register_operand" "")
997 (match_operand:VDN 2 "register_operand" "")]
1000 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1001 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1002 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1004 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1005 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1006 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1012 (define_insn "vec_pack_trunc_<mode>"
1013 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w")
1014 (vec_concat:<VNARROWQ2>
1015 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1016 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1019 if (BYTES_BIG_ENDIAN)
1020 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1022 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1024 [(set_attr "type" "multiple")
1025 (set_attr "length" "8")]
1028 ;; Widening operations.
1030 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1031 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1032 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1033 (match_operand:VQW 1 "register_operand" "w")
1034 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1037 "<su>shll %0.<Vwtype>, %1.<Vhalftype>, 0"
1038 [(set_attr "type" "neon_shift_imm_long")]
1041 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1042 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1043 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1044 (match_operand:VQW 1 "register_operand" "w")
1045 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1048 "<su>shll2 %0.<Vwtype>, %1.<Vtype>, 0"
1049 [(set_attr "type" "neon_shift_imm_long")]
1052 (define_expand "vec_unpack<su>_hi_<mode>"
1053 [(match_operand:<VWIDE> 0 "register_operand" "")
1054 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1057 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1058 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1064 (define_expand "vec_unpack<su>_lo_<mode>"
1065 [(match_operand:<VWIDE> 0 "register_operand" "")
1066 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1069 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1070 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1076 ;; Widening arithmetic.
1078 (define_insn "*aarch64_<su>mlal_lo<mode>"
1079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1082 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1083 (match_operand:VQW 2 "register_operand" "w")
1084 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1085 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1086 (match_operand:VQW 4 "register_operand" "w")
1088 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1090 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1091 [(set_attr "type" "neon_mla_<Vetype>_long")]
1094 (define_insn "*aarch64_<su>mlal_hi<mode>"
1095 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1098 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1099 (match_operand:VQW 2 "register_operand" "w")
1100 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1101 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1102 (match_operand:VQW 4 "register_operand" "w")
1104 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1106 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1107 [(set_attr "type" "neon_mla_<Vetype>_long")]
1110 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1111 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1113 (match_operand:<VWIDE> 1 "register_operand" "0")
1115 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1116 (match_operand:VQW 2 "register_operand" "w")
1117 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1118 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1119 (match_operand:VQW 4 "register_operand" "w")
1122 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1123 [(set_attr "type" "neon_mla_<Vetype>_long")]
1126 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1129 (match_operand:<VWIDE> 1 "register_operand" "0")
1131 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1132 (match_operand:VQW 2 "register_operand" "w")
1133 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1134 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1135 (match_operand:VQW 4 "register_operand" "w")
1138 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1139 [(set_attr "type" "neon_mla_<Vetype>_long")]
1142 (define_insn "*aarch64_<su>mlal<mode>"
1143 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1147 (match_operand:VDW 1 "register_operand" "w"))
1149 (match_operand:VDW 2 "register_operand" "w")))
1150 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1152 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1153 [(set_attr "type" "neon_mla_<Vetype>_long")]
1156 (define_insn "*aarch64_<su>mlsl<mode>"
1157 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1159 (match_operand:<VWIDE> 1 "register_operand" "0")
1162 (match_operand:VDW 2 "register_operand" "w"))
1164 (match_operand:VDW 3 "register_operand" "w")))))]
1166 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1167 [(set_attr "type" "neon_mla_<Vetype>_long")]
1170 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1172 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1173 (match_operand:VQW 1 "register_operand" "w")
1174 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1175 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1176 (match_operand:VQW 2 "register_operand" "w")
1179 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1180 [(set_attr "type" "neon_mul_<Vetype>_long")]
1183 (define_expand "vec_widen_<su>mult_lo_<mode>"
1184 [(match_operand:<VWIDE> 0 "register_operand" "")
1185 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1186 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1189 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1190 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1197 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1198 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1199 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1200 (match_operand:VQW 1 "register_operand" "w")
1201 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1202 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1203 (match_operand:VQW 2 "register_operand" "w")
1206 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1207 [(set_attr "type" "neon_mul_<Vetype>_long")]
1210 (define_expand "vec_widen_<su>mult_hi_<mode>"
1211 [(match_operand:<VWIDE> 0 "register_operand" "")
1212 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1213 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1216 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1217 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1225 ;; FP vector operations.
1226 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1227 ;; double-precision (64-bit) floating-point data types and arithmetic as
1228 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1229 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1231 ;; Floating-point operations can raise an exception. Vectorizing such
1232 ;; operations is safe for the reasons explained below.
1234 ;; ARMv8 permits an extension to enable trapped floating-point
1235 ;; exception handling; however, this is an optional feature. In the
1236 ;; event of a floating-point exception being raised by vectorised
1237 ;; code, either:
1238 ;; 1. If trapped floating-point exceptions are available, then a trap
1239 ;; will be taken when any lane raises an enabled exception. A trap
1240 ;; handler may determine which lane raised the exception.
1241 ;; 2. Alternatively a sticky exception flag is set in the
1242 ;; floating-point status register (FPSR). Software may explicitly
1243 ;; test the exception flags, in which case the tests will either
1244 ;; prevent vectorisation, allowing precise identification of the
1245 ;; failing operation, or if tested outside of vectorisable regions
1246 ;; then the specific operation and lane are not of interest.
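;;
;; As an editorial sketch (not part of the original commentary): a plain
;; loop like the one below is therefore a candidate for the "fadd" pattern
;; that follows even without -ffast-math, and the sticky FPSR flags can
;; still be inspected afterwards through the standard <fenv.h> interface.
;; (handle_overflow is a hypothetical handler.)
;;
;;   #include <fenv.h>
;;   extern void handle_overflow (void);
;;
;;   void vadd (float *restrict a, const float *restrict b, int n)
;;   {
;;     feclearexcept (FE_ALL_EXCEPT);
;;     for (int i = 0; i < n; i++)
;;       a[i] += b[i];                   /* may vectorise to fadd v0.4s  */
;;     if (fetestexcept (FE_OVERFLOW))   /* set if any lane overflowed   */
;;       handle_overflow ();
;;   }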
1248 ;; FP arithmetic operations.
1250 (define_insn "add<mode>3"
1251 [(set (match_operand:VDQF 0 "register_operand" "=w")
1252 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1253 (match_operand:VDQF 2 "register_operand" "w")))]
1255 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1256 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1259 (define_insn "sub<mode>3"
1260 [(set (match_operand:VDQF 0 "register_operand" "=w")
1261 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1262 (match_operand:VDQF 2 "register_operand" "w")))]
1264 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1265 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1268 (define_insn "mul<mode>3"
1269 [(set (match_operand:VDQF 0 "register_operand" "=w")
1270 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1271 (match_operand:VDQF 2 "register_operand" "w")))]
1273 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1274 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1277 (define_insn "div<mode>3"
1278 [(set (match_operand:VDQF 0 "register_operand" "=w")
1279 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1280 (match_operand:VDQF 2 "register_operand" "w")))]
1282 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1283 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1286 (define_insn "neg<mode>2"
1287 [(set (match_operand:VDQF 0 "register_operand" "=w")
1288 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1290 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1291 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1294 (define_insn "abs<mode>2"
1295 [(set (match_operand:VDQF 0 "register_operand" "=w")
1296 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1298 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1299 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
1302 (define_insn "fma<mode>4"
1303 [(set (match_operand:VDQF 0 "register_operand" "=w")
1304 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1305 (match_operand:VDQF 2 "register_operand" "w")
1306 (match_operand:VDQF 3 "register_operand" "0")))]
1308 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1309 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1312 (define_insn "*aarch64_fma4_elt<mode>"
1313 [(set (match_operand:VDQF 0 "register_operand" "=w")
1317 (match_operand:VDQF 1 "register_operand" "<h_con>")
1318 (parallel [(match_operand:SI 2 "immediate_operand")])))
1319 (match_operand:VDQF 3 "register_operand" "w")
1320 (match_operand:VDQF 4 "register_operand" "0")))]
1323 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1324 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1326 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1329 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1330 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1332 (vec_duplicate:VDQSF
1334 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1335 (parallel [(match_operand:SI 2 "immediate_operand")])))
1336 (match_operand:VDQSF 3 "register_operand" "w")
1337 (match_operand:VDQSF 4 "register_operand" "0")))]
1340 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1341 INTVAL (operands[2])));
1342 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1344 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1347 (define_insn "*aarch64_fma4_elt_to_128df"
1348 [(set (match_operand:V2DF 0 "register_operand" "=w")
1351 (match_operand:DF 1 "register_operand" "w"))
1352 (match_operand:V2DF 2 "register_operand" "w")
1353 (match_operand:V2DF 3 "register_operand" "0")))]
1355 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1356 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1359 (define_insn "*aarch64_fma4_elt_to_64v2df"
1360 [(set (match_operand:DF 0 "register_operand" "=w")
1363 (match_operand:V2DF 1 "register_operand" "w")
1364 (parallel [(match_operand:SI 2 "immediate_operand")]))
1365 (match_operand:DF 3 "register_operand" "w")
1366 (match_operand:DF 4 "register_operand" "0")))]
1369 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1370 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1372 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1375 (define_insn "fnma<mode>4"
1376 [(set (match_operand:VDQF 0 "register_operand" "=w")
1378 (match_operand:VDQF 1 "register_operand" "w")
1380 (match_operand:VDQF 2 "register_operand" "w"))
1381 (match_operand:VDQF 3 "register_operand" "0")))]
1383 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1384 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1387 (define_insn "*aarch64_fnma4_elt<mode>"
1388 [(set (match_operand:VDQF 0 "register_operand" "=w")
1391 (match_operand:VDQF 3 "register_operand" "w"))
1394 (match_operand:VDQF 1 "register_operand" "<h_con>")
1395 (parallel [(match_operand:SI 2 "immediate_operand")])))
1396 (match_operand:VDQF 4 "register_operand" "0")))]
1399 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1400 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1402 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1405 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1406 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1409 (match_operand:VDQSF 3 "register_operand" "w"))
1410 (vec_duplicate:VDQSF
1412 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1413 (parallel [(match_operand:SI 2 "immediate_operand")])))
1414 (match_operand:VDQSF 4 "register_operand" "0")))]
1417 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1418 INTVAL (operands[2])));
1419 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1421 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1424 (define_insn "*aarch64_fnma4_elt_to_128df"
1425 [(set (match_operand:V2DF 0 "register_operand" "=w")
1428 (match_operand:V2DF 2 "register_operand" "w"))
1430 (match_operand:DF 1 "register_operand" "w"))
1431 (match_operand:V2DF 3 "register_operand" "0")))]
1433 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1434 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1437 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1438 [(set (match_operand:DF 0 "register_operand" "=w")
1441 (match_operand:V2DF 1 "register_operand" "w")
1442 (parallel [(match_operand:SI 2 "immediate_operand")]))
1444 (match_operand:DF 3 "register_operand" "w"))
1445 (match_operand:DF 4 "register_operand" "0")))]
1448 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1449 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1451 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1454 ;; Vector versions of the floating-point frint patterns.
1455 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
1456 (define_insn "<frint_pattern><mode>2"
1457 [(set (match_operand:VDQF 0 "register_operand" "=w")
1458 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1461 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1462 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
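;; As an editorial illustration (not taken from the original file): with the
;; usual mapping of frint_pattern/frint_suffix, the standard names listed
;; above would be expected to expand as follows for V4SF:
;;
;;   btrunc    -> frintz v0.4s, v1.4s   (__builtin_truncf)
;;   ceil      -> frintp v0.4s, v1.4s   (__builtin_ceilf)
;;   floor     -> frintm v0.4s, v1.4s   (__builtin_floorf)
;;   nearbyint -> frinti v0.4s, v1.4s   (__builtin_nearbyintf)
;;   rint      -> frintx v0.4s, v1.4s   (__builtin_rintf)
;;   round     -> frinta v0.4s, v1.4s   (__builtin_roundf)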
1465 ;; Vector versions of the fcvt standard patterns.
1466 ;; Expands to lbtrunc, lround, lceil, lfloor
1467 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1468 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1469 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1470 [(match_operand:VDQF 1 "register_operand" "w")]
1473 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1474 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1477 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1478 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1479 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1480 [(match_operand:VDQF 1 "register_operand")]
1485 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1486 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1487 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1488 [(match_operand:VDQF 1 "register_operand")]
1493 (define_expand "ftrunc<VDQF:mode>2"
1494 [(set (match_operand:VDQF 0 "register_operand")
1495 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1500 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1501 [(set (match_operand:VDQF 0 "register_operand" "=w")
1503 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1505 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1506 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1509 ;; Conversions between vectors of floats and doubles.
1510 ;; Contains a mix of patterns to match standard pattern names
1511 ;; and those for intrinsics.
1513 ;; Float widening operations.
1515 (define_insn "vec_unpacks_lo_v4sf"
1516 [(set (match_operand:V2DF 0 "register_operand" "=w")
1519 (match_operand:V4SF 1 "register_operand" "w")
1520 (parallel [(const_int 0) (const_int 1)])
1523 "fcvtl\\t%0.2d, %1.2s"
1524 [(set_attr "type" "neon_fp_cvt_widen_s")]
1527 (define_insn "aarch64_float_extend_lo_v2df"
1528 [(set (match_operand:V2DF 0 "register_operand" "=w")
1530 (match_operand:V2SF 1 "register_operand" "w")))]
1532 "fcvtl\\t%0.2d, %1.2s"
1533 [(set_attr "type" "neon_fp_cvt_widen_s")]
1536 (define_insn "vec_unpacks_hi_v4sf"
1537 [(set (match_operand:V2DF 0 "register_operand" "=w")
1540 (match_operand:V4SF 1 "register_operand" "w")
1541 (parallel [(const_int 2) (const_int 3)])
1544 "fcvtl2\\t%0.2d, %1.4s"
1545 [(set_attr "type" "neon_fp_cvt_widen_s")]
1548 ;; Float narrowing operations.
1550 (define_insn "aarch64_float_truncate_lo_v2sf"
1551 [(set (match_operand:V2SF 0 "register_operand" "=w")
1552 (float_truncate:V2SF
1553 (match_operand:V2DF 1 "register_operand" "w")))]
1555 "fcvtn\\t%0.2s, %1.2d"
1556 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1559 (define_insn "aarch64_float_truncate_hi_v4sf"
1560 [(set (match_operand:V4SF 0 "register_operand" "=w")
1562 (match_operand:V2SF 1 "register_operand" "0")
1563 (float_truncate:V2SF
1564 (match_operand:V2DF 2 "register_operand" "w"))))]
1566 "fcvtn2\\t%0.4s, %2.2d"
1567 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1570 (define_expand "vec_pack_trunc_v2df"
1571 [(set (match_operand:V4SF 0 "register_operand")
1573 (float_truncate:V2SF
1574 (match_operand:V2DF 1 "register_operand"))
1575 (float_truncate:V2SF
1576 (match_operand:V2DF 2 "register_operand"))
1580 rtx tmp = gen_reg_rtx (V2SFmode);
1581 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1582 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1584 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1585 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1586 tmp, operands[hi]));
1591 (define_expand "vec_pack_trunc_df"
1592 [(set (match_operand:V2SF 0 "register_operand")
1595 (match_operand:DF 1 "register_operand"))
1597 (match_operand:DF 2 "register_operand"))
1601 rtx tmp = gen_reg_rtx (V2SFmode);
1602 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1603 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1605 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1606 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1607 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1612 (define_insn "aarch64_vmls<mode>"
1613 [(set (match_operand:VDQF 0 "register_operand" "=w")
1614 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1615 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1616 (match_operand:VDQF 3 "register_operand" "w"))))]
1618 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1619 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1623 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1624 ;; expression such as:
1625 ;; a = (b < c) ? b : c;
1626 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1627 ;; either explicitly or indirectly via -ffast-math.
1629 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1630 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1631 ;; operand will be returned when both operands are zero (i.e. they may not
1632 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1633 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
1634 ;; NaNs and signed zeroes.
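;;
;; As an editorial sketch (not part of the original comment): compiled with
;; -ffast-math (or -ffinite-math-only plus -fno-signed-zeros), a loop such as
;;
;;   void vmin (float *restrict a, const float *restrict b,
;;              const float *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = (b[i] < c[i]) ? b[i] : c[i];   /* MIN_EXPR -> smin  */
;;   }
;;
;; is a candidate for the FMAXMIN pattern below, i.e. fminnm v0.4s, v1.4s,
;; v2.4s; without those options the compare-and-select form is typically
;; handled through the vcond patterns further down instead.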
1636 (define_insn "<su><maxmin><mode>3"
1637 [(set (match_operand:VDQF 0 "register_operand" "=w")
1638 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1639 (match_operand:VDQF 2 "register_operand" "w")))]
1641 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1642 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1645 (define_insn "<maxmin_uns><mode>3"
1646 [(set (match_operand:VDQF 0 "register_operand" "=w")
1647 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1648 (match_operand:VDQF 2 "register_operand" "w")]
1651 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1652 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1655 ;; 'across lanes' add.
1657 (define_insn "reduc_<sur>plus_<mode>"
1658 [(set (match_operand:VDQV 0 "register_operand" "=w")
1659 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1662 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1663 [(set_attr "type" "neon_reduc_add<q>")]
1666 (define_insn "reduc_<sur>plus_v2si"
1667 [(set (match_operand:V2SI 0 "register_operand" "=w")
1668 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1671 "addp\\t%0.2s, %1.2s, %1.2s"
1672 [(set_attr "type" "neon_reduc_add")]
1675 (define_insn "reduc_splus_<mode>"
1676 [(set (match_operand:V2F 0 "register_operand" "=w")
1677 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1680 "faddp\\t%<Vetype>0, %1.<Vtype>"
1681 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
1684 (define_insn "aarch64_addpv4sf"
1685 [(set (match_operand:V4SF 0 "register_operand" "=w")
1686 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1689 "faddp\\t%0.4s, %1.4s, %1.4s"
1690 [(set_attr "type" "neon_fp_reduc_add_s_q")]
1693 (define_expand "reduc_splus_v4sf"
1694 [(set (match_operand:V4SF 0 "register_operand")
1695 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1699 emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1]));
1700 emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0]));
1704 (define_insn "clz<mode>2"
1705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1706 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1708 "clz\\t%0.<Vtype>, %1.<Vtype>"
1709 [(set_attr "type" "neon_cls<q>")]
1712 ;; 'across lanes' max and min ops.
1714 (define_insn "reduc_<maxmin_uns>_<mode>"
1715 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
1716 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
1719 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1720 [(set_attr "type" "neon_reduc_minmax<q>")]
1723 (define_insn "reduc_<maxmin_uns>_v2si"
1724 [(set (match_operand:V2SI 0 "register_operand" "=w")
1725 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1728 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1729 [(set_attr "type" "neon_reduc_minmax")]
1732 (define_insn "reduc_<maxmin_uns>_<mode>"
1733 [(set (match_operand:V2F 0 "register_operand" "=w")
1734 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1737 "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
1738 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
1741 (define_insn "reduc_<maxmin_uns>_v4sf"
1742 [(set (match_operand:V4SF 0 "register_operand" "=w")
1743 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1746 "<maxmin_uns_op>v\\t%s0, %1.4s"
1747 [(set_attr "type" "neon_fp_reduc_minmax_s_q")]
1750 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1751 ;; allocation.
1752 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1753 ;; to select.
1755 ;; Thus our BSL is of the form:
1756 ;; op0 = bsl (mask, op2, op3)
1757 ;; We can use any of:
1758 ;;
1759 ;; if (op0 = mask)
1760 ;; bsl mask, op1, op2
1761 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
1762 ;; bit op0, op2, mask
1763 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
1764 ;; bif op0, op1, mask
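;;
;; Editorial note (not part of the original comment): in C terms the select
;; implemented by aarch64_simd_bsl is, per bit,
;;
;;   #include <stdint.h>
;;   /* operand 1 = mask, operands 2 and 3 = data, as described above.  */
;;   uint32_t bsl (uint32_t mask, uint32_t op2, uint32_t op3)
;;   {
;;     return (mask & op2) | (~mask & op3);
;;   }
;;
;; and the constraints of the insn below let register allocation pick BSL
;; when the destination is tied to the mask (operand 1), BIT when it is tied
;; to operand 3, and BIF when it is tied to operand 2.  The vcond expanders
;; further down feed a lane-wise all-ones/all-zeros comparison result in as
;; the mask.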
1766 (define_insn "aarch64_simd_bsl<mode>_internal"
1767 [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w")
1770 (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
1771 (match_operand:VALLDIF 2 "register_operand" " w,w,0"))
1774 (match_dup:<V_cmp_result> 1))
1775 (match_operand:VALLDIF 3 "register_operand" " w,0,w"))
1779 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
1780 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
1781 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
1782 [(set_attr "type" "neon_bsl<q>")]
1785 (define_expand "aarch64_simd_bsl<mode>"
1786 [(match_operand:VALLDIF 0 "register_operand")
1787 (match_operand:<V_cmp_result> 1 "register_operand")
1788 (match_operand:VALLDIF 2 "register_operand")
1789 (match_operand:VALLDIF 3 "register_operand")]
1792 /* We can't alias operands together if they have different modes. */
1793 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
1794 emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
1795 operands[2], operands[3]));
1799 (define_expand "aarch64_vcond_internal<mode><mode>"
1800 [(set (match_operand:VDQ 0 "register_operand")
1802 (match_operator 3 "comparison_operator"
1803 [(match_operand:VDQ 4 "register_operand")
1804 (match_operand:VDQ 5 "nonmemory_operand")])
1805 (match_operand:VDQ 1 "nonmemory_operand")
1806 (match_operand:VDQ 2 "nonmemory_operand")))]
1809 int inverse = 0, has_zero_imm_form = 0;
1810 rtx op1 = operands[1];
1811 rtx op2 = operands[2];
1812 rtx mask = gen_reg_rtx (<MODE>mode);
1814 switch (GET_CODE (operands[3]))
1824 has_zero_imm_form = 1;
1834 if (!REG_P (operands[5])
1835 && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
1836 operands[5] = force_reg (<MODE>mode, operands[5]);
1838 switch (GET_CODE (operands[3]))
1842 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
1847 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
1852 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
1857 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
1862 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
1875 /* If we have (a = (b CMP c) ? -1 : 0), then
1876 we can simply move the generated mask. */
1878 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
1879 && op2 == CONST0_RTX (<V_cmp_result>mode))
1880 emit_move_insn (operands[0], mask);
1884 op1 = force_reg (<MODE>mode, op1);
1886 op2 = force_reg (<MODE>mode, op2);
1887 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
1894 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
1895 [(set (match_operand:VDQF_COND 0 "register_operand")
1897 (match_operator 3 "comparison_operator"
1898 [(match_operand:VDQF 4 "register_operand")
1899 (match_operand:VDQF 5 "nonmemory_operand")])
1900 (match_operand:VDQF_COND 1 "nonmemory_operand")
1901 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
1905 int use_zero_form = 0;
1906 int swap_bsl_operands = 0;
1907 rtx op1 = operands[1];
1908 rtx op2 = operands[2];
1909 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1910 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1912 rtx (*base_comparison) (rtx, rtx, rtx);
1913 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1915 switch (GET_CODE (operands[3]))
1922 if (operands[5] == CONST0_RTX (<MODE>mode))
1929 if (!REG_P (operands[5]))
1930 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
1933 switch (GET_CODE (operands[3]))
1943 base_comparison = gen_aarch64_cmge<VDQF:mode>;
1944 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
1952 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
1953 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
1958 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
1959 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
1965 switch (GET_CODE (operands[3]))
1972 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
1973 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1974 a GE b -> a GE b
1975 a GT b -> a GT b
1976 a LE b -> b GE a
1977 a LT b -> b GT a
1978 a EQ b -> a EQ b
1979 Note that there also exist direct comparison against 0 forms,
1980 so catch those as a special case. */
1984 switch (GET_CODE (operands[3]))
1987 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
1990 base_comparison = gen_aarch64_cmle<VDQF:mode>;
1993 /* Do nothing, other zero form cases already have the correct
2000 emit_insn (base_comparison (mask, operands[4], operands[5]));
2002 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2009 /* FCM returns false for lanes which are unordered, so if we use
2010 the inverse of the comparison we actually want to emit, then
2011 swap the operands to BSL, we will end up with the correct result.
2012 Note that a NE NaN and NaN NE b are true for all a, b.
2014 Our transformations are:
2015 a UNGE b -> !(b GT a)
2016 a UNGT b -> !(b GE a)
2017 a UNLE b -> !(a GT b)
2018 a UNLT b -> !(a GE b)
2019 a NE b -> !(a EQ b) */
2022 emit_insn (base_comparison (mask, operands[4], operands[5]));
2024 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2026 swap_bsl_operands = 1;
2029 /* We check (a > b || b > a). Combining these comparisons gives us
2030 true iff (a != b && a ORDERED b); swapping the operands to BSL
2031 will then give us (a == b || a UNORDERED b) as intended. */
2033 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2034 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2035 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2036 swap_bsl_operands = 1;
2039 /* Operands are ORDERED iff (a > b || b >= a).
2040 Swapping the operands to BSL will give the UNORDERED case. */
2041 swap_bsl_operands = 1;
2044 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2045 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2046 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2052 if (swap_bsl_operands)
2058 /* If we have (a = (b CMP c) ? -1 : 0), then
2059 we can simply move the generated mask. */
2061 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2062 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2063 emit_move_insn (operands[0], mask);
2067 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2069 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2070 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
2077 (define_expand "vcond<mode><mode>"
2078 [(set (match_operand:VALL 0 "register_operand")
2080 (match_operator 3 "comparison_operator"
2081 [(match_operand:VALL 4 "register_operand")
2082 (match_operand:VALL 5 "nonmemory_operand")])
2083 (match_operand:VALL 1 "nonmemory_operand")
2084 (match_operand:VALL 2 "nonmemory_operand")))]
2087 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2088 operands[2], operands[3],
2089 operands[4], operands[5]));
2093 (define_expand "vcond<v_cmp_result><mode>"
2094 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2095 (if_then_else:<V_cmp_result>
2096 (match_operator 3 "comparison_operator"
2097 [(match_operand:VDQF 4 "register_operand")
2098 (match_operand:VDQF 5 "nonmemory_operand")])
2099 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2100 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2103 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2104 operands[0], operands[1],
2105 operands[2], operands[3],
2106 operands[4], operands[5]));
2110 (define_expand "vcondu<mode><mode>"
2111 [(set (match_operand:VDQ 0 "register_operand")
2113 (match_operator 3 "comparison_operator"
2114 [(match_operand:VDQ 4 "register_operand")
2115 (match_operand:VDQ 5 "nonmemory_operand")])
2116 (match_operand:VDQ 1 "nonmemory_operand")
2117 (match_operand:VDQ 2 "nonmemory_operand")))]
2120 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2121 operands[2], operands[3],
2122 operands[4], operands[5]));
2126 ;; Patterns for AArch64 SIMD Intrinsics.
2128 (define_expand "aarch64_create<mode>"
2129 [(match_operand:VD_RE 0 "register_operand" "")
2130 (match_operand:DI 1 "general_operand" "")]
2133 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2134 emit_move_insn (operands[0], src);
2138 ;; Lane extraction with sign extension to general purpose register.
2139 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2140 [(set (match_operand:GPI 0 "register_operand" "=r")
2143 (match_operand:VDQQH 1 "register_operand" "w")
2144 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2147 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2148 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2150 [(set_attr "type" "neon_to_gp<q>")]
2153 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2154 [(set (match_operand:SI 0 "register_operand" "=r")
2157 (match_operand:VDQQH 1 "register_operand" "w")
2158 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2161 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2162 return "umov\\t%w0, %1.<Vetype>[%2]";
2164 [(set_attr "type" "neon_to_gp<q>")]
2167 (define_expand "aarch64_be_checked_get_lane<mode>"
2168 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
2169 (match_operand:VALL 1 "register_operand")
2170 (match_operand:SI 2 "immediate_operand")]
2173 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2174 emit_insn (gen_aarch64_get_lane<mode> (operands[0],
2181 ;; Lane extraction of a value; neither sign nor zero extension
2182 ;; is guaranteed, so the upper bits should be considered undefined.
2183 (define_insn "aarch64_get_lane<mode>"
2184 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2186 (match_operand:VALL 1 "register_operand" "w, w, w")
2187 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2190 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2191 switch (which_alternative)
2194 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2196 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2198 return "st1\\t{%1.<Vetype>}[%2], %0";
2203 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
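;; For example, extracting lane 2 of a V4SI value into a general register
;; emits "umov w0, v1.s[2]"; extraction into a SIMD register uses
;; "dup s0, v1.s[2]", and extraction straight to memory uses st1.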
2206 (define_expand "aarch64_get_lanedi"
2207 [(match_operand:DI 0 "register_operand")
2208 (match_operand:DI 1 "register_operand")
2209 (match_operand:SI 2 "immediate_operand")]
2212 aarch64_simd_lane_bounds (operands[2], 0, 1);
2213 emit_move_insn (operands[0], operands[1]);
2217 (define_expand "aarch64_reinterpretv8qi<mode>"
2218 [(match_operand:V8QI 0 "register_operand" "")
2219 (match_operand:VDC 1 "register_operand" "")]
2222 aarch64_simd_reinterpret (operands[0], operands[1]);
2226 (define_expand "aarch64_reinterpretv4hi<mode>"
2227 [(match_operand:V4HI 0 "register_operand" "")
2228 (match_operand:VDC 1 "register_operand" "")]
2231 aarch64_simd_reinterpret (operands[0], operands[1]);
2235 (define_expand "aarch64_reinterpretv2si<mode>"
2236 [(match_operand:V2SI 0 "register_operand" "")
2237 (match_operand:VDC 1 "register_operand" "")]
2240 aarch64_simd_reinterpret (operands[0], operands[1]);
2244 (define_expand "aarch64_reinterpretv2sf<mode>"
2245 [(match_operand:V2SF 0 "register_operand" "")
2246 (match_operand:VDC 1 "register_operand" "")]
2249 aarch64_simd_reinterpret (operands[0], operands[1]);
2253 (define_expand "aarch64_reinterpretdi<mode>"
2254 [(match_operand:DI 0 "register_operand" "")
2255 (match_operand:VD_RE 1 "register_operand" "")]
2258 aarch64_simd_reinterpret (operands[0], operands[1]);
2262 (define_expand "aarch64_reinterpretv16qi<mode>"
2263 [(match_operand:V16QI 0 "register_operand" "")
2264 (match_operand:VQ 1 "register_operand" "")]
2267 aarch64_simd_reinterpret (operands[0], operands[1]);
2271 (define_expand "aarch64_reinterpretv8hi<mode>"
2272 [(match_operand:V8HI 0 "register_operand" "")
2273 (match_operand:VQ 1 "register_operand" "")]
2276 aarch64_simd_reinterpret (operands[0], operands[1]);
2280 (define_expand "aarch64_reinterpretv4si<mode>"
2281 [(match_operand:V4SI 0 "register_operand" "")
2282 (match_operand:VQ 1 "register_operand" "")]
2285 aarch64_simd_reinterpret (operands[0], operands[1]);
2289 (define_expand "aarch64_reinterpretv4sf<mode>"
2290 [(match_operand:V4SF 0 "register_operand" "")
2291 (match_operand:VQ 1 "register_operand" "")]
2294 aarch64_simd_reinterpret (operands[0], operands[1]);
2298 (define_expand "aarch64_reinterpretv2di<mode>"
2299 [(match_operand:V2DI 0 "register_operand" "")
2300 (match_operand:VQ 1 "register_operand" "")]
2303 aarch64_simd_reinterpret (operands[0], operands[1]);
2307 (define_expand "aarch64_reinterpretv2df<mode>"
2308 [(match_operand:V2DF 0 "register_operand" "")
2309 (match_operand:VQ 1 "register_operand" "")]
2312 aarch64_simd_reinterpret (operands[0], operands[1]);
2316 ;; In this insn, operand 1 should be low, and operand 2 the high part of the destination vector.
2319 (define_insn "*aarch64_combinez<mode>"
2320 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2322 (match_operand:VDIC 1 "register_operand" "w")
2323 (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))]
2325 "mov\\t%0.8b, %1.8b"
2326 [(set_attr "type" "neon_move<q>")]
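;; Any AdvSIMD write to a D register zeroes the upper 64 bits of the
;; containing Q register, so concatenating with zero only needs the 8-byte
;; register move above.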
2329 (define_insn_and_split "aarch64_combine<mode>"
2330 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2331 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2332 (match_operand:VDC 2 "register_operand" "w")))]
2335 "&& reload_completed"
2338 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2341 [(set_attr "type" "multiple")]
2344 (define_expand "aarch64_simd_combine<mode>"
2345 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2346 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2347 (match_operand:VDC 2 "register_operand" "w")))]
2350 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2351 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2354 [(set_attr "type" "multiple")]
2357 ;; <su><addsub>l<q>.
2359 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2360 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2361 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2362 (match_operand:VQW 1 "register_operand" "w")
2363 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2364 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2365 (match_operand:VQW 2 "register_operand" "w")
2368 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2369 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2372 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2373 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2374 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2375 (match_operand:VQW 1 "register_operand" "w")
2376 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2377 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2378 (match_operand:VQW 2 "register_operand" "w")
2381 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2382 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
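;; For example, for V8HI inputs the pair above emits
;; "saddl2 v0.4s, v1.8h, v2.8h" for the high halves and
;; "saddl v0.4s, v1.4h, v2.4h" for the low halves.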
2386 (define_expand "aarch64_saddl2<mode>"
2387 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2388 (match_operand:VQW 1 "register_operand" "w")
2389 (match_operand:VQW 2 "register_operand" "w")]
2392 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2393 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2398 (define_expand "aarch64_uaddl2<mode>"
2399 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2400 (match_operand:VQW 1 "register_operand" "w")
2401 (match_operand:VQW 2 "register_operand" "w")]
2404 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2405 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2410 (define_expand "aarch64_ssubl2<mode>"
2411 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2412 (match_operand:VQW 1 "register_operand" "w")
2413 (match_operand:VQW 2 "register_operand" "w")]
2416 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2417 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2422 (define_expand "aarch64_usubl2<mode>"
2423 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2424 (match_operand:VQW 1 "register_operand" "w")
2425 (match_operand:VQW 2 "register_operand" "w")]
2428 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2429 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2434 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2435 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2436 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2437 (match_operand:VDW 1 "register_operand" "w"))
2439 (match_operand:VDW 2 "register_operand" "w"))))]
2441 "<ANY_EXTEND:su><ADDSUB:optab>l %0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2442 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2445 ;; <su><addsub>w<q>.
2447 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2448 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2449 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2451 (match_operand:VDW 2 "register_operand" "w"))))]
2453 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2454 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
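;; For example, "saddw v0.4s, v1.4s, v2.4h" adds each sign-extended element
;; of the narrow operand to the corresponding element of the wide operand.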
2457 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2459 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2462 (match_operand:VQW 2 "register_operand" "w")
2463 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2465 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2466 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2469 (define_expand "aarch64_saddw2<mode>"
2470 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2471 (match_operand:<VWIDE> 1 "register_operand" "w")
2472 (match_operand:VQW 2 "register_operand" "w")]
2475 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2476 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2481 (define_expand "aarch64_uaddw2<mode>"
2482 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2483 (match_operand:<VWIDE> 1 "register_operand" "w")
2484 (match_operand:VQW 2 "register_operand" "w")]
2487 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2488 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2494 (define_expand "aarch64_ssubw2<mode>"
2495 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2496 (match_operand:<VWIDE> 1 "register_operand" "w")
2497 (match_operand:VQW 2 "register_operand" "w")]
2500 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2501 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2506 (define_expand "aarch64_usubw2<mode>"
2507 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2508 (match_operand:<VWIDE> 1 "register_operand" "w")
2509 (match_operand:VQW 2 "register_operand" "w")]
2512 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2513 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2518 ;; <su><r>h<addsub>.
2520 (define_insn "aarch64_<sur>h<addsub><mode>"
2521 [(set (match_operand:VQ_S 0 "register_operand" "=w")
2522 (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w")
2523 (match_operand:VQ_S 2 "register_operand" "w")]
2526 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2527 [(set_attr "type" "neon_<addsub>_halve<q>")]
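;; For example, "urhadd v0.16b, v1.16b, v2.16b" computes the rounded halving
;; add ((a + b + 1) >> 1) in each byte lane without intermediate overflow.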
2530 ;; <r><addsub>hn<q>.
2532 (define_insn "aarch64_<sur><addsub>hn<mode>"
2533 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2534 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2535 (match_operand:VQN 2 "register_operand" "w")]
2538 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2539 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2542 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2543 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2544 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2545 (match_operand:VQN 2 "register_operand" "w")
2546 (match_operand:VQN 3 "register_operand" "w")]
2549 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2550 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
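;; addhn/subhn keep the most-significant half of each widened result; the
;; "hn2" form above writes that narrowed result to the upper half of the
;; destination while keeping the lower half (operand 1, tied via "0").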
2555 (define_insn "aarch64_pmul<mode>"
2556 [(set (match_operand:VB 0 "register_operand" "=w")
2557 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2558 (match_operand:VB 2 "register_operand" "w")]
2561 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2562 [(set_attr "type" "neon_mul_<Vetype><q>")]
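;; pmul is a polynomial (carry-less) multiply over GF(2) in each byte lane.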
2567 (define_insn "aarch64_<su_optab><optab><mode>"
2568 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2569 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2570 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2572 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2573 [(set_attr "type" "neon_<optab><q>")]
2576 ;; suqadd and usqadd
2578 (define_insn "aarch64_<sur>qadd<mode>"
2579 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2580 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2581 (match_operand:VSDQ_I 2 "register_operand" "w")]
2584 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2585 [(set_attr "type" "neon_qadd<q>")]
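;; suqadd/usqadd accumulate into operand 0, which is tied to operand 1 via
;; the "0" constraint; that is why only %0 and %2 appear in the template.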
2590 (define_insn "aarch64_sqmovun<mode>"
2591 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2592 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2595 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2596 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2599 ;; sqmovn and uqmovn
2601 (define_insn "aarch64_<sur>qmovn<mode>"
2602 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2603 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2606 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2607 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2612 (define_insn "aarch64_s<optab><mode>"
2613 [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
2615 (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
2617 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2618 [(set_attr "type" "neon_<optab><q>")]
2623 (define_insn "aarch64_sq<r>dmulh<mode>"
2624 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2626 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2627 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2630 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2631 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
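;; sqdmulh doubles the product and keeps the saturated high half;
;; sqrdmulh additionally rounds before narrowing.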
2636 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2637 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2639 [(match_operand:VDQHS 1 "register_operand" "w")
2641 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2642 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2646 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
2647 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2648 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2649 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2652 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2653 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2655 [(match_operand:VDQHS 1 "register_operand" "w")
2657 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2658 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2662 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2663 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2669 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2671 [(match_operand:SD_HSI 1 "register_operand" "w")
2673 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2678 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2679 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2680 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2681 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2686 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2687 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2689 (match_operand:<VWIDE> 1 "register_operand" "0")
2692 (sign_extend:<VWIDE>
2693 (match_operand:VSD_HSI 2 "register_operand" "w"))
2694 (sign_extend:<VWIDE>
2695 (match_operand:VSD_HSI 3 "register_operand" "w")))
2698 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2699 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
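;; For example, "sqdmlal v0.4s, v1.4h, v2.4h" accumulates the doubled,
;; widened products into the "0"-tied accumulator with saturation.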
2704 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2705 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2707 (match_operand:<VWIDE> 1 "register_operand" "0")
2710 (sign_extend:<VWIDE>
2711 (match_operand:VD_HSI 2 "register_operand" "w"))
2712 (sign_extend:<VWIDE>
2713 (vec_duplicate:VD_HSI
2715 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2716 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2721 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2723 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2725 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2728 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2729 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2731 (match_operand:<VWIDE> 1 "register_operand" "0")
2734 (sign_extend:<VWIDE>
2735 (match_operand:SD_HSI 2 "register_operand" "w"))
2736 (sign_extend:<VWIDE>
2738 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2739 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2744 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2746 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2748 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2751 (define_expand "aarch64_sqdmlal_lane<mode>"
2752 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2753 (match_operand:<VWIDE> 1 "register_operand" "0")
2754 (match_operand:VSD_HSI 2 "register_operand" "w")
2755 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2756 (match_operand:SI 4 "immediate_operand" "i")]
2759 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2760 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2761 operands[2], operands[3],
2766 (define_expand "aarch64_sqdmlal_laneq<mode>"
2767 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2768 (match_operand:<VWIDE> 1 "register_operand" "0")
2769 (match_operand:VSD_HSI 2 "register_operand" "w")
2770 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2771 (match_operand:SI 4 "immediate_operand" "i")]
2774 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2775 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2776 operands[2], operands[3],
2781 (define_expand "aarch64_sqdmlsl_lane<mode>"
2782 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2783 (match_operand:<VWIDE> 1 "register_operand" "0")
2784 (match_operand:VSD_HSI 2 "register_operand" "w")
2785 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2786 (match_operand:SI 4 "immediate_operand" "i")]
2789 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2790 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2791 operands[2], operands[3],
2796 (define_expand "aarch64_sqdmlsl_laneq<mode>"
2797 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2798 (match_operand:<VWIDE> 1 "register_operand" "0")
2799 (match_operand:VSD_HSI 2 "register_operand" "w")
2800 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2801 (match_operand:SI 4 "immediate_operand" "i")]
2804 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2805 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2806 operands[2], operands[3],
2813 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
2814 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2816 (match_operand:<VWIDE> 1 "register_operand" "0")
2819 (sign_extend:<VWIDE>
2820 (match_operand:VD_HSI 2 "register_operand" "w"))
2821 (sign_extend:<VWIDE>
2822 (vec_duplicate:VD_HSI
2823 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2826 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2827 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2832 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
2833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2835 (match_operand:<VWIDE> 1 "register_operand" "0")
2838 (sign_extend:<VWIDE>
2840 (match_operand:VQ_HSI 2 "register_operand" "w")
2841 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2842 (sign_extend:<VWIDE>
2844 (match_operand:VQ_HSI 3 "register_operand" "w")
2848 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2849 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2852 (define_expand "aarch64_sqdmlal2<mode>"
2853 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2854 (match_operand:<VWIDE> 1 "register_operand" "w")
2855 (match_operand:VQ_HSI 2 "register_operand" "w")
2856 (match_operand:VQ_HSI 3 "register_operand" "w")]
2859 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2860 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
2861 operands[2], operands[3], p));
2865 (define_expand "aarch64_sqdmlsl2<mode>"
2866 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2867 (match_operand:<VWIDE> 1 "register_operand" "w")
2868 (match_operand:VQ_HSI 2 "register_operand" "w")
2869 (match_operand:VQ_HSI 3 "register_operand" "w")]
2872 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2873 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
2874 operands[2], operands[3], p));
2880 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
2881 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2883 (match_operand:<VWIDE> 1 "register_operand" "0")
2886 (sign_extend:<VWIDE>
2888 (match_operand:VQ_HSI 2 "register_operand" "w")
2889 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
2890 (sign_extend:<VWIDE>
2891 (vec_duplicate:<VHALF>
2893 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2894 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
2899 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2901 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2903 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2906 (define_expand "aarch64_sqdmlal2_lane<mode>"
2907 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2908 (match_operand:<VWIDE> 1 "register_operand" "w")
2909 (match_operand:VQ_HSI 2 "register_operand" "w")
2910 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2911 (match_operand:SI 4 "immediate_operand" "i")]
2914 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2915 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2916 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2917 operands[2], operands[3],
2922 (define_expand "aarch64_sqdmlal2_laneq<mode>"
2923 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2924 (match_operand:<VWIDE> 1 "register_operand" "w")
2925 (match_operand:VQ_HSI 2 "register_operand" "w")
2926 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2927 (match_operand:SI 4 "immediate_operand" "i")]
2930 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2931 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2932 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2933 operands[2], operands[3],
2938 (define_expand "aarch64_sqdmlsl2_lane<mode>"
2939 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2940 (match_operand:<VWIDE> 1 "register_operand" "w")
2941 (match_operand:VQ_HSI 2 "register_operand" "w")
2942 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2943 (match_operand:SI 4 "immediate_operand" "i")]
2946 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2947 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2948 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2949 operands[2], operands[3],
2954 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
2955 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2956 (match_operand:<VWIDE> 1 "register_operand" "w")
2957 (match_operand:VQ_HSI 2 "register_operand" "w")
2958 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2959 (match_operand:SI 4 "immediate_operand" "i")]
2962 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2963 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2964 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2965 operands[2], operands[3],
2970 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
2971 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2973 (match_operand:<VWIDE> 1 "register_operand" "0")
2976 (sign_extend:<VWIDE>
2978 (match_operand:VQ_HSI 2 "register_operand" "w")
2979 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2980 (sign_extend:<VWIDE>
2981 (vec_duplicate:<VHALF>
2982 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2985 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2986 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2989 (define_expand "aarch64_sqdmlal2_n<mode>"
2990 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2991 (match_operand:<VWIDE> 1 "register_operand" "w")
2992 (match_operand:VQ_HSI 2 "register_operand" "w")
2993 (match_operand:<VEL> 3 "register_operand" "w")]
2996 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2997 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
2998 operands[2], operands[3],
3003 (define_expand "aarch64_sqdmlsl2_n<mode>"
3004 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3005 (match_operand:<VWIDE> 1 "register_operand" "w")
3006 (match_operand:VQ_HSI 2 "register_operand" "w")
3007 (match_operand:<VEL> 3 "register_operand" "w")]
3010 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3011 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3012 operands[2], operands[3],
3019 (define_insn "aarch64_sqdmull<mode>"
3020 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3023 (sign_extend:<VWIDE>
3024 (match_operand:VSD_HSI 1 "register_operand" "w"))
3025 (sign_extend:<VWIDE>
3026 (match_operand:VSD_HSI 2 "register_operand" "w")))
3029 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3030 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3035 (define_insn "aarch64_sqdmull_lane<mode>_internal"
3036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3039 (sign_extend:<VWIDE>
3040 (match_operand:VD_HSI 1 "register_operand" "w"))
3041 (sign_extend:<VWIDE>
3042 (vec_duplicate:VD_HSI
3044 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3045 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3050 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3051 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3053 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3056 (define_insn "aarch64_sqdmull_lane<mode>_internal"
3057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3060 (sign_extend:<VWIDE>
3061 (match_operand:SD_HSI 1 "register_operand" "w"))
3062 (sign_extend:<VWIDE>
3064 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3065 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3070 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3071 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3073 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3076 (define_expand "aarch64_sqdmull_lane<mode>"
3077 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3078 (match_operand:VSD_HSI 1 "register_operand" "w")
3079 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3080 (match_operand:SI 3 "immediate_operand" "i")]
3083 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
3084 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
3085 operands[2], operands[3]));
3089 (define_expand "aarch64_sqdmull_laneq<mode>"
3090 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3091 (match_operand:VD_HSI 1 "register_operand" "w")
3092 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3093 (match_operand:SI 3 "immediate_operand" "i")]
3096 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
3097 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
3098 (operands[0], operands[1], operands[2], operands[3]));
3104 (define_insn "aarch64_sqdmull_n<mode>"
3105 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3108 (sign_extend:<VWIDE>
3109 (match_operand:VD_HSI 1 "register_operand" "w"))
3110 (sign_extend:<VWIDE>
3111 (vec_duplicate:VD_HSI
3112 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3116 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3117 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3124 (define_insn "aarch64_sqdmull2<mode>_internal"
3125 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3128 (sign_extend:<VWIDE>
3130 (match_operand:VQ_HSI 1 "register_operand" "w")
3131 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3132 (sign_extend:<VWIDE>
3134 (match_operand:VQ_HSI 2 "register_operand" "w")
3139 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3140 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3143 (define_expand "aarch64_sqdmull2<mode>"
3144 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3145 (match_operand:VQ_HSI 1 "register_operand" "w")
3146 (match_operand:<VCON> 2 "register_operand" "w")]
3149 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3150 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3157 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3158 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3161 (sign_extend:<VWIDE>
3163 (match_operand:VQ_HSI 1 "register_operand" "w")
3164 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3165 (sign_extend:<VWIDE>
3166 (vec_duplicate:<VHALF>
3168 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3169 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3174 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3175 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3177 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3180 (define_expand "aarch64_sqdmull2_lane<mode>"
3181 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3182 (match_operand:VQ_HSI 1 "register_operand" "w")
3183 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3184 (match_operand:SI 3 "immediate_operand" "i")]
3187 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3188 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
3189 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3190 operands[2], operands[3],
3195 (define_expand "aarch64_sqdmull2_laneq<mode>"
3196 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3197 (match_operand:VQ_HSI 1 "register_operand" "w")
3198 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3199 (match_operand:SI 3 "immediate_operand" "i")]
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3203 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3204 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3205 operands[2], operands[3],
3212 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3213 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3216 (sign_extend:<VWIDE>
3218 (match_operand:VQ_HSI 1 "register_operand" "w")
3219 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3220 (sign_extend:<VWIDE>
3221 (vec_duplicate:<VHALF>
3222 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3226 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3230 (define_expand "aarch64_sqdmull2_n<mode>"
3231 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3232 (match_operand:VQ_HSI 1 "register_operand" "w")
3233 (match_operand:<VEL> 2 "register_operand" "w")]
3236 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3237 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
3244 (define_insn "aarch64_<sur>shl<mode>"
3245 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3247 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3248 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3251 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3252 [(set_attr "type" "neon_shift_reg<q>")]
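;; These shifts take the per-element shift amount from a register; a
;; negative amount shifts right instead of left.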
3258 (define_insn "aarch64_<sur>q<r>shl<mode>"
3259 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3261 [(match_operand:VSDQ_I 1 "register_operand" "w")
3262 (match_operand:VSDQ_I 2 "register_operand" "w")]
3265 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3266 [(set_attr "type" "neon_sat_shift_reg<q>")]
3271 (define_insn "aarch64_<sur>shll_n<mode>"
3272 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3273 (unspec:<VWIDE> [(match_operand:VDW 1 "register_operand" "w")
3274 (match_operand:SI 2 "immediate_operand" "i")]
3278 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3279 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3280 if (INTVAL (operands[2]) == bit_width)
3282 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3285 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3287 [(set_attr "type" "neon_shift_imm_long")]
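;; A shift amount equal to the element width cannot be encoded in [us]shll,
;; so the code above falls back to the separate SHLL (shift left long by
;; element size) form for that case.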
3292 (define_insn "aarch64_<sur>shll2_n<mode>"
3293 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3294 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3295 (match_operand:SI 2 "immediate_operand" "i")]
3299 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3300 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3301 if (INTVAL (operands[2]) == bit_width)
3303 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3306 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3308 [(set_attr "type" "neon_shift_imm_long")]
3313 (define_insn "aarch64_<sur>shr_n<mode>"
3314 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3315 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3316 (match_operand:SI 2 "immediate_operand" "i")]
3320 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3321 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3322 return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3323 [(set_attr "type" "neon_sat_shift_imm<q>")]
3328 (define_insn "aarch64_<sur>sra_n<mode>"
3329 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3330 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3331 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3332 (match_operand:SI 3 "immediate_operand" "i")]
3336 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3337 aarch64_simd_const_bounds (operands[3], 1, bit_width + 1);
3338 return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3339 [(set_attr "type" "neon_shift_acc<q>")]
3344 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3345 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3346 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3347 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3348 (match_operand:SI 3 "immediate_operand" "i")]
3352 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3353 aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>,
3354 bit_width - <VSLRI:offsetlr> + 1);
3355 return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3356 [(set_attr "type" "neon_shift_imm<q>")]
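;; sli/sri insert the shifted bits into operand 0 and leave the remaining
;; bits unchanged, hence the "0" tie on operand 1.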
3361 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3362 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3363 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3364 (match_operand:SI 2 "immediate_operand" "i")]
3368 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3369 aarch64_simd_const_bounds (operands[2], 0, bit_width);
3370 return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3371 [(set_attr "type" "neon_sat_shift_imm<q>")]
3377 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3378 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3379 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3380 (match_operand:SI 2 "immediate_operand" "i")]
3384 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3385 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3386 return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";"
3387 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3391 ;; cm(eq|ge|gt|lt|le)
3392 ;; Note that we have constraints for both Dz and Z, as different expanders
3393 ;; have different ideas of what should be passed to this pattern.
3395 (define_insn "aarch64_cm<optab><mode>"
3396 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3398 (COMPARISONS:<V_cmp_result>
3399 (match_operand:VDQ 1 "register_operand" "w,w")
3400 (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3404 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3405 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3406 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
3409 (define_insn_and_split "aarch64_cm<optab>di"
3410 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3413 (match_operand:DI 1 "register_operand" "w,w,r")
3414 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3416 (clobber (reg:CC CC_REGNUM))]
3419 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3420 cm<optab>\t%d0, %d1, #0
3423 /* We need to prevent the split from
3424 happening in the 'w' constraint cases. */
3425 && GP_REGNUM_P (REGNO (operands[0]))
3426 && GP_REGNUM_P (REGNO (operands[1]))"
3429 enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3430 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3431 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3432 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3435 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
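;; When the DI comparison lives in general registers, the pattern above is
;; split into a compare and a negated cset (via cstoredi_neg), giving the
;; same all-ones/all-zeros result as the SIMD alternatives.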
3440 (define_insn "aarch64_cm<optab><mode>"
3441 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3443 (UCOMPARISONS:<V_cmp_result>
3444 (match_operand:VDQ 1 "register_operand" "w")
3445 (match_operand:VDQ 2 "register_operand" "w")
3448 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3449 [(set_attr "type" "neon_compare<q>")]
3452 (define_insn_and_split "aarch64_cm<optab>di"
3453 [(set (match_operand:DI 0 "register_operand" "=w,r")
3456 (match_operand:DI 1 "register_operand" "w,r")
3457 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3459 (clobber (reg:CC CC_REGNUM))]
3462 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3465 /* We need to prevent the split from
3466 happening in the 'w' constraint cases. */
3467 && GP_REGNUM_P (REGNO (operands[0]))
3468 && GP_REGNUM_P (REGNO (operands[1]))"
3471 enum machine_mode mode = CCmode;
3472 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3473 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3474 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3477 [(set_attr "type" "neon_compare, neon_compare_zero")]
3482 (define_insn "aarch64_cmtst<mode>"
3483 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3487 (match_operand:VDQ 1 "register_operand" "w")
3488 (match_operand:VDQ 2 "register_operand" "w"))
3489 (vec_duplicate:<V_cmp_result> (const_int 0)))))]
3491 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3492 [(set_attr "type" "neon_tst<q>")]
3495 (define_insn_and_split "aarch64_cmtstdi"
3496 [(set (match_operand:DI 0 "register_operand" "=w,r")
3500 (match_operand:DI 1 "register_operand" "w,r")
3501 (match_operand:DI 2 "register_operand" "w,r"))
3503 (clobber (reg:CC CC_REGNUM))]
3506 cmtst\t%d0, %d1, %d2
3509 /* We need to prevent the split from
3510 happening in the 'w' constraint cases. */
3511 && GP_REGNUM_P (REGNO (operands[0]))
3512 && GP_REGNUM_P (REGNO (operands[1]))"
3515 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3516 enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3517 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3518 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3519 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3522 [(set_attr "type" "neon_tst")]
3525 ;; fcm(eq|ge|gt|le|lt)
3527 (define_insn "aarch64_cm<optab><mode>"
3528 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3530 (COMPARISONS:<V_cmp_result>
3531 (match_operand:VALLF 1 "register_operand" "w,w")
3532 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3536 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3537 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3538 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3542 ;; Note that we can also handle what would be fac(le|lt) by
3543 ;; generating fac(ge|gt) with the operands swapped.
3545 (define_insn "*aarch64_fac<optab><mode>"
3546 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3548 (FAC_COMPARISONS:<V_cmp_result>
3549 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3550 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3553 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3554 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
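;; For example, "facgt v0.4s, v1.4s, v2.4s" tests |a| > |b| in each lane;
;; fac(le|lt) requests reach this pattern with the operands swapped, as
;; noted above.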
3559 (define_insn "aarch64_addp<mode>"
3560 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3562 [(match_operand:VD_BHSI 1 "register_operand" "w")
3563 (match_operand:VD_BHSI 2 "register_operand" "w")]
3566 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3567 [(set_attr "type" "neon_reduc_add<q>")]
3570 (define_insn "aarch64_addpdi"
3571 [(set (match_operand:DI 0 "register_operand" "=w")
3573 [(match_operand:V2DI 1 "register_operand" "w")]
3577 [(set_attr "type" "neon_reduc_add")]
3582 (define_insn "sqrt<mode>2"
3583 [(set (match_operand:VDQF 0 "register_operand" "=w")
3584 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3586 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3587 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3590 ;; Patterns for vector struct loads and stores.
3592 (define_insn "vec_load_lanesoi<mode>"
3593 [(set (match_operand:OI 0 "register_operand" "=w")
3594 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3595 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3598 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3599 [(set_attr "type" "neon_load2_2reg<q>")]
3602 (define_insn "vec_store_lanesoi<mode>"
3603 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3604 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3605 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3608 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3609 [(set_attr "type" "neon_store2_2reg<q>")]
3612 (define_insn "vec_load_lanesci<mode>"
3613 [(set (match_operand:CI 0 "register_operand" "=w")
3614 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3615 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3618 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3619 [(set_attr "type" "neon_load3_3reg<q>")]
3622 (define_insn "vec_store_lanesci<mode>"
3623 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3624 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3628 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3629 [(set_attr "type" "neon_store3_3reg<q>")]
3632 (define_insn "vec_load_lanesxi<mode>"
3633 [(set (match_operand:XI 0 "register_operand" "=w")
3634 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
3635 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3638 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3639 [(set_attr "type" "neon_load4_4reg<q>")]
3642 (define_insn "vec_store_lanesxi<mode>"
3643 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
3644 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
3645 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3648 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
3649 [(set_attr "type" "neon_store4_4reg<q>")]
3652 ;; Reload patterns for AdvSIMD register list operands.
3654 (define_expand "mov<mode>"
3655 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
3656 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
3659 if (can_create_pseudo_p ())
3661 if (GET_CODE (operands[0]) != REG)
3662 operands[1] = force_reg (<MODE>mode, operands[1]);
3666 (define_insn "*aarch64_mov<mode>"
3667 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
3668 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
3670 && (register_operand (operands[0], <MODE>mode)
3671 || register_operand (operands[1], <MODE>mode))"
3674 switch (which_alternative)
3677 case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
3678 case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
3679 default: gcc_unreachable ();
3682 [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
3683 neon_load<nregs>_<nregs>reg_q")
3684 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
3687 (define_insn "aarch64_be_ld1<mode>"
3688 [(set (match_operand:VALLDI 0 "register_operand" "=w")
3689 (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
3692 "ld1\\t{%0<Vmtype>}, %1"
3693 [(set_attr "type" "neon_load1_1reg<q>")]
3696 (define_insn "aarch64_be_st1<mode>"
3697 [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
3698 (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
3701 "st1\\t{%1<Vmtype>}, %0"
3702 [(set_attr "type" "neon_store1_1reg<q>")]
3706 [(set (match_operand:OI 0 "register_operand" "")
3707 (match_operand:OI 1 "register_operand" ""))]
3708 "TARGET_SIMD && reload_completed"
3709 [(set (match_dup 0) (match_dup 1))
3710 (set (match_dup 2) (match_dup 3))]
3712 int rdest = REGNO (operands[0]);
3713 int rsrc = REGNO (operands[1]);
3714 rtx dest[2], src[2];
3716 dest[0] = gen_rtx_REG (TFmode, rdest);
3717 src[0] = gen_rtx_REG (TFmode, rsrc);
3718 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3719 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3721 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
3725 [(set (match_operand:CI 0 "register_operand" "")
3726 (match_operand:CI 1 "register_operand" ""))]
3727 "TARGET_SIMD && reload_completed"
3728 [(set (match_dup 0) (match_dup 1))
3729 (set (match_dup 2) (match_dup 3))
3730 (set (match_dup 4) (match_dup 5))]
3732 int rdest = REGNO (operands[0]);
3733 int rsrc = REGNO (operands[1]);
3734 rtx dest[3], src[3];
3736 dest[0] = gen_rtx_REG (TFmode, rdest);
3737 src[0] = gen_rtx_REG (TFmode, rsrc);
3738 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3739 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3740 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3741 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3743 aarch64_simd_disambiguate_copy (operands, dest, src, 3);
3747 [(set (match_operand:XI 0 "register_operand" "")
3748 (match_operand:XI 1 "register_operand" ""))]
3749 "TARGET_SIMD && reload_completed"
3750 [(set (match_dup 0) (match_dup 1))
3751 (set (match_dup 2) (match_dup 3))
3752 (set (match_dup 4) (match_dup 5))
3753 (set (match_dup 6) (match_dup 7))]
3755 int rdest = REGNO (operands[0]);
3756 int rsrc = REGNO (operands[1]);
3757 rtx dest[4], src[4];
3759 dest[0] = gen_rtx_REG (TFmode, rdest);
3760 src[0] = gen_rtx_REG (TFmode, rsrc);
3761 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3762 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3763 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3764 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3765 dest[3] = gen_rtx_REG (TFmode, rdest + 3);
3766 src[3] = gen_rtx_REG (TFmode, rsrc + 3);
3768 aarch64_simd_disambiguate_copy (operands, dest, src, 4);
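  /* aarch64_simd_disambiguate_copy orders the component TFmode moves in the
     splitters above so that overlapping source and destination register
     lists are copied without clobbering.  */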
3771 (define_insn "aarch64_ld2<mode>_dreg"
3772 [(set (match_operand:OI 0 "register_operand" "=w")
3776 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3778 (vec_duplicate:VD (const_int 0)))
3780 (unspec:VD [(match_dup 1)]
3782 (vec_duplicate:VD (const_int 0)))) 0))]
3784 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3785 [(set_attr "type" "neon_load2_2reg<q>")]
3788 (define_insn "aarch64_ld2<mode>_dreg"
3789 [(set (match_operand:OI 0 "register_operand" "=w")
3793 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3797 (unspec:DX [(match_dup 1)]
3799 (const_int 0))) 0))]
3801 "ld1\\t{%S0.1d - %T0.1d}, %1"
3802 [(set_attr "type" "neon_load1_2reg<q>")]
3805 (define_insn "aarch64_ld3<mode>_dreg"
3806 [(set (match_operand:CI 0 "register_operand" "=w")
3811 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3813 (vec_duplicate:VD (const_int 0)))
3815 (unspec:VD [(match_dup 1)]
3817 (vec_duplicate:VD (const_int 0))))
3819 (unspec:VD [(match_dup 1)]
3821 (vec_duplicate:VD (const_int 0)))) 0))]
3823 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3824 [(set_attr "type" "neon_load3_3reg<q>")]
3827 (define_insn "aarch64_ld3<mode>_dreg"
3828 [(set (match_operand:CI 0 "register_operand" "=w")
3833 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3837 (unspec:DX [(match_dup 1)]
3841 (unspec:DX [(match_dup 1)]
3843 (const_int 0))) 0))]
3845 "ld1\\t{%S0.1d - %U0.1d}, %1"
3846 [(set_attr "type" "neon_load1_3reg<q>")]
3849 (define_insn "aarch64_ld4<mode>_dreg"
3850 [(set (match_operand:XI 0 "register_operand" "=w")
3855 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3857 (vec_duplicate:VD (const_int 0)))
3859 (unspec:VD [(match_dup 1)]
3861 (vec_duplicate:VD (const_int 0))))
3864 (unspec:VD [(match_dup 1)]
3866 (vec_duplicate:VD (const_int 0)))
3868 (unspec:VD [(match_dup 1)]
3870 (vec_duplicate:VD (const_int 0))))) 0))]
3872 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3873 [(set_attr "type" "neon_load4_4reg<q>")]
3876 (define_insn "aarch64_ld4<mode>_dreg"
3877 [(set (match_operand:XI 0 "register_operand" "=w")
3882 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3886 (unspec:DX [(match_dup 1)]
3891 (unspec:DX [(match_dup 1)]
3895 (unspec:DX [(match_dup 1)]
3897 (const_int 0)))) 0))]
3899 "ld1\\t{%S0.1d - %V0.1d}, %1"
3900 [(set_attr "type" "neon_load1_4reg<q>")]
3903 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
3904 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3905 (match_operand:DI 1 "register_operand" "r")
3906 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3909 enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
3910 rtx mem = gen_rtx_MEM (mode, operands[1]);
3912 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
3916 (define_expand "aarch64_ld1<VALL:mode>"
3917 [(match_operand:VALL 0 "register_operand")
3918 (match_operand:DI 1 "register_operand")]
3921 enum machine_mode mode = <VALL:MODE>mode;
3922 rtx mem = gen_rtx_MEM (mode, operands[1]);
3924 if (BYTES_BIG_ENDIAN)
3925 emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
3927 emit_move_insn (operands[0], mem);
3931 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
3932 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3933 (match_operand:DI 1 "register_operand" "r")
3934 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3937 enum machine_mode mode = <VSTRUCT:MODE>mode;
3938 rtx mem = gen_rtx_MEM (mode, operands[1]);
3940 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
3944 ;; Expanders for builtins to extract vector registers from large
3945 ;; opaque integer modes.
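;; Each vector in a register-list mode occupies a full 16-byte slot, so the
;; part selected by the immediate lives at subreg byte offset part * 16 (for
;; D registers, in the low half of that slot).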
3949 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
3950 [(match_operand:VDC 0 "register_operand" "=w")
3951 (match_operand:VSTRUCT 1 "register_operand" "w")
3952 (match_operand:SI 2 "immediate_operand" "i")]
3955 int part = INTVAL (operands[2]);
3956 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
3957 int offset = part * 16;
3959 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
3960 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
3966 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
3967 [(match_operand:VQ 0 "register_operand" "=w")
3968 (match_operand:VSTRUCT 1 "register_operand" "w")
3969 (match_operand:SI 2 "immediate_operand" "i")]
3972 int part = INTVAL (operands[2]);
3973 int offset = part * 16;
3975 emit_move_insn (operands[0],
3976 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
3980 ;; Permuted-store expanders for neon intrinsics.
3982 ;; Permute instructions
3986 (define_expand "vec_perm_const<mode>"
3987 [(match_operand:VALL 0 "register_operand")
3988 (match_operand:VALL 1 "register_operand")
3989 (match_operand:VALL 2 "register_operand")
3990 (match_operand:<V_cmp_result> 3)]
3993 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
3994 operands[2], operands[3]))
4000 (define_expand "vec_perm<mode>"
4001 [(match_operand:VB 0 "register_operand")
4002 (match_operand:VB 1 "register_operand")
4003 (match_operand:VB 2 "register_operand")
4004 (match_operand:VB 3 "register_operand")]
4005 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4007 aarch64_expand_vec_perm (operands[0], operands[1],
4008 operands[2], operands[3]);
4012 (define_insn "aarch64_tbl1<mode>"
4013 [(set (match_operand:VB 0 "register_operand" "=w")
4014 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
4015 (match_operand:VB 2 "register_operand" "w")]
4018 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
4019 [(set_attr "type" "neon_tbl1<q>")]
4022 ;; Two source registers.
4024 (define_insn "aarch64_tbl2v16qi"
4025 [(set (match_operand:V16QI 0 "register_operand" "=w")
4026 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
4027 (match_operand:V16QI 2 "register_operand" "w")]
4030 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
4031 [(set_attr "type" "neon_tbl2_q")]
4034 (define_insn_and_split "aarch64_combinev16qi"
4035 [(set (match_operand:OI 0 "register_operand" "=w")
4036 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
4037 (match_operand:V16QI 2 "register_operand" "w")]
4041 "&& reload_completed"
4044 aarch64_split_combinev16qi (operands);
4047 [(set_attr "type" "multiple")]
4050 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
4051 [(set (match_operand:VALL 0 "register_operand" "=w")
4052 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4053 (match_operand:VALL 2 "register_operand" "w")]
4056 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4057 [(set_attr "type" "neon_permute<q>")]
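;; For example, "zip1 v0.4s, v1.4s, v2.4s" interleaves the low halves of the
;; two inputs, while trn1/trn2 and uzp1/uzp2 provide the transpose and unzip
;; permutes.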
4060 (define_insn "aarch64_st2<mode>_dreg"
4061 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4062 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4063 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4066 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4067 [(set_attr "type" "neon_store2_2reg")]
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:TI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:EI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:EI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand" "r")
  (match_operand:VSTRUCT 1 "register_operand" "w")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  enum machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st1<VALL:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL 1 "register_operand")]
  "TARGET_SIMD"
{
  enum machine_mode mode = <VALL:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand" "+w")
  (match_operand:VSTRUCT 1 "register_operand" "0")
  (match_operand:VQ 2 "register_operand" "w")
  (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
                  operands[2]);
  DONE;
})

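;; Illustrative use (a sketch, not taken from this file): the store-side
;; intrinsics are the expected consumers of aarch64_set_qreg; arm_neon.h
;; inserts each Q register of an x2/x3/x4 structure into the opaque
;; integer value and then calls the st<n> builtin.  From user code:
;;
;;   #include <arm_neon.h>
;;
;;   void
;;   store_interleaved (int32_t *p, int32x4_t a, int32x4_t b)
;;   {
;;     int32x4x2_t v = { { a, b } };
;;     vst2q_s32 (p, v);                /* st2 {v0.4s - v1.4s}, [x0] */
;;   }
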
;; Standard pattern name vec_init<mode>.

(define_expand "vec_init<mode>"
  [(match_operand:VALL 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

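;; Illustrative use (a sketch, not taken from this file): vec_init<mode>
;; is what the middle end uses when a vector is constructed from
;; individual (possibly non-constant) elements:
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;
;;   v4si
;;   build (int a, int b, int c, int d)
;;   {
;;     return (v4si) { a, b, c, d };    /* expands via vec_initv4si */
;;   }
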
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALLDI 0 "register_operand" "=w")
        (vec_duplicate:VALLDI
          (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

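;; Illustrative use (a sketch, not taken from this file): the ld1r form
;; matches a load that is immediately duplicated to every lane:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t
;;   splat_load (const int32_t *p)
;;   {
;;     return vld1q_dup_s32 (p);        /* ld1r {v0.4s}, [x0] */
;;   }
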
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
                     UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
)

(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF 0 "register_operand" "=w")
        (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
                    FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
)

(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
                       (match_operand:VALLF 2 "register_operand" "w")]
                      UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
)

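;; Illustrative use (a sketch, not taken from this file): FRECPE returns
;; a low-precision reciprocal estimate and FRECPS computes (2.0 - a * x),
;; so each Newton-Raphson refinement step is x' = x * FRECPS (a, x).
;; With the arm_neon.h wrappers for these patterns:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   approx_recip (float32x4_t a)
;;   {
;;     float32x4_t x = vrecpeq_f32 (a);          /* frecpe */
;;     x = vmulq_f32 (x, vrecpsq_f32 (a, x));    /* first refinement */
;;     x = vmulq_f32 (x, vrecpsq_f32 (a, x));    /* second refinement */
;;     return x;
;;   }
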
;; Standard pattern name vec_extract<mode>.

(define_expand "vec_extract<mode>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

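;; Illustrative use (a sketch, not taken from this file): lane reads of
;; GNU C vector types are routed through vec_extract<mode> and end up in
;; the get_lane patterns:
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;
;;   int
;;   lane2 (v4si v)
;;   {
;;     return v[2];                     /* typically umov w0, v0.s[2] */
;;   }
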
;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
                       (match_operand:V16QI 2 "register_operand" "w")]
                      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
                      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)

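;; Illustrative use (a sketch, not taken from this file, and only
;; available on a +crypto target): one AES round through the arm_neon.h
;; wrappers for the two patterns above:
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t
;;   aes_round (uint8x16_t state, uint8x16_t round_key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (state, round_key)); /* aese + aesmc */
;;   }
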
;; sha1

(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

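;; Illustrative use (a sketch, not taken from this file): the SHA-256
;; hash-update intrinsics in arm_neon.h map directly onto the patterns
;; above (+crypto required):
;;
;;   #include <arm_neon.h>
;;
;;   uint32x4_t
;;   sha256_update_abcd (uint32x4_t abcd, uint32x4_t efgh, uint32x4_t wk)
;;   {
;;     return vsha256hq_u32 (abcd, efgh, wk);    /* sha256h */
;;   }
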
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "neon_mul_d_long")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "neon_mul_d_long")]
)
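
;; Illustrative use (a sketch, not taken from this file): the 64x64->128
;; carry-less multiplies are exposed as vmull_p64/vmull_high_p64 and are
;; the usual building block for GHASH/CRC-style code (+crypto required):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   clmul_high (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);             /* pmull2 */
;;   }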