;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2015 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (<MODE>mode, operands[1]);

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
          (match_operand:<VEL> 1 "register_operand" "r, w")))]
   dup\\t%0.<Vtype>, %<vw>1
   dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
          (match_operand:VALL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  [(set_attr "type" "neon_dup<q>")]

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
          (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  [(set_attr "type" "neon_dup<q>")]

(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
                "=w, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
                "m, w, w, w, r, r, Dn"))]
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  switch (which_alternative)
    case 0: return "ldr\\t%d0, %1";
    case 1: return "str\\t%d1, %0";
    case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
    case 3: return "umov\t%0, %1.d[0]";
    case 4: return "ins\t%0.d[0], %1";
    case 5: return "mov\t%0, %1";
      return aarch64_output_simd_mov_immediate (operands[1],
    default: gcc_unreachable ();
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
                     mov_reg, neon_move<q>")]

(define_insn "*aarch64_simd_mov<mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
                "=w, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
                "m, w, w, w, r, r, Dn"))]
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  switch (which_alternative)
      return "ldr\\t%q0, %1";
      return "str\\t%q1, %0";
      return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
      return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
  [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple, multiple,\
   (set_attr "length" "4,4,4,8,8,8,4")]

  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  aarch64_split_simd_move (operands[0], operands[1]);

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GP_REGNUM_P (REGNO (src)))
      rtx src_low_part = gen_lowpart (<VHALF>mode, src);
      rtx src_high_part = gen_highpart (<VHALF>mode, src);
        (gen_move_lo_quad_<mode> (dst, src_low_part));
        (gen_move_hi_quad_<mode> (dst, src_high_part));
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
      rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
      rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
      rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
        (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
            (match_operand:VMUL 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

(define_insn "*aarch64_mul3_elt_to_128df"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
            (match_operand:DF 2 "register_operand" "w"))
          (match_operand:V2DF 1 "register_operand" "w")))]
  "fmul\\t%0.2d, %1.2d, %2.d[0]"
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
  return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]

(define_insn "fabd<mode>_3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (abs:VDQF (minus:VDQF
                    (match_operand:VDQF 1 "register_operand" "w")
                    (match_operand:VDQF 2 "register_operand" "w"))))]
  "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_abd_<Vetype><q>")]

(define_insn "*fabd_scalar<mode>3"
  [(set (match_operand:GPF 0 "register_operand" "=w")
          (match_operand:GPF 1 "register_operand" "w")
          (match_operand:GPF 2 "register_operand" "w"))))]
  "fabd\t%<s>0, %<s>1, %<s>2"
  [(set_attr "type" "neon_fp_abd_<Vetype><q>")]

(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
          (vec_duplicate:VDQ_BHSI
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
          (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
      return "ins\\t%0.<Vetype>[%p2], %w1";
      return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
      return "ld1\\t{%0.<Vetype>}[%p2], %1";
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;

  if (CONST_INT_P (operands[2]))
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
      operands[2] = force_reg (SImode, operands[2]);
  else if (MEM_P (operands[2]))
      operands[2] = force_reg (SImode, operands[2]);

  if (REG_P (operands[2]))
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                 convert_to_mode (<VEL>mode,
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;

  if (CONST_INT_P (operands[2]))
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
      operands[2] = force_reg (SImode, operands[2]);
  else if (MEM_P (operands[2]))
      operands[2] = force_reg (SImode, operands[2]);

  if (REG_P (operands[2]))
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                 convert_to_mode (<VEL>mode,
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;

  if (CONST_INT_P (operands[2]))
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
      operands[2] = force_reg (SImode, operands[2]);
  else if (MEM_P (operands[2]))
      operands[2] = force_reg (SImode, operands[2]);

  if (REG_P (operands[2]))
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                 convert_to_mode (<VEL>mode,
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like asr by 63 - however the standard pattern does not handle
  if (INTVAL (operands[2]) == 64)
    operands[2] = GEN_INT (63);
  emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  if (INTVAL (operands[2]) == 64)
    emit_move_insn (operands[0], const0_rtx);
    emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));

(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")))]
  if (BYTES_BIG_ENDIAN)
    return "ushl %d0, %d1, %2";
    return "ushr %d0, %d1, %2";
  [(set_attr "type" "neon_shift_imm")]

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
          (match_operand:DI 1 "register_operand" "r,w"))
          (match_operand:V2DI 3 "register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
      return "ins\\t%0.d[%p2], %1";
      return "ins\\t%0.d[%p2], %1.d[0]";
  [(set_attr "type" "neon_from_gp, neon_ins_q")]

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
                                           GEN_INT (elem), operands[0]));

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
          (match_operand:<VEL> 1 "register_operand" "w"))
          (match_operand:VDQF 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));

  operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
  return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  [(set_attr "type" "neon_ins<q>")]

(define_expand "vec_set<mode>"
  [(match_operand:VDQF 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
  emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));

(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
              (match_operand:VDQHS 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
          (match_operand:VDQHS 4 "register_operand" "0")
              (match_operand:VDQHS 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
          (match_operand:VDQHS 4 "register_operand" "0")
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  enum rtx_code cmp_operator;
  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
             operands[2], cmp_fmt, operands[1], operands[2]));

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
                      (match_operand:VDQF 2 "register_operand" "w")]
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

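;; A rough C-style sketch of those two RTL forms (illustrative only, written
;; in RTL element order; "half" and "operand" are placeholder names, not real
;; intrinsics):
;;
;;   half lanes_le[2] = { operand, 0 };   /* little-endian: vec_concat (operand, zeroes) */
;;   half lanes_be[2] = { 0, operand };   /* big-endian:    vec_concat (zeroes, operand) */
;;
;; Either way the data ends up in the low 64 architectural bits of the
;; destination register; only the RTL lane numbering differs.
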
(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

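;; Illustrative sketch (not a pattern): with operand2's value already in the
;; destination register, the RTL lane order is
;;
;;   /* little-endian: { low half of op2, operand1 } */
;;   /* big-endian:    { operand1, low half of op2 } */
;;
;; and in both cases the emitted instruction inserts operand1 into the high
;; 64-bit lane, e.g. "ins v0.d[1], v1.d[0]".
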
(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   ins\\t%0.d[1], %1.d[0]
  [(set_attr "type" "neon_ins")]

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
          (match_operand:<VHALF> 1 "register_operand" "w,r")
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   ins\\t%0.d[1], %1.d[0]
  [(set_attr "type" "neon_ins")]

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand" "")
   (match_operand:<VHALF> 1 "register_operand" "")]
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],

;; Narrowing operations.

(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
   (match_operand:VDN 2 "register_operand" "")]
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  if (BYTES_BIG_ENDIAN)
    return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" ""
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" ""
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],

;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQW 2 "register_operand" "w")
                                (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
          (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQW 4 "register_operand" "w")
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQW 2 "register_operand" "w")
                                (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
          (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                (match_operand:VQW 4 "register_operand" "w")
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (match_operand:<VWIDE> 1 "register_operand" "0")
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (match_operand:<VWIDE> 1 "register_operand" "0")
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 2 "register_operand" "w")
                                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                                  (match_operand:VQW 4 "register_operand" "w")
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
              (match_operand:VD_BHSI 1 "register_operand" "w"))
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
          (match_operand:<VWIDE> 1 "register_operand" "0")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]

(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
  emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
  emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],

;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.

;; Floating-point operations can raise an exception.  Vectorising such
;; operations is safe for the reasons explained below.

;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

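;; Purely as an illustration of the "sticky flag" model above, scalar C code
;; using the standard <fenv.h> interface (handle_fp_error is a hypothetical
;; handler, not something defined here) would look like:
;;
;;   #include <fenv.h>
;;   feclearexcept (FE_ALL_EXCEPT);
;;   /* ... floating-point arithmetic, possibly vectorised ... */
;;   if (fetestexcept (FE_INVALID | FE_DIVBYZERO))
;;     handle_fp_error ();
;;
;; Tests written in this style either sit inside the candidate loop and
;; inhibit vectorisation, or only observe the accumulated flags afterwards,
;; which is why vectorising the FP operations below remains safe.
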
;; FP arithmetic operations.

(define_insn "add<mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
                   (match_operand:VDQF 2 "register_operand" "w")))]
  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]

(define_insn "sub<mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
                    (match_operand:VDQF 2 "register_operand" "w")))]
  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]

(define_insn "mul<mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
                   (match_operand:VDQF 2 "register_operand" "w")))]
  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]

(define_insn "div<mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
                  (match_operand:VDQF 2 "register_operand" "w")))]
  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<Vetype><q>")]

(define_insn "neg<mode>2"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
  "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<Vetype><q>")]

(define_insn "abs<mode>2"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
  "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<Vetype><q>")]

(define_insn "fma<mode>4"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
                  (match_operand:VDQF 2 "register_operand" "w")
                  (match_operand:VDQF 3 "register_operand" "0")))]
  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<Vetype><q>")]

(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
            (match_operand:VDQF 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 3 "register_operand" "w")
          (match_operand:VDQF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
          (vec_duplicate:VDQSF
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 3 "register_operand" "w")
          (match_operand:VDQSF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_fma4_elt_to_128df"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
          (match_operand:DF 1 "register_operand" "w"))
          (match_operand:V2DF 2 "register_operand" "w")
          (match_operand:V2DF 3 "register_operand" "0")))]
  "fmla\\t%0.2d, %2.2d, %1.2d[0]"
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]

(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
          (match_operand:V2DF 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")
          (match_operand:DF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
  return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]

(define_insn "fnma<mode>4"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
          (match_operand:VDQF 1 "register_operand" "w")
          (match_operand:VDQF 2 "register_operand" "w"))
          (match_operand:VDQF 3 "register_operand" "0")))]
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<Vetype><q>")]

(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
          (match_operand:VDQF 3 "register_operand" "w"))
            (match_operand:VDQF 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
          (match_operand:VDQSF 3 "register_operand" "w"))
          (vec_duplicate:VDQSF
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
            (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VDQSF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
                                        INTVAL (operands[2])));
  return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

(define_insn "*aarch64_fnma4_elt_to_128df"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
          (match_operand:V2DF 2 "register_operand" "w"))
          (match_operand:DF 1 "register_operand" "w"))
          (match_operand:V2DF 3 "register_operand" "0")))]
  "fmls\\t%0.2d, %2.2d, %1.2d[0]"
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]

(define_insn "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
          (match_operand:V2DF 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w"))
          (match_operand:DF 4 "register_operand" "0")))]
  operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
  return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]

;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<Vetype><q>")]

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VDQF 1 "register_operand" "w")]
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]

(define_expand "<optab><VDQF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VDQF 1 "register_operand")]

(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
        (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
                                 [(match_operand:VDQF 1 "register_operand")]

(define_expand "ftrunc<VDQF:mode>2"
  [(set (match_operand:VDQF 0 "register_operand")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]

(define_insn "<optab><fcvt_target><VDQF:mode>2"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
          (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]

;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
          (match_operand:V4SF 1 "register_operand" "w")
          (parallel [(const_int 0) (const_int 1)])
  "fcvtl\\t%0.2d, %1.2s"
  [(set_attr "type" "neon_fp_cvt_widen_s")]

(define_insn "aarch64_float_extend_lo_v2df"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
          (match_operand:V2SF 1 "register_operand" "w")))]
  "fcvtl\\t%0.2d, %1.2s"
  [(set_attr "type" "neon_fp_cvt_widen_s")]

(define_insn "vec_unpacks_hi_v4sf"
  [(set (match_operand:V2DF 0 "register_operand" "=w")
          (match_operand:V4SF 1 "register_operand" "w")
          (parallel [(const_int 2) (const_int 3)])
  "fcvtl2\\t%0.2d, %1.4s"
  [(set_attr "type" "neon_fp_cvt_widen_s")]

;; Float narrowing operations.

(define_insn "aarch64_float_truncate_lo_v2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (float_truncate:V2SF
          (match_operand:V2DF 1 "register_operand" "w")))]
  "fcvtn\\t%0.2s, %1.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

(define_insn "aarch64_float_truncate_hi_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
          (match_operand:V2SF 1 "register_operand" "0")
          (float_truncate:V2SF
            (match_operand:V2DF 2 "register_operand" "w"))))]
  "fcvtn2\\t%0.4s, %2.2d"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

(define_expand "vec_pack_trunc_v2df"
  [(set (match_operand:V4SF 0 "register_operand")
          (float_truncate:V2SF
            (match_operand:V2DF 1 "register_operand"))
          (float_truncate:V2SF
            (match_operand:V2DF 2 "register_operand"))
  rtx tmp = gen_reg_rtx (V2SFmode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
  emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
                                                 tmp, operands[hi]));

(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
          (match_operand:DF 1 "register_operand"))
          (match_operand:DF 2 "register_operand"))
  rtx tmp = gen_reg_rtx (V2SFmode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
  emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
  emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));

(define_insn "aarch64_vmls<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
                    (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
                               (match_operand:VDQF 3 "register_operand" "w"))))]
  "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
;;   a = (b < c) ? b : c;
;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
;; either explicitly or indirectly via -ffast-math.

;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
;; The 'smax' and 'smin' RTL standard pattern names do not specify which
;; operand will be returned when both operands are zero (i.e. they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring

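;; For example (illustrative C, not taken from this file):
;;
;;   float fmin_idiom (float b, float c) { return (b < c) ? b : c; }
;;
;; is only recognised as MIN_EXPR, and hence routed to the fminnm/fmaxnm
;; patterns below, under -ffinite-math-only (e.g. via -ffast-math), since
;; the conditional form and the instruction may differ for NaN and
;; signed-zero inputs.
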
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
                      (match_operand:VDQF 2 "register_operand" "w")))]
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]

(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
                      (match_operand:VDQF 2 "register_operand" "w")]
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]

;; 'across lanes' add.

(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
  rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
  rtx scratch = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
  emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand" "=w")
   (match_operand:V2F 1 "register_operand" "w")]
  rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
  rtx scratch = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
  emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:VDQV 0 "register_operand" "=w")
        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]

(define_insn "aarch64_reduc_plus_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
  "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]

(define_insn "aarch64_reduc_plus_internal<mode>"
  [(set (match_operand:V2F 0 "register_operand" "=w")
        (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
  "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]

(define_insn "aarch64_addpv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
  "faddp\\t%0.4s, %1.4s, %1.4s"
  [(set_attr "type" "neon_fp_reduc_add_s_q")]

(define_expand "reduc_plus_scal_v4sf"
  [(set (match_operand:SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
  rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
  emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));

(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]

(define_insn "clz<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]

(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]

;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).

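;; Illustrative only: the kind of scalar reduction loop that reaches these
;; expanders via REDUC_MAX_EXPR (the names x, n and m are just examples):
;;
;;   float m = x[0];
;;   for (int i = 1; i < n; i++)
;;     m = (x[i] > m) ? x[i] : m;
;;
;; The expanders below perform the 'across lanes' reduction into a vector
;; register and then extract lane 0 as the scalar result.
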
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
  rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
  rtx scratch = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
  emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
  rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
  rtx scratch = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
  emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]

(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
  [(set (match_operand:V2SI 0 "register_operand" "=w")
        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]

(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]

1985 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1986 ;; allocation.
1987 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1988 ;; to select.
1989 ;;
1990 ;; Thus our BSL is of the form:
1991 ;; op0 = bsl (mask, op2, op3)
1992 ;; We can use any of:
1993 ;;
1994 ;; if (op0 = mask)
1995 ;; bsl mask, op2, op3
1996 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else keep op0)
1997 ;; bit op0, op2, mask
1998 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else keep op0)
1999 ;; bif op0, op3, mask
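;;
;; Whichever alternative register allocation picks, the value computed is,
;; bitwise, op0 = (mask & op2) | (~mask & op3): set bits in the mask select
;; bits from op2, clear bits select bits from op3.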
2001 (define_insn "aarch64_simd_bsl<mode>_internal"
2002 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2006 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2007 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2008 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2009 (match_dup:<V_cmp_result> 3)
2013 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2014 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2015 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2016 [(set_attr "type" "neon_bsl<q>")]
2019 (define_expand "aarch64_simd_bsl<mode>"
2020 [(match_operand:VALLDIF 0 "register_operand")
2021 (match_operand:<V_cmp_result> 1 "register_operand")
2022 (match_operand:VALLDIF 2 "register_operand")
2023 (match_operand:VALLDIF 3 "register_operand")]
2026 /* We can't alias operands together if they have different modes. */
2027 rtx tmp = operands[0];
2028 if (FLOAT_MODE_P (<MODE>mode))
2030 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2031 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2032 tmp = gen_reg_rtx (<V_cmp_result>mode);
2034 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2035 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2039 if (tmp != operands[0])
2040 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2045 (define_expand "aarch64_vcond_internal<mode><mode>"
2046 [(set (match_operand:VDQ_I 0 "register_operand")
2048 (match_operator 3 "comparison_operator"
2049 [(match_operand:VDQ_I 4 "register_operand")
2050 (match_operand:VDQ_I 5 "nonmemory_operand")])
2051 (match_operand:VDQ_I 1 "nonmemory_operand")
2052 (match_operand:VDQ_I 2 "nonmemory_operand")))]
2055 rtx op1 = operands[1];
2056 rtx op2 = operands[2];
2057 rtx mask = gen_reg_rtx (<MODE>mode);
2058 enum rtx_code code = GET_CODE (operands[3]);
2060 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2061 and desirable for other comparisons if it results in FOO ? -1 : 0
2062 (this allows direct use of the comparison result without a bsl). */
2065 && op1 == CONST0_RTX (<V_cmp_result>mode)
2066 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2072 case LE: code = GT; break;
2073 case LT: code = GE; break;
2074 case GE: code = LT; break;
2075 case GT: code = LE; break;
2077 case NE: code = EQ; break;
2078 case LTU: code = GEU; break;
2079 case LEU: code = GTU; break;
2080 case GTU: code = LEU; break;
2081 case GEU: code = LTU; break;
2082 default: gcc_unreachable ();
2086 /* Make sure we can handle the last operand. */
2090 /* Normalized to EQ above. */
2098 /* These instructions have a form taking an immediate zero. */
2099 if (operands[5] == CONST0_RTX (<MODE>mode))
2101 /* Fall through, as may need to load into register. */
2103 if (!REG_P (operands[5]))
2104 operands[5] = force_reg (<MODE>mode, operands[5]);
2111 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2115 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2119 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2123 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2127 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2131 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2135 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2139 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2142 /* NE has been normalized to EQ above. */
2144 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2151 /* If we have (a = (b CMP c) ? -1 : 0),
2152 then we can simply move the generated mask.  */
2154 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2155 && op2 == CONST0_RTX (<V_cmp_result>mode))
2156 emit_move_insn (operands[0], mask);
2160 op1 = force_reg (<MODE>mode, op1);
2162 op2 = force_reg (<MODE>mode, op2);
2163 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
2170 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2171 [(set (match_operand:VDQF_COND 0 "register_operand")
2173 (match_operator 3 "comparison_operator"
2174 [(match_operand:VDQF 4 "register_operand")
2175 (match_operand:VDQF 5 "nonmemory_operand")])
2176 (match_operand:VDQF_COND 1 "nonmemory_operand")
2177 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2181 int use_zero_form = 0;
2182 int swap_bsl_operands = 0;
2183 rtx op1 = operands[1];
2184 rtx op2 = operands[2];
2185 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2186 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2188 rtx (*base_comparison) (rtx, rtx, rtx);
2189 rtx (*complementary_comparison) (rtx, rtx, rtx);
2191 switch (GET_CODE (operands[3]))
2198 if (operands[5] == CONST0_RTX (<MODE>mode))
2205 if (!REG_P (operands[5]))
2206 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2209 switch (GET_CODE (operands[3]))
2219 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2220 complementary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2228 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2229 complementary_comparison = gen_aarch64_cmge<VDQF:mode>;
2234 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2235 complementary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2241 switch (GET_CODE (operands[3]))
2248 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2249 As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
2255 Note that there also exist direct comparison against 0 forms,
2256 so catch those as a special case. */
2260 switch (GET_CODE (operands[3]))
2263 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2266 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2269 /* Do nothing, other zero form cases already have the correct
2276 emit_insn (base_comparison (mask, operands[4], operands[5]));
2278 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2285 /* FCM returns false for lanes which are unordered, so if we emit
2286 the inverse of the comparison we actually want and then swap the
2287 operands to BSL, we will end up with the correct result.
2288 Note that a NE NaN and NaN NE b are true for all a, b.
2290 Our transformations are:
2295 a NE b -> !(a EQ b) */
2298 emit_insn (base_comparison (mask, operands[4], operands[5]));
2300 emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2302 swap_bsl_operands = 1;
2305 /* We check (a > b || b > a).  Combining these comparisons gives us
2306 true iff (a != b && a ORDERED b); swapping the operands to BSL
2307 will then give us (a == b || a UNORDERED b) as intended.  */
2309 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2310 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2311 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2312 swap_bsl_operands = 1;
2315 /* Operands are ORDERED iff (a > b || b >= a).
2316 Swapping the operands to BSL will give the UNORDERED case. */
2317 swap_bsl_operands = 1;
2320 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2321 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2322 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2328 if (swap_bsl_operands)
2334 /* If we have (a = (b CMP c) ? -1 : 0),
2335 then we can simply move the generated mask.  */
2337 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2338 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2339 emit_move_insn (operands[0], mask);
2343 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2345 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2346 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
2353 (define_expand "vcond<mode><mode>"
2354 [(set (match_operand:VALL 0 "register_operand")
2356 (match_operator 3 "comparison_operator"
2357 [(match_operand:VALL 4 "register_operand")
2358 (match_operand:VALL 5 "nonmemory_operand")])
2359 (match_operand:VALL 1 "nonmemory_operand")
2360 (match_operand:VALL 2 "nonmemory_operand")))]
2363 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2364 operands[2], operands[3],
2365 operands[4], operands[5]));
2369 (define_expand "vcond<v_cmp_result><mode>"
2370 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2371 (if_then_else:<V_cmp_result>
2372 (match_operator 3 "comparison_operator"
2373 [(match_operand:VDQF 4 "register_operand")
2374 (match_operand:VDQF 5 "nonmemory_operand")])
2375 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2376 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2379 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2380 operands[0], operands[1],
2381 operands[2], operands[3],
2382 operands[4], operands[5]));
2386 (define_expand "vcondu<mode><mode>"
2387 [(set (match_operand:VDQ_I 0 "register_operand")
2389 (match_operator 3 "comparison_operator"
2390 [(match_operand:VDQ_I 4 "register_operand")
2391 (match_operand:VDQ_I 5 "nonmemory_operand")])
2392 (match_operand:VDQ_I 1 "nonmemory_operand")
2393 (match_operand:VDQ_I 2 "nonmemory_operand")))]
2396 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2397 operands[2], operands[3],
2398 operands[4], operands[5]));
2402 ;; Patterns for AArch64 SIMD Intrinsics.
2404 ;; Lane extraction with sign extension to general purpose register.
2405 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2406 [(set (match_operand:GPI 0 "register_operand" "=r")
2409 (match_operand:VDQQH 1 "register_operand" "w")
2410 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2413 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2414 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2416 [(set_attr "type" "neon_to_gp<q>")]
2419 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2420 [(set (match_operand:SI 0 "register_operand" "=r")
2423 (match_operand:VDQQH 1 "register_operand" "w")
2424 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2427 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2428 return "umov\\t%w0, %1.<Vetype>[%2]";
2430 [(set_attr "type" "neon_to_gp<q>")]
2433 ;; Lane extraction of a value; neither sign nor zero extension
2434 ;; is guaranteed, so the upper bits should be considered undefined.
2435 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
2436 (define_insn "aarch64_get_lane<mode>"
2437 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2439 (match_operand:VALL 1 "register_operand" "w, w, w")
2440 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2443 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2444 switch (which_alternative)
2447 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2449 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2451 return "st1\\t{%1.<Vetype>}[%2], %0";
2456 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
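;; As an illustration: with ENDIAN_LANE_N, GCC vector index 0 of a V4SI
;; value is printed as lane 0 on little-endian but as lane 3 on big-endian,
;; because GCC indices follow memory order while the assembly lane numbers
;; follow the register layout.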
2459 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2460 ;; dest vector.
2462 (define_insn "*aarch64_combinez<mode>"
2463 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2465 (match_operand:VD_BHSI 1 "register_operand" "w")
2466 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")))]
2467 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2468 "mov\\t%0.8b, %1.8b"
2469 [(set_attr "type" "neon_move<q>")]
2472 (define_insn "*aarch64_combinez_be<mode>"
2473 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2475 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")
2476 (match_operand:VD_BHSI 1 "register_operand" "w")))]
2477 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2478 "mov\\t%0.8b, %1.8b"
2479 [(set_attr "type" "neon_move<q>")]
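;; Both *aarch64_combinez patterns rely on the architectural guarantee that
;; a write to the 64-bit (D-sized) view of a SIMD register zeroes its upper
;; 64 bits, so a plain 8-byte register move implements the concatenation
;; with zero.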
2482 (define_expand "aarch64_combine<mode>"
2483 [(match_operand:<VDBL> 0 "register_operand")
2484 (match_operand:VDC 1 "register_operand")
2485 (match_operand:VDC 2 "register_operand")]
2489 if (BYTES_BIG_ENDIAN)
2499 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
2504 (define_insn_and_split "aarch64_combine_internal<mode>"
2505 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2506 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2507 (match_operand:VDC 2 "register_operand" "w")))]
2510 "&& reload_completed"
2513 if (BYTES_BIG_ENDIAN)
2514 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2516 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2519 [(set_attr "type" "multiple")]
2522 (define_expand "aarch64_simd_combine<mode>"
2523 [(match_operand:<VDBL> 0 "register_operand")
2524 (match_operand:VDC 1 "register_operand")
2525 (match_operand:VDC 2 "register_operand")]
2528 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2529 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2532 [(set_attr "type" "multiple")]
2535 ;; <su><addsub>l<q>.
2537 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2538 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2539 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2540 (match_operand:VQW 1 "register_operand" "w")
2541 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2542 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2543 (match_operand:VQW 2 "register_operand" "w")
2546 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2547 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2550 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2551 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2552 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2553 (match_operand:VQW 1 "register_operand" "w")
2554 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2555 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2556 (match_operand:VQW 2 "register_operand" "w")
2559 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2560 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
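;; Illustration: for V8HI operands the _lo_internal pattern above emits
;; e.g. "saddl v0.4s, v1.4h, v2.4h" on the low halves, while _hi_internal
;; emits "saddl2 v0.4s, v1.8h, v2.8h" on the high halves.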
2564 (define_expand "aarch64_saddl2<mode>"
2565 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2566 (match_operand:VQW 1 "register_operand" "w")
2567 (match_operand:VQW 2 "register_operand" "w")]
2570 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2571 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2576 (define_expand "aarch64_uaddl2<mode>"
2577 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2578 (match_operand:VQW 1 "register_operand" "w")
2579 (match_operand:VQW 2 "register_operand" "w")]
2582 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2583 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2588 (define_expand "aarch64_ssubl2<mode>"
2589 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2590 (match_operand:VQW 1 "register_operand" "w")
2591 (match_operand:VQW 2 "register_operand" "w")]
2594 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2595 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2600 (define_expand "aarch64_usubl2<mode>"
2601 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2602 (match_operand:VQW 1 "register_operand" "w")
2603 (match_operand:VQW 2 "register_operand" "w")]
2606 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2607 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2612 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2613 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2614 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2615 (match_operand:VD_BHSI 1 "register_operand" "w"))
2617 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2619 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2620 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2623 ;; <su><addsub>w<q>.
2625 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2626 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2627 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2629 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2631 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2632 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2635 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2637 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2640 (match_operand:VQW 2 "register_operand" "w")
2641 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2643 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2644 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2647 (define_expand "aarch64_saddw2<mode>"
2648 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2649 (match_operand:<VWIDE> 1 "register_operand" "w")
2650 (match_operand:VQW 2 "register_operand" "w")]
2653 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2654 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2659 (define_expand "aarch64_uaddw2<mode>"
2660 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2661 (match_operand:<VWIDE> 1 "register_operand" "w")
2662 (match_operand:VQW 2 "register_operand" "w")]
2665 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2666 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2672 (define_expand "aarch64_ssubw2<mode>"
2673 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2674 (match_operand:<VWIDE> 1 "register_operand" "w")
2675 (match_operand:VQW 2 "register_operand" "w")]
2678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2679 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2684 (define_expand "aarch64_usubw2<mode>"
2685 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2686 (match_operand:<VWIDE> 1 "register_operand" "w")
2687 (match_operand:VQW 2 "register_operand" "w")]
2690 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2691 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2696 ;; <su><r>h<addsub>.
2698 (define_insn "aarch64_<sur>h<addsub><mode>"
2699 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2700 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2701 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2704 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2705 [(set_attr "type" "neon_<addsub>_halve<q>")]
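;; The halving forms compute (a +/- b) >> 1 per element without
;; intermediate overflow; the rounding variants (e.g. urhadd) add 1 before
;; the shift, giving (a + b + 1) >> 1.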
2708 ;; <r><addsub>hn<q>.
2710 (define_insn "aarch64_<sur><addsub>hn<mode>"
2711 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2712 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2713 (match_operand:VQN 2 "register_operand" "w")]
2716 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2717 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2720 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2721 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2722 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2723 (match_operand:VQN 2 "register_operand" "w")
2724 (match_operand:VQN 3 "register_operand" "w")]
2727 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2728 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
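;; Each result element here is the most significant half of the
;; corresponding sum or difference: e.g. addhn on 16-bit inputs yields
;; (a + b) >> 8 narrowed to 8 bits, and the rounding forms add 1 << 7
;; before taking the high half.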
2733 (define_insn "aarch64_pmul<mode>"
2734 [(set (match_operand:VB 0 "register_operand" "=w")
2735 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2736 (match_operand:VB 2 "register_operand" "w")]
2739 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2740 [(set_attr "type" "neon_mul_<Vetype><q>")]
2745 (define_insn "aarch64_<su_optab><optab><mode>"
2746 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2747 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2748 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2750 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2751 [(set_attr "type" "neon_<optab><q>")]
2754 ;; suqadd and usqadd
2756 (define_insn "aarch64_<sur>qadd<mode>"
2757 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2758 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2759 (match_operand:VSDQ_I 2 "register_operand" "w")]
2762 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2763 [(set_attr "type" "neon_qadd<q>")]
2768 (define_insn "aarch64_sqmovun<mode>"
2769 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2770 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2773 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2774 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2777 ;; sqmovn and uqmovn
2779 (define_insn "aarch64_<sur>qmovn<mode>"
2780 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2781 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2784 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2785 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2790 (define_insn "aarch64_s<optab><mode>"
2791 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2793 (match_operand:VSDQ_I 1 "register_operand" "w")))]
2795 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2796 [(set_attr "type" "neon_<optab><q>")]
2801 (define_insn "aarch64_sq<r>dmulh<mode>"
2802 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2804 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2805 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2808 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2809 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
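;; Roughly, sqdmulh computes sat ((2 * a * b) >> esize) per element;
;; sqrdmulh additionally adds the rounding constant 1 << (esize - 1)
;; before the shift.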
2814 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2815 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2817 [(match_operand:VDQHS 1 "register_operand" "w")
2819 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2820 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2824 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2825 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2826 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2829 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2830 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2832 [(match_operand:VDQHS 1 "register_operand" "w")
2834 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2835 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2839 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2840 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2841 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2844 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2845 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2847 [(match_operand:SD_HSI 1 "register_operand" "w")
2849 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2850 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2854 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2855 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2856 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2859 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2860 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2862 [(match_operand:SD_HSI 1 "register_operand" "w")
2864 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2865 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2869 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2870 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2871 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2876 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2877 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2879 (match_operand:<VWIDE> 1 "register_operand" "0")
2882 (sign_extend:<VWIDE>
2883 (match_operand:VSD_HSI 2 "register_operand" "w"))
2884 (sign_extend:<VWIDE>
2885 (match_operand:VSD_HSI 3 "register_operand" "w")))
2888 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2889 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
2894 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2895 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2897 (match_operand:<VWIDE> 1 "register_operand" "0")
2900 (sign_extend:<VWIDE>
2901 (match_operand:VD_HSI 2 "register_operand" "w"))
2902 (sign_extend:<VWIDE>
2903 (vec_duplicate:VD_HSI
2905 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2906 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2911 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2913 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2915 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2918 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2919 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2921 (match_operand:<VWIDE> 1 "register_operand" "0")
2924 (sign_extend:<VWIDE>
2925 (match_operand:VD_HSI 2 "register_operand" "w"))
2926 (sign_extend:<VWIDE>
2927 (vec_duplicate:VD_HSI
2929 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2930 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2935 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2937 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2939 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2942 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2943 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2945 (match_operand:<VWIDE> 1 "register_operand" "0")
2948 (sign_extend:<VWIDE>
2949 (match_operand:SD_HSI 2 "register_operand" "w"))
2950 (sign_extend:<VWIDE>
2952 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2953 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2958 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2960 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2962 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2965 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2966 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2968 (match_operand:<VWIDE> 1 "register_operand" "0")
2971 (sign_extend:<VWIDE>
2972 (match_operand:SD_HSI 2 "register_operand" "w"))
2973 (sign_extend:<VWIDE>
2975 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2976 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2981 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2983 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2985 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2990 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
2991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2993 (match_operand:<VWIDE> 1 "register_operand" "0")
2996 (sign_extend:<VWIDE>
2997 (match_operand:VD_HSI 2 "register_operand" "w"))
2998 (sign_extend:<VWIDE>
2999 (vec_duplicate:VD_HSI
3000 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3003 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3004 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3009 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3012 (match_operand:<VWIDE> 1 "register_operand" "0")
3015 (sign_extend:<VWIDE>
3017 (match_operand:VQ_HSI 2 "register_operand" "w")
3018 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3019 (sign_extend:<VWIDE>
3021 (match_operand:VQ_HSI 3 "register_operand" "w")
3025 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3026 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3029 (define_expand "aarch64_sqdmlal2<mode>"
3030 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3031 (match_operand:<VWIDE> 1 "register_operand" "w")
3032 (match_operand:VQ_HSI 2 "register_operand" "w")
3033 (match_operand:VQ_HSI 3 "register_operand" "w")]
3036 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3037 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3038 operands[2], operands[3], p));
3042 (define_expand "aarch64_sqdmlsl2<mode>"
3043 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3044 (match_operand:<VWIDE> 1 "register_operand" "w")
3045 (match_operand:VQ_HSI 2 "register_operand" "w")
3046 (match_operand:VQ_HSI 3 "register_operand" "w")]
3049 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3050 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3051 operands[2], operands[3], p));
3057 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3058 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3060 (match_operand:<VWIDE> 1 "register_operand" "0")
3063 (sign_extend:<VWIDE>
3065 (match_operand:VQ_HSI 2 "register_operand" "w")
3066 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3067 (sign_extend:<VWIDE>
3068 (vec_duplicate:<VHALF>
3070 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3071 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3076 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3078 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3080 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3083 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3086 (match_operand:<VWIDE> 1 "register_operand" "0")
3089 (sign_extend:<VWIDE>
3091 (match_operand:VQ_HSI 2 "register_operand" "w")
3092 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3093 (sign_extend:<VWIDE>
3094 (vec_duplicate:<VHALF>
3096 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3097 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3102 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3104 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3106 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3109 (define_expand "aarch64_sqdmlal2_lane<mode>"
3110 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3111 (match_operand:<VWIDE> 1 "register_operand" "w")
3112 (match_operand:VQ_HSI 2 "register_operand" "w")
3113 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3114 (match_operand:SI 4 "immediate_operand" "i")]
3117 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3118 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3119 operands[2], operands[3],
3124 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3125 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3126 (match_operand:<VWIDE> 1 "register_operand" "w")
3127 (match_operand:VQ_HSI 2 "register_operand" "w")
3128 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3129 (match_operand:SI 4 "immediate_operand" "i")]
3132 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3133 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3134 operands[2], operands[3],
3139 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3140 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3141 (match_operand:<VWIDE> 1 "register_operand" "w")
3142 (match_operand:VQ_HSI 2 "register_operand" "w")
3143 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3144 (match_operand:SI 4 "immediate_operand" "i")]
3147 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3148 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3149 operands[2], operands[3],
3154 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3155 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3156 (match_operand:<VWIDE> 1 "register_operand" "w")
3157 (match_operand:VQ_HSI 2 "register_operand" "w")
3158 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3159 (match_operand:SI 4 "immediate_operand" "i")]
3162 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3163 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3164 operands[2], operands[3],
3169 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172 (match_operand:<VWIDE> 1 "register_operand" "0")
3175 (sign_extend:<VWIDE>
3177 (match_operand:VQ_HSI 2 "register_operand" "w")
3178 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3179 (sign_extend:<VWIDE>
3180 (vec_duplicate:<VHALF>
3181 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3184 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3185 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3188 (define_expand "aarch64_sqdmlal2_n<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:<VWIDE> 1 "register_operand" "w")
3191 (match_operand:VQ_HSI 2 "register_operand" "w")
3192 (match_operand:<VEL> 3 "register_operand" "w")]
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3196 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3197 operands[2], operands[3],
3202 (define_expand "aarch64_sqdmlsl2_n<mode>"
3203 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (match_operand:<VWIDE> 1 "register_operand" "w")
3205 (match_operand:VQ_HSI 2 "register_operand" "w")
3206 (match_operand:<VEL> 3 "register_operand" "w")]
3209 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3210 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3211 operands[2], operands[3],
3218 (define_insn "aarch64_sqdmull<mode>"
3219 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3222 (sign_extend:<VWIDE>
3223 (match_operand:VSD_HSI 1 "register_operand" "w"))
3224 (sign_extend:<VWIDE>
3225 (match_operand:VSD_HSI 2 "register_operand" "w")))
3228 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3229 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
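;; sqdmull widens: each result element is sat (2 * a * b) at twice the
;; source element width, and saturation can only occur when both inputs
;; are the most negative representable value.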
3234 (define_insn "aarch64_sqdmull_lane<mode>"
3235 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3238 (sign_extend:<VWIDE>
3239 (match_operand:VD_HSI 1 "register_operand" "w"))
3240 (sign_extend:<VWIDE>
3241 (vec_duplicate:VD_HSI
3243 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3244 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3249 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3250 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3252 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3255 (define_insn "aarch64_sqdmull_laneq<mode>"
3256 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3259 (sign_extend:<VWIDE>
3260 (match_operand:VD_HSI 1 "register_operand" "w"))
3261 (sign_extend:<VWIDE>
3262 (vec_duplicate:VD_HSI
3264 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3265 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3270 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3271 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3273 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3276 (define_insn "aarch64_sqdmull_lane<mode>"
3277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3280 (sign_extend:<VWIDE>
3281 (match_operand:SD_HSI 1 "register_operand" "w"))
3282 (sign_extend:<VWIDE>
3284 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3285 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3290 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3291 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3293 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3296 (define_insn "aarch64_sqdmull_laneq<mode>"
3297 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3300 (sign_extend:<VWIDE>
3301 (match_operand:SD_HSI 1 "register_operand" "w"))
3302 (sign_extend:<VWIDE>
3304 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3305 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3310 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3311 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3313 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3318 (define_insn "aarch64_sqdmull_n<mode>"
3319 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3322 (sign_extend:<VWIDE>
3323 (match_operand:VD_HSI 1 "register_operand" "w"))
3324 (sign_extend:<VWIDE>
3325 (vec_duplicate:VD_HSI
3326 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3330 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3331 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3338 (define_insn "aarch64_sqdmull2<mode>_internal"
3339 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (sign_extend:<VWIDE>
3344 (match_operand:VQ_HSI 1 "register_operand" "w")
3345 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3346 (sign_extend:<VWIDE>
3348 (match_operand:VQ_HSI 2 "register_operand" "w")
3353 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3354 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3357 (define_expand "aarch64_sqdmull2<mode>"
3358 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3359 (match_operand:VQ_HSI 1 "register_operand" "w")
3360 (match_operand:VQ_HSI 2 "register_operand" "w")]
3363 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3364 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3371 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3372 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3375 (sign_extend:<VWIDE>
3377 (match_operand:VQ_HSI 1 "register_operand" "w")
3378 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3379 (sign_extend:<VWIDE>
3380 (vec_duplicate:<VHALF>
3382 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3383 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3388 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3389 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3391 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3394 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3395 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3398 (sign_extend:<VWIDE>
3400 (match_operand:VQ_HSI 1 "register_operand" "w")
3401 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3402 (sign_extend:<VWIDE>
3403 (vec_duplicate:<VHALF>
3405 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3406 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3411 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3412 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3414 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3417 (define_expand "aarch64_sqdmull2_lane<mode>"
3418 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3419 (match_operand:VQ_HSI 1 "register_operand" "w")
3420 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3421 (match_operand:SI 3 "immediate_operand" "i")]
3424 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3425 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3426 operands[2], operands[3],
3431 (define_expand "aarch64_sqdmull2_laneq<mode>"
3432 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (match_operand:VQ_HSI 1 "register_operand" "w")
3434 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3435 (match_operand:SI 3 "immediate_operand" "i")]
3438 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3439 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3440 operands[2], operands[3],
3447 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3448 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3451 (sign_extend:<VWIDE>
3453 (match_operand:VQ_HSI 1 "register_operand" "w")
3454 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3455 (sign_extend:<VWIDE>
3456 (vec_duplicate:<VHALF>
3457 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3461 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3462 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3465 (define_expand "aarch64_sqdmull2_n<mode>"
3466 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3467 (match_operand:VQ_HSI 1 "register_operand" "w")
3468 (match_operand:<VEL> 2 "register_operand" "w")]
3471 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3472 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
3479 (define_insn "aarch64_<sur>shl<mode>"
3480 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3482 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3483 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3486 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3487 [(set_attr "type" "neon_shift_reg<q>")]
3493 (define_insn "aarch64_<sur>q<r>shl<mode>"
3494 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3496 [(match_operand:VSDQ_I 1 "register_operand" "w")
3497 (match_operand:VSDQ_I 2 "register_operand" "w")]
3500 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3501 [(set_attr "type" "neon_sat_shift_reg<q>")]
3506 (define_insn "aarch64_<sur>shll_n<mode>"
3507 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3508 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3510 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3514 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3515 if (INTVAL (operands[2]) == bit_width)
3517 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3520 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3522 [(set_attr "type" "neon_shift_imm_long")]
3527 (define_insn "aarch64_<sur>shll2_n<mode>"
3528 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3529 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3530 (match_operand:SI 2 "immediate_operand" "i")]
3534 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3535 if (INTVAL (operands[2]) == bit_width)
3537 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3540 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3542 [(set_attr "type" "neon_shift_imm_long")]
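;; The special case in the two patterns above exists because {s,u}shll{,2}
;; only accept shift amounts up to the element width minus one; a shift of
;; exactly the element width is provided by the separate shll{,2} form,
;; for which signedness is irrelevant since the low bits become zero.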
3547 (define_insn "aarch64_<sur>shr_n<mode>"
3548 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3549 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3551 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3554 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3555 [(set_attr "type" "neon_sat_shift_imm<q>")]
3560 (define_insn "aarch64_<sur>sra_n<mode>"
3561 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3562 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3563 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3565 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3568 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3569 [(set_attr "type" "neon_shift_acc<q>")]
3574 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3575 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3576 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3577 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3579 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3582 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3583 [(set_attr "type" "neon_shift_imm<q>")]
3588 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3589 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3590 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3592 "aarch64_simd_shift_imm_<ve_mode>" "i")]
3595 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3596 [(set_attr "type" "neon_sat_shift_imm<q>")]
3602 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3603 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3604 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3606 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3609 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
3610 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3614 ;; cm(eq|ge|gt|lt|le)
3615 ;; Note: we have constraints for Dz and Z, as different expanders
3616 ;; have different ideas of what should be passed to this pattern.
3618 (define_insn "aarch64_cm<optab><mode>"
3619 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3621 (COMPARISONS:<V_cmp_result>
3622 (match_operand:VDQ_I 1 "register_operand" "w,w")
3623 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3627 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3628 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3629 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
3632 (define_insn_and_split "aarch64_cm<optab>di"
3633 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3636 (match_operand:DI 1 "register_operand" "w,w,r")
3637 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3639 (clobber (reg:CC CC_REGNUM))]
3643 [(set (match_operand:DI 0 "register_operand")
3646 (match_operand:DI 1 "register_operand")
3647 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3650 /* If we are in the general purpose register file,
3651 we split to a sequence of comparison and store. */
3652 if (GP_REGNUM_P (REGNO (operands[0]))
3653 && GP_REGNUM_P (REGNO (operands[1])))
3655 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3656 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3657 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3658 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3661 /* Otherwise, we expand to a similar pattern which does not
3662 clobber CC_REGNUM. */
3664 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
3667 (define_insn "*aarch64_cm<optab>di"
3668 [(set (match_operand:DI 0 "register_operand" "=w,w")
3671 (match_operand:DI 1 "register_operand" "w,w")
3672 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3674 "TARGET_SIMD && reload_completed"
3676 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3677 cm<optab>\t%d0, %d1, #0"
3678 [(set_attr "type" "neon_compare, neon_compare_zero")]
3683 (define_insn "aarch64_cm<optab><mode>"
3684 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3686 (UCOMPARISONS:<V_cmp_result>
3687 (match_operand:VDQ_I 1 "register_operand" "w")
3688 (match_operand:VDQ_I 2 "register_operand" "w")
3691 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3692 [(set_attr "type" "neon_compare<q>")]
3695 (define_insn_and_split "aarch64_cm<optab>di"
3696 [(set (match_operand:DI 0 "register_operand" "=w,r")
3699 (match_operand:DI 1 "register_operand" "w,r")
3700 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3702 (clobber (reg:CC CC_REGNUM))]
3706 [(set (match_operand:DI 0 "register_operand")
3709 (match_operand:DI 1 "register_operand")
3710 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3713 /* If we are in the general purpose register file,
3714 we split to a sequence of comparison and store. */
3715 if (GP_REGNUM_P (REGNO (operands[0]))
3716 && GP_REGNUM_P (REGNO (operands[1])))
3718 machine_mode mode = CCmode;
3719 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3720 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3721 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3724 /* Otherwise, we expand to a similar pattern which does not
3725 clobber CC_REGNUM. */
3727 [(set_attr "type" "neon_compare,multiple")]
3730 (define_insn "*aarch64_cm<optab>di"
3731 [(set (match_operand:DI 0 "register_operand" "=w")
3734 (match_operand:DI 1 "register_operand" "w")
3735 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
3737 "TARGET_SIMD && reload_completed"
3738 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
3739 [(set_attr "type" "neon_compare")]
3744 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
3745 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
3746 ;; not (neg (eq (and x y) 0))
3747 ;; which is rewritten by simplify_rtx as
3748 ;; plus (eq (and x y) 0) -1.
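;; (The rewrite holds because ~(-x) == x - 1 in two's complement, so
;; not (neg (eq (and x y) 0)) becomes plus (eq (and x y) 0) -1.)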
3750 (define_insn "aarch64_cmtst<mode>"
3751 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3752 (plus:<V_cmp_result>
3755 (match_operand:VDQ_I 1 "register_operand" "w")
3756 (match_operand:VDQ_I 2 "register_operand" "w"))
3757 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
3758 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
3761 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3762 [(set_attr "type" "neon_tst<q>")]
3765 (define_insn_and_split "aarch64_cmtstdi"
3766 [(set (match_operand:DI 0 "register_operand" "=w,r")
3770 (match_operand:DI 1 "register_operand" "w,r")
3771 (match_operand:DI 2 "register_operand" "w,r"))
3773 (clobber (reg:CC CC_REGNUM))]
3777 [(set (match_operand:DI 0 "register_operand")
3781 (match_operand:DI 1 "register_operand")
3782 (match_operand:DI 2 "register_operand"))
3785 /* If we are in the general purpose register file,
3786 we split to a sequence of comparison and store. */
3787 if (GP_REGNUM_P (REGNO (operands[0]))
3788 && GP_REGNUM_P (REGNO (operands[1])))
3790 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3791 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3792 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3793 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3794 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3797 /* Otherwise, we expand to a similar pattern which does not
3798 clobber CC_REGNUM. */
3800 [(set_attr "type" "neon_tst,multiple")]
3803 (define_insn "*aarch64_cmtstdi"
3804 [(set (match_operand:DI 0 "register_operand" "=w")
3808 (match_operand:DI 1 "register_operand" "w")
3809 (match_operand:DI 2 "register_operand" "w"))
3812 "cmtst\t%d0, %d1, %d2"
3813 [(set_attr "type" "neon_tst")]
3816 ;; fcm(eq|ge|gt|le|lt)
3818 (define_insn "aarch64_cm<optab><mode>"
3819 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3821 (COMPARISONS:<V_cmp_result>
3822 (match_operand:VALLF 1 "register_operand" "w,w")
3823 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3827 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3828 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3829 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3833 ;; Note we can also handle what would be fac(le|lt) by
3834 ;; generating fac(ge|gt).
3836 (define_insn "*aarch64_fac<optab><mode>"
3837 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3839 (FAC_COMPARISONS:<V_cmp_result>
3840 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3841 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3844 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3845 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
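;; Illustration: "facge v0.4s, v1.4s, v2.4s" sets each lane to all ones
;; when |v1| >= |v2| for that lane and to zero otherwise; fac(le|lt) are
;; obtained by swapping the compared operands via <cmp_1>/<cmp_2>.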
3850 (define_insn "aarch64_addp<mode>"
3851 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3853 [(match_operand:VD_BHSI 1 "register_operand" "w")
3854 (match_operand:VD_BHSI 2 "register_operand" "w")]
3857 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3858 [(set_attr "type" "neon_reduc_add<q>")]
3861 (define_insn "aarch64_addpdi"
3862 [(set (match_operand:DI 0 "register_operand" "=w")
3864 [(match_operand:V2DI 1 "register_operand" "w")]
3868 [(set_attr "type" "neon_reduc_add")]
3873 (define_insn "sqrt<mode>2"
3874 [(set (match_operand:VDQF 0 "register_operand" "=w")
3875 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3877 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3878 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3881 ;; Patterns for vector struct loads and stores.
3883 (define_insn "vec_load_lanesoi<mode>"
3884 [(set (match_operand:OI 0 "register_operand" "=w")
3885 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3886 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3889 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3890 [(set_attr "type" "neon_load2_2reg<q>")]
3893 (define_insn "aarch64_simd_ld2r<mode>"
3894 [(set (match_operand:OI 0 "register_operand" "=w")
3895 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3896 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3899 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3900 [(set_attr "type" "neon_load2_all_lanes<q>")]
3903 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
3904 [(set (match_operand:OI 0 "register_operand" "=w")
3905 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3906 (match_operand:OI 2 "register_operand" "0")
3907 (match_operand:SI 3 "immediate_operand" "i")
3908 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3911 "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
3912 [(set_attr "type" "neon_load2_one_lane")]
3915 (define_insn "vec_store_lanesoi<mode>"
3916 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3917 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3918 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3921 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3922 [(set_attr "type" "neon_store2_2reg<q>")]
3925 (define_insn "vec_store_lanesoi_lane<mode>"
3926 [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3927 (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
3928 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3929 (match_operand:SI 2 "immediate_operand" "i")]
3932 "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
3933 [(set_attr "type" "neon_store3_one_lane<q>")]
3936 (define_insn "vec_load_lanesci<mode>"
3937 [(set (match_operand:CI 0 "register_operand" "=w")
3938 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3939 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3942 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3943 [(set_attr "type" "neon_load3_3reg<q>")]
3946 (define_insn "aarch64_simd_ld3r<mode>"
3947 [(set (match_operand:CI 0 "register_operand" "=w")
3948 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3949 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3952 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3953 [(set_attr "type" "neon_load3_all_lanes<q>")]
3956 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
3957 [(set (match_operand:CI 0 "register_operand" "=w")
3958 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3959 (match_operand:CI 2 "register_operand" "0")
3960 (match_operand:SI 3 "immediate_operand" "i")
3961 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3964 "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
3965 [(set_attr "type" "neon_load3_one_lane")]
3968 (define_insn "vec_store_lanesci<mode>"
3969 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3970 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3971 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3974 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3975 [(set_attr "type" "neon_store3_3reg<q>")]
3978 (define_insn "vec_store_lanesci_lane<mode>"
3979 [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3980 (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
3981 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3982 (match_operand:SI 2 "immediate_operand" "i")]
3985 "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
3986 [(set_attr "type" "neon_store3_one_lane<q>")]
3989 (define_insn "vec_load_lanesxi<mode>"
3990 [(set (match_operand:XI 0 "register_operand" "=w")
3991 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
3992 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3995 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3996 [(set_attr "type" "neon_load4_4reg<q>")]
3999 (define_insn "aarch64_simd_ld4r<mode>"
4000 [(set (match_operand:XI 0 "register_operand" "=w")
4001 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4002 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4005 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4006 [(set_attr "type" "neon_load4_all_lanes<q>")]
4009 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4010 [(set (match_operand:XI 0 "register_operand" "=w")
4011 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4012 (match_operand:XI 2 "register_operand" "0")
4013 (match_operand:SI 3 "immediate_operand" "i")
4014 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4017 "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
4018 [(set_attr "type" "neon_load4_one_lane")]
4021 (define_insn "vec_store_lanesxi<mode>"
4022 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4023 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4024 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4027 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4028 [(set_attr "type" "neon_store4_4reg<q>")]
4031 (define_insn "vec_store_lanesxi_lane<mode>"
4032 [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
4033 (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
4034 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4035 (match_operand:SI 2 "immediate_operand" "i")]
4038 "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
4039 [(set_attr "type" "neon_store4_one_lane<q>")]
4042 ;; Reload patterns for AdvSIMD register list operands.
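;; As an illustrative sketch only (assuming arm_neon.h), a structure-load
;; intrinsic such as:
;;
;;   #include <arm_neon.h>
;;   int32x4x2_t f (const int32_t *p) { return vld2q_s32 (p); }
;;
;; leaves the register allocator holding an OImode (two-Q-register) value;
;; any copy or spill of that value is carried by the VSTRUCT move patterns
;; below.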
4044 (define_expand "mov<mode>"
4045 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4046 (match_operand:VSTRUCT 1 "general_operand" ""))]
4049 if (can_create_pseudo_p ())
4051 if (GET_CODE (operands[0]) != REG)
4052 operands[1] = force_reg (<MODE>mode, operands[1]);
4056 (define_insn "*aarch64_mov<mode>"
4057 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4058 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4059 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4060 && (register_operand (operands[0], <MODE>mode)
4061 || register_operand (operands[1], <MODE>mode))"
4064 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4065 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4066 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4067 neon_load<nregs>_<nregs>reg_q")
4068 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4071 (define_insn "aarch64_be_ld1<mode>"
4072 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4073 (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
4076 "ld1\\t{%0<Vmtype>}, %1"
4077 [(set_attr "type" "neon_load1_1reg<q>")]
4080 (define_insn "aarch64_be_st1<mode>"
4081 [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
4082 (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
4085 "st1\\t{%1<Vmtype>}, %0"
4086 [(set_attr "type" "neon_store1_1reg<q>")]
4089 (define_insn "*aarch64_be_movoi"
4090 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4091 (match_operand:OI 1 "general_operand" " w,w,m"))]
4092 "TARGET_SIMD && BYTES_BIG_ENDIAN
4093 && (register_operand (operands[0], OImode)
4094 || register_operand (operands[1], OImode))"
4099 [(set_attr "type" "multiple,neon_store2_2reg_q,neon_load2_2reg_q")
4100 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4103 (define_insn "*aarch64_be_movci"
4104 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4105 (match_operand:CI 1 "general_operand" " w,w,o"))]
4106 "TARGET_SIMD && BYTES_BIG_ENDIAN
4107 && (register_operand (operands[0], CImode)
4108 || register_operand (operands[1], CImode))"
4110 [(set_attr "type" "multiple")
4111 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4114 (define_insn "*aarch64_be_movxi"
4115 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4116 (match_operand:XI 1 "general_operand" " w,w,o"))]
4117 "TARGET_SIMD && BYTES_BIG_ENDIAN
4118 && (register_operand (operands[0], XImode)
4119 || register_operand (operands[1], XImode))"
4121 [(set_attr "type" "multiple")
4122 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4126 [(set (match_operand:OI 0 "register_operand")
4127 (match_operand:OI 1 "register_operand"))]
4128 "TARGET_SIMD && reload_completed"
4131 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4136 [(set (match_operand:CI 0 "nonimmediate_operand")
4137 (match_operand:CI 1 "general_operand"))]
4138 "TARGET_SIMD && reload_completed"
4141 if (register_operand (operands[0], CImode)
4142 && register_operand (operands[1], CImode))
4144 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4147 else if (BYTES_BIG_ENDIAN)
4149 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4150 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4151 emit_move_insn (gen_lowpart (V16QImode,
4152 simplify_gen_subreg (TImode, operands[0],
4154 gen_lowpart (V16QImode,
4155 simplify_gen_subreg (TImode, operands[1],
4164 [(set (match_operand:XI 0 "nonimmediate_operand")
4165 (match_operand:XI 1 "general_operand"))]
4166 "TARGET_SIMD && reload_completed"
4169 if (register_operand (operands[0], XImode)
4170 && register_operand (operands[1], XImode))
4172 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4175 else if (BYTES_BIG_ENDIAN)
4177 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4178 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4179 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4180 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4187 (define_expand "aarch64_ld2r<mode>"
4188 [(match_operand:OI 0 "register_operand" "=w")
4189 (match_operand:DI 1 "register_operand" "w")
4190 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4193 machine_mode mode = <V_TWO_ELEM>mode;
4194 rtx mem = gen_rtx_MEM (mode, operands[1]);
4196 emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
4200 (define_expand "aarch64_ld3r<mode>"
4201 [(match_operand:CI 0 "register_operand" "=w")
4202 (match_operand:DI 1 "register_operand" "w")
4203 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4206 machine_mode mode = <V_THREE_ELEM>mode;
4207 rtx mem = gen_rtx_MEM (mode, operands[1]);
4209 emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
4213 (define_expand "aarch64_ld4r<mode>"
4214 [(match_operand:XI 0 "register_operand" "=w")
4215 (match_operand:DI 1 "register_operand" "w")
4216 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4219 machine_mode mode = <V_FOUR_ELEM>mode;
4220 rtx mem = gen_rtx_MEM (mode, operands[1]);
4222 emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0], mem));
4226 (define_insn "aarch64_ld2<mode>_dreg"
4227 [(set (match_operand:OI 0 "register_operand" "=w")
4231 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4233 (vec_duplicate:VD (const_int 0)))
4235 (unspec:VD [(match_dup 1)]
4237 (vec_duplicate:VD (const_int 0)))) 0))]
4239 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4240 [(set_attr "type" "neon_load2_2reg<q>")]
4243 (define_insn "aarch64_ld2<mode>_dreg"
4244 [(set (match_operand:OI 0 "register_operand" "=w")
4248 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4252 (unspec:DX [(match_dup 1)]
4254 (const_int 0))) 0))]
4256 "ld1\\t{%S0.1d - %T0.1d}, %1"
4257 [(set_attr "type" "neon_load1_2reg<q>")]
4260 (define_insn "aarch64_ld3<mode>_dreg"
4261 [(set (match_operand:CI 0 "register_operand" "=w")
4266 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4268 (vec_duplicate:VD (const_int 0)))
4270 (unspec:VD [(match_dup 1)]
4272 (vec_duplicate:VD (const_int 0))))
4274 (unspec:VD [(match_dup 1)]
4276 (vec_duplicate:VD (const_int 0)))) 0))]
4278 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4279 [(set_attr "type" "neon_load3_3reg<q>")]
4282 (define_insn "aarch64_ld3<mode>_dreg"
4283 [(set (match_operand:CI 0 "register_operand" "=w")
4288 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4292 (unspec:DX [(match_dup 1)]
4296 (unspec:DX [(match_dup 1)]
4298 (const_int 0))) 0))]
4300 "ld1\\t{%S0.1d - %U0.1d}, %1"
4301 [(set_attr "type" "neon_load1_3reg<q>")]
4304 (define_insn "aarch64_ld4<mode>_dreg"
4305 [(set (match_operand:XI 0 "register_operand" "=w")
4310 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4312 (vec_duplicate:VD (const_int 0)))
4314 (unspec:VD [(match_dup 1)]
4316 (vec_duplicate:VD (const_int 0))))
4319 (unspec:VD [(match_dup 1)]
4321 (vec_duplicate:VD (const_int 0)))
4323 (unspec:VD [(match_dup 1)]
4325 (vec_duplicate:VD (const_int 0))))) 0))]
4327 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4328 [(set_attr "type" "neon_load4_4reg<q>")]
4331 (define_insn "aarch64_ld4<mode>_dreg"
4332 [(set (match_operand:XI 0 "register_operand" "=w")
4337 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4341 (unspec:DX [(match_dup 1)]
4346 (unspec:DX [(match_dup 1)]
4350 (unspec:DX [(match_dup 1)]
4352 (const_int 0)))) 0))]
4354 "ld1\\t{%S0.1d - %V0.1d}, %1"
4355 [(set_attr "type" "neon_load1_4reg<q>")]
4358 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4359 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4360 (match_operand:DI 1 "register_operand" "r")
4361 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4364 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4365 rtx mem = gen_rtx_MEM (mode, operands[1]);
4367 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4371 (define_expand "aarch64_ld1<VALL:mode>"
4372 [(match_operand:VALL 0 "register_operand")
4373 (match_operand:DI 1 "register_operand")]
4376 machine_mode mode = <VALL:MODE>mode;
4377 rtx mem = gen_rtx_MEM (mode, operands[1]);
4379 if (BYTES_BIG_ENDIAN)
4380 emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
4382 emit_move_insn (operands[0], mem);
4386 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4387 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4388 (match_operand:DI 1 "register_operand" "r")
4389 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4392 machine_mode mode = <VSTRUCT:MODE>mode;
4393 rtx mem = gen_rtx_MEM (mode, operands[1]);
4395 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
4399 (define_expand "aarch64_ld2_lane<mode>"
4400 [(match_operand:OI 0 "register_operand" "=w")
4401 (match_operand:DI 1 "register_operand" "w")
4402 (match_operand:OI 2 "register_operand" "0")
4403 (match_operand:SI 3 "immediate_operand" "i")
4404 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4407 machine_mode mode = <V_TWO_ELEM>mode;
4408 rtx mem = gen_rtx_MEM (mode, operands[1]);
4410 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4412 emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
4419 (define_expand "aarch64_ld3_lane<mode>"
4420 [(match_operand:CI 0 "register_operand" "=w")
4421 (match_operand:DI 1 "register_operand" "w")
4422 (match_operand:CI 2 "register_operand" "0")
4423 (match_operand:SI 3 "immediate_operand" "i")
4424 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4427 machine_mode mode = <V_THREE_ELEM>mode;
4428 rtx mem = gen_rtx_MEM (mode, operands[1]);
4430 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4432 emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
4439 (define_expand "aarch64_ld4_lane<mode>"
4440 [(match_operand:XI 0 "register_operand" "=w")
4441 (match_operand:DI 1 "register_operand" "w")
4442 (match_operand:XI 2 "register_operand" "0")
4443 (match_operand:SI 3 "immediate_operand" "i")
4444 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4447 machine_mode mode = <V_FOUR_ELEM>mode;
4448 rtx mem = gen_rtx_MEM (mode, operands[1]);
4450 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4452 emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
4461 ;; Expanders for builtins to extract vector registers from large
4462 ;; opaque integer modes.
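;; Illustrative sketch (assuming the arm_neon.h implementation of the
;; structure-load intrinsics): a D-register load such as
;;
;;   int8x8x2_t r = vld2_s8 (p);
;;   int8x8_t lo = r.val[0];
;;
;; reads each .val[N] back out of the builtin's OImode result through these
;; get_dreg/get_qreg expanders, where operand 2 selects the Nth 16-byte slice.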
4466 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4467 [(match_operand:VDC 0 "register_operand" "=w")
4468 (match_operand:VSTRUCT 1 "register_operand" "w")
4469 (match_operand:SI 2 "immediate_operand" "i")]
4472 int part = INTVAL (operands[2]);
4473 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4474 int offset = part * 16;
4476 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4477 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
4483 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4484 [(match_operand:VQ 0 "register_operand" "=w")
4485 (match_operand:VSTRUCT 1 "register_operand" "w")
4486 (match_operand:SI 2 "immediate_operand" "i")]
4489 int part = INTVAL (operands[2]);
4490 int offset = part * 16;
4492 emit_move_insn (operands[0],
4493 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
4497 ;; Permuted-store expanders for neon intrinsics.
4499 ;; Permute instructions
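;; A sketch of what reaches these patterns (illustrative only): a generic
;; two-input shuffle such as
;;
;;   uint8x8_t zip (uint8x8_t a, uint8x8_t b)
;;   {
;;     return __builtin_shuffle (a, b,
;;                               (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11});
;;   }
;;
;; is expanded through vec_perm/vec_perm_const; a constant selector that
;; matches a ZIP/UZP/TRN/EXT/REV/DUP form is emitted directly, and anything
;; else falls back to a TBL lookup over the concatenated inputs.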
4503 (define_expand "vec_perm_const<mode>"
4504 [(match_operand:VALL 0 "register_operand")
4505 (match_operand:VALL 1 "register_operand")
4506 (match_operand:VALL 2 "register_operand")
4507 (match_operand:<V_cmp_result> 3)]
4510 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
4511 operands[2], operands[3]))
4517 (define_expand "vec_perm<mode>"
4518 [(match_operand:VB 0 "register_operand")
4519 (match_operand:VB 1 "register_operand")
4520 (match_operand:VB 2 "register_operand")
4521 (match_operand:VB 3 "register_operand")]
4524 aarch64_expand_vec_perm (operands[0], operands[1],
4525 operands[2], operands[3]);
4529 (define_insn "aarch64_tbl1<mode>"
4530 [(set (match_operand:VB 0 "register_operand" "=w")
4531 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
4532 (match_operand:VB 2 "register_operand" "w")]
4535 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
4536 [(set_attr "type" "neon_tbl1<q>")]
4539 ;; Two source registers.
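;; Illustrative only: the variable-selector path of vec_perm ends up here
;; once the inputs have been packed into an OImode pair, emitting roughly
;;
;;   tbl  v0.16b, {v1.16b - v2.16b}, v3.16b
;;
;; where v1/v2 are the consecutive pair registers and v3 holds the byte
;; indices.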
4541 (define_insn "aarch64_tbl2v16qi"
4542 [(set (match_operand:V16QI 0 "register_operand" "=w")
4543 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
4544 (match_operand:V16QI 2 "register_operand" "w")]
4547 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
4548 [(set_attr "type" "neon_tbl2_q")]
4551 (define_insn_and_split "aarch64_combinev16qi"
4552 [(set (match_operand:OI 0 "register_operand" "=w")
4553 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
4554 (match_operand:V16QI 2 "register_operand" "w")]
4558 "&& reload_completed"
4561 aarch64_split_combinev16qi (operands);
4564 [(set_attr "type" "multiple")]
4567 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
4568 [(set (match_operand:VALL 0 "register_operand" "=w")
4569 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4570 (match_operand:VALL 2 "register_operand" "w")]
4573 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4574 [(set_attr "type" "neon_permute<q>")]
4577 ;; Note that the immediate (third) operand is a lane index, not a byte index.
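;; Worked example (illustrative): for V4SImode, e.g. vextq_s32 (a, b, 2),
;; the lane index 2 is rescaled below to 2 * GET_MODE_SIZE (SImode) = 8,
;; giving "ext v0.16b, v1.16b, v2.16b, #8".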
4578 (define_insn "aarch64_ext<mode>"
4579 [(set (match_operand:VALL 0 "register_operand" "=w")
4580 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4581 (match_operand:VALL 2 "register_operand" "w")
4582 (match_operand:SI 3 "immediate_operand" "i")]
4586 operands[3] = GEN_INT (INTVAL (operands[3])
4587 * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
4588 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
4590 [(set_attr "type" "neon_ext<q>")]
4593 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
4594 [(set (match_operand:VALL 0 "register_operand" "=w")
4595 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
4598 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
4599 [(set_attr "type" "neon_rev<q>")]
4602 (define_insn "aarch64_st2<mode>_dreg"
4603 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4604 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4605 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4608 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4609 [(set_attr "type" "neon_store2_2reg")]
4612 (define_insn "aarch64_st2<mode>_dreg"
4613 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4614 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4615 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4618 "st1\\t{%S1.1d - %T1.1d}, %0"
4619 [(set_attr "type" "neon_store1_2reg")]
4622 (define_insn "aarch64_st3<mode>_dreg"
4623 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4624 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4625 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4628 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4629 [(set_attr "type" "neon_store3_3reg")]
4632 (define_insn "aarch64_st3<mode>_dreg"
4633 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4634 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4635 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4638 "st1\\t{%S1.1d - %U1.1d}, %0"
4639 [(set_attr "type" "neon_store1_3reg")]
4642 (define_insn "aarch64_st4<mode>_dreg"
4643 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4644 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4645 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4648 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4649 [(set_attr "type" "neon_store4_4reg")]
4652 (define_insn "aarch64_st4<mode>_dreg"
4653 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4654 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4655 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4658 "st1\\t{%S1.1d - %V1.1d}, %0"
4659 [(set_attr "type" "neon_store1_4reg")]
4662 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
4663 [(match_operand:DI 0 "register_operand" "r")
4664 (match_operand:VSTRUCT 1 "register_operand" "w")
4665 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4668 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4669 rtx mem = gen_rtx_MEM (mode, operands[0]);
4671 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
4675 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
4676 [(match_operand:DI 0 "register_operand" "r")
4677 (match_operand:VSTRUCT 1 "register_operand" "w")
4678 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4681 machine_mode mode = <VSTRUCT:MODE>mode;
4682 rtx mem = gen_rtx_MEM (mode, operands[0]);
4684 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
4688 (define_expand "aarch64_st2_lane<VQ:mode>"
4689 [(match_operand:DI 0 "register_operand" "r")
4690 (match_operand:OI 1 "register_operand" "w")
4691 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4692 (match_operand:SI 2 "immediate_operand")]
4695 machine_mode mode = <V_TWO_ELEM>mode;
4696 rtx mem = gen_rtx_MEM (mode, operands[0]);
4697 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4699 emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
4705 (define_expand "aarch64_st3_lane<VQ:mode>"
4706 [(match_operand:DI 0 "register_operand" "r")
4707 (match_operand:CI 1 "register_operand" "w")
4708 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4709 (match_operand:SI 2 "immediate_operand")]
4712 machine_mode mode = <V_THREE_ELEM>mode;
4713 rtx mem = gen_rtx_MEM (mode, operands[0]);
4714 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4716 emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
4722 (define_expand "aarch64_st4_lane<VQ:mode>"
4723 [(match_operand:DI 0 "register_operand" "r")
4724 (match_operand:XI 1 "register_operand" "w")
4725 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4726 (match_operand:SI 2 "immediate_operand")]
4729 machine_mode mode = <V_FOUR_ELEM>mode;
4730 rtx mem = gen_rtx_MEM (mode, operands[0]);
4731 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4733 emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
4739 (define_expand "aarch64_st1<VALL:mode>"
4740 [(match_operand:DI 0 "register_operand")
4741 (match_operand:VALL 1 "register_operand")]
4744 machine_mode mode = <VALL:MODE>mode;
4745 rtx mem = gen_rtx_MEM (mode, operands[0]);
4747 if (BYTES_BIG_ENDIAN)
4748 emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
4750 emit_move_insn (mem, operands[1]);
4754 ;; Expander for builtins to insert vector registers into large
4755 ;; opaque integer modes.
4757 ;; Q-register list.  We don't need a D-reg inserter because arm_neon.h
4758 ;; zero-extends each D-reg to a Q-reg and inserts the resulting Q-regs.
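;; Illustrative sketch of the arm_neon.h side (assuming its vst2_s8
;; implementation): a D-register structure store
;;
;;   vst2_s8 (p, val);
;;
;; first widens each 64-bit val.val[N] to a Q register (upper half zeroed,
;; via vcombine_s8 with zero) and then uses this set_qreg expander to place
;; the widened registers into the OImode operand of the st2 builtin.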
4760 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4761 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4762 (match_operand:VSTRUCT 1 "register_operand" "0")
4763 (match_operand:VQ 2 "register_operand" "w")
4764 (match_operand:SI 3 "immediate_operand" "i")]
4767 int part = INTVAL (operands[3]);
4768 int offset = part * 16;
4770 emit_move_insn (operands[0], operands[1]);
4771 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4776 ;; Standard pattern name vec_init<mode>.
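;; Illustrative example: a per-element initialisation in C, e.g.
;;
;;   int32x4_t v = {a, b, c, d};
;;
;; is expanded through this pattern; aarch64_expand_vector_init broadly uses
;; DUP for a splat, element inserts for the general case, and a constant-pool
;; load for a constant vector.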
4778 (define_expand "vec_init<mode>"
4779 [(match_operand:VALL 0 "register_operand" "")
4780 (match_operand 1 "" "")]
4783 aarch64_expand_vector_init (operands[0], operands[1]);
4787 (define_insn "*aarch64_simd_ld1r<mode>"
4788 [(set (match_operand:VALL 0 "register_operand" "=w")
4790 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
4792 "ld1r\\t{%0.<Vtype>}, %1"
4793 [(set_attr "type" "neon_load1_all_lanes")]
4796 (define_insn "aarch64_frecpe<mode>"
4797 [(set (match_operand:VDQF 0 "register_operand" "=w")
4798 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4801 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
4802 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
4805 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
4806 [(set (match_operand:GPF 0 "register_operand" "=w")
4807 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
4810 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
4811 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
4814 (define_insn "aarch64_frecps<mode>"
4815 [(set (match_operand:VALLF 0 "register_operand" "=w")
4816 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
4817 (match_operand:VALLF 2 "register_operand" "w")]
4820 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4821 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
4824 (define_insn "aarch64_urecpe<mode>"
4825 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
4826 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
4829 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
4830 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
4832 ;; Standard pattern name vec_extract<mode>.
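;; Illustrative example: a C-level element read such as
;;
;;   int32_t x = vgetq_lane_s32 (v, 1);
;;
;; is expanded through this pattern and hence aarch64_get_lane, which emits
;; UMOV or DUP for a register destination, or a lane-indexed ST1 when the
;; result goes straight to memory.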
4834 (define_expand "vec_extract<mode>"
4835 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
4836 (match_operand:VALL 1 "register_operand" "")
4837 (match_operand:SI 2 "immediate_operand" "")]
4841 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
4847 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
4848 [(set (match_operand:V16QI 0 "register_operand" "=w")
4849 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4850 (match_operand:V16QI 2 "register_operand" "w")]
4852 "TARGET_SIMD && TARGET_CRYPTO"
4853 "aes<aes_op>\\t%0.16b, %2.16b"
4854 [(set_attr "type" "crypto_aese")]
4857 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
4858 [(set (match_operand:V16QI 0 "register_operand" "=w")
4859 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
4861 "TARGET_SIMD && TARGET_CRYPTO"
4862 "aes<aesmc_op>\\t%0.16b, %1.16b"
4863 [(set_attr "type" "crypto_aesmc")]
4868 (define_insn "aarch64_crypto_sha1hsi"
4869 [(set (match_operand:SI 0 "register_operand" "=w")
4870 (unspec:SI [(match_operand:SI 1
4871 "register_operand" "w")]
4873 "TARGET_SIMD && TARGET_CRYPTO"
4875 [(set_attr "type" "crypto_sha1_fast")]
4878 (define_insn "aarch64_crypto_sha1su1v4si"
4879 [(set (match_operand:V4SI 0 "register_operand" "=w")
4880 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4881 (match_operand:V4SI 2 "register_operand" "w")]
4883 "TARGET_SIMD && TARGET_CRYPTO"
4884 "sha1su1\\t%0.4s, %2.4s"
4885 [(set_attr "type" "crypto_sha1_fast")]
4888 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
4889 [(set (match_operand:V4SI 0 "register_operand" "=w")
4890 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4891 (match_operand:SI 2 "register_operand" "w")
4892 (match_operand:V4SI 3 "register_operand" "w")]
4894 "TARGET_SIMD && TARGET_CRYPTO"
4895 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
4896 [(set_attr "type" "crypto_sha1_slow")]
4899 (define_insn "aarch64_crypto_sha1su0v4si"
4900 [(set (match_operand:V4SI 0 "register_operand" "=w")
4901 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4902 (match_operand:V4SI 2 "register_operand" "w")
4903 (match_operand:V4SI 3 "register_operand" "w")]
4905 "TARGET_SIMD && TARGET_CRYPTO"
4906 "sha1su0\\t%0.4s, %2.4s, %3.4s"
4907 [(set_attr "type" "crypto_sha1_xor")]
4912 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
4913 [(set (match_operand:V4SI 0 "register_operand" "=w")
4914 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4915 (match_operand:V4SI 2 "register_operand" "w")
4916 (match_operand:V4SI 3 "register_operand" "w")]
4918 "TARGET_SIMD && TARGET_CRYPTO"
4919 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
4920 [(set_attr "type" "crypto_sha256_slow")]
4923 (define_insn "aarch64_crypto_sha256su0v4si"
4924 [(set (match_operand:V4SI 0 "register_operand" "=w")
4925 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4926 (match_operand:V4SI 2 "register_operand" "w")]
4928 "TARGET_SIMD &&TARGET_CRYPTO"
4929 "sha256su0\\t%0.4s, %2.4s"
4930 [(set_attr "type" "crypto_sha256_fast")]
4933 (define_insn "aarch64_crypto_sha256su1v4si"
4934 [(set (match_operand:V4SI 0 "register_operand" "=w")
4935 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4936 (match_operand:V4SI 2 "register_operand" "w")
4937 (match_operand:V4SI 3 "register_operand" "w")]
4939 "TARGET_SIMD &&TARGET_CRYPTO"
4940 "sha256su1\\t%0.4s, %2.4s, %3.4s"
4941 [(set_attr "type" "crypto_sha256_slow")]
4946 (define_insn "aarch64_crypto_pmulldi"
4947 [(set (match_operand:TI 0 "register_operand" "=w")
4948 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
4949 (match_operand:DI 2 "register_operand" "w")]
4951 "TARGET_SIMD && TARGET_CRYPTO"
4952 "pmull\\t%0.1q, %1.1d, %2.1d"
4953 [(set_attr "type" "neon_mul_d_long")]
4956 (define_insn "aarch64_crypto_pmullv2di"
4957 [(set (match_operand:TI 0 "register_operand" "=w")
4958 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
4959 (match_operand:V2DI 2 "register_operand" "w")]
4961 "TARGET_SIMD && TARGET_CRYPTO"
4962 "pmull2\\t%0.1q, %1.2d, %2.2d"
4963 [(set_attr "type" "neon_mul_d_long")]