1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for all vector modes (VALL_F16 iterator).
;; If the destination is memory, the source is forced into a register so
;; that no mem-to-mem move is ever emitted.
;; NOTE(review): interior lines of this listing appear truncated; code is
;; kept byte-identical and only comments are added.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned-move expander (VALL iterator).  Must not fail during
;; expansion, so when neither operand is a register (e.g. mem := const
;; created by the auto-vectorizer) operand 1 is loaded into a register.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector (VDQ_I).
;; Two alternatives: DUP from a general register (%<vw>1) or from lane 0
;; of a SIMD register (%1.<Vetype>[0]).
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant (VDQF_F16): DUP from lane 0 of a SIMD register
;; only, since the scalar lives in the FP/SIMD register file.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane of a vector into all lanes of the result.
;; The C fragment endian-adjusts the lane index via ENDIAN_LANE_N before
;; printing the DUP.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the swapped (other) width
;; (<VSWAP_WIDTH>), e.g. duplicating a lane of a Q-reg into a D-reg mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move.  Alternatives: load (ldr), store (str),
;; reg-reg SIMD move (orr), SIMD->GP (umov), GP->SIMD (ins), GP->GP
;; (mov), and move-immediate (Dn constraint).  The insn condition
;; requires at least one operand to be a register.
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
104 switch (which_alternative)
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "ins\t%0.d[0], %1";
111 case 5: return "mov\t%0, %1";
113 return aarch64_output_simd_mov_immediate (operands[1],
115 default: gcc_unreachable ();
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 mov_reg, neon_move<q>")]
;; 128-bit (VQ) vector move.  GP<->SIMD transfers need two instructions
;; (length 8, type "multiple"); the immediate alternative asks
;; aarch64_output_simd_mov_immediate for a 128-bit pattern.
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
132 switch (which_alternative)
135 return "ldr\\t%q0, %1";
137 return "str\\t%q1, %0";
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
153 (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of adjacent 64-bit vectors with one LDP.  The condition
;; checks that the second address equals the first plus the mode size,
;; i.e. the two loads really are consecutive.
156 (define_insn "load_pair<mode>"
157 [(set (match_operand:VD 0 "register_operand" "=w")
158 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159 (set (match_operand:VD 2 "register_operand" "=w")
160 (match_operand:VD 3 "memory_operand" "m"))]
162 && rtx_equal_p (XEXP (operands[3], 0),
163 plus_constant (Pmode,
164 XEXP (operands[1], 0),
165 GET_MODE_SIZE (<MODE>mode)))"
167 [(set_attr "type" "neon_ldp")]
;; Store a pair of adjacent 64-bit vectors with one STP; same
;; consecutive-address condition as load_pair above.
170 (define_insn "store_pair<mode>"
171 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 (match_operand:VD 1 "register_operand" "w"))
173 (set (match_operand:VD 2 "memory_operand" "=m")
174 (match_operand:VD 3 "register_operand" "w"))]
176 && rtx_equal_p (XEXP (operands[2], 0),
177 plus_constant (Pmode,
178 XEXP (operands[0], 0),
179 GET_MODE_SIZE (<MODE>mode)))"
181 [(set_attr "type" "neon_stp")]
;; Post-reload split: a 128-bit move between two general-purpose register
;; pairs is done as two DImode reg-reg moves.
;; NOTE(review): the opening (define_split ...) line is not visible in
;; this listing — presumed truncated; code left byte-identical.
185 [(set (match_operand:VQ 0 "register_operand" "")
186 (match_operand:VQ 1 "register_operand" ""))]
187 "TARGET_SIMD && reload_completed
188 && GP_REGNUM_P (REGNO (operands[0]))
189 && GP_REGNUM_P (REGNO (operands[1]))"
192 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split: a 128-bit move that crosses between the FP/SIMD and
;; general-purpose register files is handled by aarch64_split_simd_move.
197 [(set (match_operand:VQ 0 "register_operand" "")
198 (match_operand:VQ 1 "register_operand" ""))]
199 "TARGET_SIMD && reload_completed
200 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move in halves.  GP source: move low then high half
;; into the vector register.  Otherwise: extract low and high halves of
;; the vector source via vec_select parallels (endian-aware lo/hi masks).
208 (define_expand "aarch64_split_simd_mov<mode>"
209 [(set (match_operand:VQ 0)
210 (match_operand:VQ 1))]
213 rtx dst = operands[0];
214 rtx src = operands[1];
216 if (GP_REGNUM_P (REGNO (src)))
218 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 (gen_move_lo_quad_<mode> (dst, src_low_part));
224 (gen_move_hi_quad_<mode> (dst, src_high_part));
229 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a 128-bit vector to a general-purpose register
;; (vec_select with the lo-half parallel).  Only valid after reload.
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 (match_operand:VQ 1 "register_operand" "w")
247 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248 "TARGET_SIMD && reload_completed"
250 [(set_attr "type" "neon_to_gp<q>")
251 (set_attr "length" "4")
;; Same as above for the high half (hi-half parallel).
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 (match_operand:VQ 1 "register_operand" "w")
258 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259 "TARGET_SIMD && reload_completed"
261 [(set_attr "type" "neon_to_gp<q>")
262 (set_attr "length" "4")
;; ORN: bitwise OR with complemented first operand.  Note the template
;; swaps operands (%2, %1) because the AArch64 ORN complements its
;; second source while the RTL complements operand 1.
265 (define_insn "orn<mode>3"
266 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 (match_operand:VDQ_I 2 "register_operand" "w")))]
270 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271 [(set_attr "type" "neon_logic<q>")]
;; BIC: bitwise AND with complemented first operand; same operand swap
;; as ORN above.
274 (define_insn "bic<mode>3"
275 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 (match_operand:VDQ_I 2 "register_operand" "w")))]
279 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
283 (define_insn "add<mode>3"
284 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 (match_operand:VDQ_I 2 "register_operand" "w")))]
288 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
292 (define_insn "sub<mode>3"
293 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 (match_operand:VDQ_I 2 "register_operand" "w")))]
297 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication (byte/half/word element sizes only —
;; there is no V2DI MUL instruction, hence VDQ_BHSI).
301 (define_insn "mul<mode>3"
302 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the REV family (suffix chosen by
;; <Vrevsuff> from the element width).
310 (define_insn "bswap<mode>2"
311 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (RBIT), byte-vector modes only.
318 (define_insn "aarch64_rbit<mode>"
319 [(set (match_operand:VB 0 "register_operand" "=w")
320 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as bswap + rbit (full bit reversal
;; of each element) followed by clz.  The subreg recasts the SI vector
;; as a QI vector for the byte-wise RBIT.
327 (define_expand "ctz<mode>2"
328 [(set (match_operand:VS 0 "register_operand")
329 (ctz:VS (match_operand:VS 1 "register_operand")))]
332 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply a vector by one (endian-adjusted) lane of another vector,
;; using the by-element form of MUL/FMUL.
341 (define_insn "*aarch64_mul3_elt<mode>"
342 [(set (match_operand:VMUL 0 "register_operand" "=w")
346 (match_operand:VMUL 1 "register_operand" "<h_con>")
347 (parallel [(match_operand:SI 2 "immediate_operand")])))
348 (match_operand:VMUL 3 "register_operand" "w")))]
351 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; As above but the lane comes from a vector of the swapped width.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359 (mult:VMUL_CHANGE_NLANES
360 (vec_duplicate:VMUL_CHANGE_NLANES
362 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 (parallel [(match_operand:SI 2 "immediate_operand")])))
364 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 INTVAL (operands[2])));
369 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a duplicated scalar: maps onto the by-element multiply
;; with lane index 0.
374 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
375 [(set (match_operand:VMUL 0 "register_operand" "=w")
378 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
379 (match_operand:VMUL 2 "register_operand" "w")))]
381 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
382 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE).
385 (define_insn "aarch64_rsqrte_<mode>2"
386 [(set (match_operand:VALLF 0 "register_operand" "=w")
387 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
390 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391 [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
;; Reciprocal square-root Newton-step (FRSQRTS).
393 (define_insn "aarch64_rsqrts_<mode>3"
394 [(set (match_operand:VALLF 0 "register_operand" "=w")
395 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
396 (match_operand:VALLF 2 "register_operand" "w")]
399 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400 [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
;; rsqrt expander: delegates to the approximate-rsqrt emitter.
402 (define_expand "rsqrt<mode>2"
403 [(set (match_operand:VALLF 0 "register_operand" "=w")
404 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408 aarch64_emit_approx_rsqrt (operands[0], operands[1]);
;; DF result = lane of a V2DF times a DF scalar, by-element FMUL.
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413 [(set (match_operand:DF 0 "register_operand" "=w")
416 (match_operand:V2DF 1 "register_operand" "w")
417 (parallel [(match_operand:SI 2 "immediate_operand")]))
418 (match_operand:DF 3 "register_operand" "w")))]
421 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation.
427 (define_insn "neg<mode>2"
428 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431 "neg\t%0.<Vtype>, %1.<Vtype>"
432 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (RTL abs form, combinable).
435 (define_insn "abs<mode>2"
436 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439 "abs\t%0.<Vtype>, %1.<Vtype>"
440 [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; hence the unspec form below instead of an RTL abs.
446 (define_insn "aarch64_abs<mode>"
447 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| -> SABD.
456 (define_insn "abd<mode>_3"
457 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 (abs:VDQ_BHSI (minus:VDQ_BHSI
459 (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463 [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate: acc + |op1 - op2| -> SABA.
;; Operand 3 is tied to the destination ("0" constraint).
466 (define_insn "aba<mode>_3"
467 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference, vector form (FABD).
477 (define_insn "fabd<mode>_3"
478 [(set (match_operand:VDQF 0 "register_operand" "=w")
479 (abs:VDQF (minus:VDQF
480 (match_operand:VDQF 1 "register_operand" "w")
481 (match_operand:VDQF 2 "register_operand" "w"))))]
483 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
484 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Floating-point absolute difference, scalar (GPF) form.
487 (define_insn "*fabd_scalar<mode>3"
488 [(set (match_operand:GPF 0 "register_operand" "=w")
490 (match_operand:GPF 1 "register_operand" "w")
491 (match_operand:GPF 2 "register_operand" "w"))))]
493 "fabd\t%<s>0, %<s>1, %<s>2"
494 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Vector bitwise AND.
497 (define_insn "and<mode>3"
498 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 (match_operand:VDQ_I 2 "register_operand" "w")))]
502 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise inclusive OR (ORR).
506 (define_insn "ior<mode>3"
507 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 (match_operand:VDQ_I 2 "register_operand" "w")))]
511 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise exclusive OR (EOR).
515 (define_insn "xor<mode>3"
516 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
518 (match_operand:VDQ_I 2 "register_operand" "w")))]
520 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
521 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
524 (define_insn "one_cmpl<mode>2"
525 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
526 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
528 "not\t%0.<Vbtype>, %1.<Vbtype>"
529 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of an integer vector.  Operand 2 is a
;; one-hot mask; the C fragment recovers the lane via exact_log2, endian-
;; adjusts it, and rebuilds the mask.  Alternatives: INS from GP reg,
;; INS from SIMD lane 0, or LD1 of a single lane from memory.
532 (define_insn "aarch64_simd_vec_set<mode>"
533 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
535 (vec_duplicate:VDQ_BHSI
536 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
537 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
538 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
541 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
542 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
543 switch (which_alternative)
546 return "ins\\t%0.<Vetype>[%p2], %w1";
548 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
550 return "ld1\\t{%0.<Vetype>}[%p2], %1";
555 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Logical shift right by an immediate vector (USHR).
558 (define_insn "aarch64_simd_lshr<mode>"
559 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
560 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
561 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
563 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
564 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector (SSHR).
567 (define_insn "aarch64_simd_ashr<mode>"
568 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
569 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
570 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
572 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
573 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (SHL).
576 (define_insn "aarch64_simd_imm_shl<mode>"
577 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
578 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
579 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
581 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
582 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a register vector (SSHL); RTL ashift form.
585 (define_insn "aarch64_simd_reg_sshl<mode>"
586 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
588 (match_operand:VDQ_I 2 "register_operand" "w")))]
590 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
591 [(set_attr "type" "neon_shift_reg<q>")]
;; Register shift, unsigned semantics (USHL) — per-lane amounts may be
;; negative, giving a right shift; expressed as an unspec.
594 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
595 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
596 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
597 (match_operand:VDQ_I 2 "register_operand" "w")]
598 UNSPEC_ASHIFT_UNSIGNED))]
600 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
601 [(set_attr "type" "neon_shift_reg<q>")]
;; Register shift, signed semantics (SSHL) as an unspec.
604 (define_insn "aarch64_simd_reg_shl<mode>_signed"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
607 (match_operand:VDQ_I 2 "register_operand" "w")]
608 UNSPEC_ASHIFT_SIGNED))]
610 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
611 [(set_attr "type" "neon_shift_reg<q>")]
;; ashl<mode>3: vector shift-left expander.  Constant amounts in
;; [0, bit_width) use the immediate SHL; otherwise the scalar amount is
;; duplicated into a vector and SSHL (register form) is used.
614 (define_expand "ashl<mode>3"
615 [(match_operand:VDQ_I 0 "register_operand" "")
616 (match_operand:VDQ_I 1 "register_operand" "")
617 (match_operand:SI 2 "general_operand" "")]
620 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
623 if (CONST_INT_P (operands[2]))
625 shift_amount = INTVAL (operands[2]);
626 if (shift_amount >= 0 && shift_amount < bit_width)
628 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
630 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
637 operands[2] = force_reg (SImode, operands[2]);
640 else if (MEM_P (operands[2]))
642 operands[2] = force_reg (SImode, operands[2]);
645 if (REG_P (operands[2]))
647 rtx tmp = gen_reg_rtx (<MODE>mode);
648 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
649 convert_to_mode (<VEL>mode,
652 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; lshr<mode>3: logical right shift.  Constant amounts in
;; (0, bit_width] use immediate USHR; otherwise the amount is negated
;; (USHL shifts right for negative counts) and duplicated for the
;; unsigned register-shift unspec.
661 (define_expand "lshr<mode>3"
662 [(match_operand:VDQ_I 0 "register_operand" "")
663 (match_operand:VDQ_I 1 "register_operand" "")
664 (match_operand:SI 2 "general_operand" "")]
667 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
670 if (CONST_INT_P (operands[2]))
672 shift_amount = INTVAL (operands[2]);
673 if (shift_amount > 0 && shift_amount <= bit_width)
675 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
677 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
683 operands[2] = force_reg (SImode, operands[2]);
685 else if (MEM_P (operands[2]))
687 operands[2] = force_reg (SImode, operands[2]);
690 if (REG_P (operands[2]))
692 rtx tmp = gen_reg_rtx (SImode);
693 rtx tmp1 = gen_reg_rtx (<MODE>mode);
694 emit_insn (gen_negsi2 (tmp, operands[2]));
695 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
696 convert_to_mode (<VEL>mode,
698 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; ashr<mode>3: arithmetic right shift; mirrors lshr above but uses
;; SSHR / the signed register-shift unspec.
708 (define_expand "ashr<mode>3"
709 [(match_operand:VDQ_I 0 "register_operand" "")
710 (match_operand:VDQ_I 1 "register_operand" "")
711 (match_operand:SI 2 "general_operand" "")]
714 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
717 if (CONST_INT_P (operands[2]))
719 shift_amount = INTVAL (operands[2]);
720 if (shift_amount > 0 && shift_amount <= bit_width)
722 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
724 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
730 operands[2] = force_reg (SImode, operands[2]);
732 else if (MEM_P (operands[2]))
734 operands[2] = force_reg (SImode, operands[2]);
737 if (REG_P (operands[2]))
739 rtx tmp = gen_reg_rtx (SImode);
740 rtx tmp1 = gen_reg_rtx (<MODE>mode);
741 emit_insn (gen_negsi2 (tmp, operands[2]));
742 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
743 convert_to_mode (<VEL>mode,
745 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; vashl<mode>3: per-lane variable shift-left, straight to SSHL.
755 (define_expand "vashl<mode>3"
756 [(match_operand:VDQ_I 0 "register_operand" "")
757 (match_operand:VDQ_I 1 "register_operand" "")
758 (match_operand:VDQ_I 2 "register_operand" "")]
761 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
766 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
767 ;; Negating individual lanes most certainly offsets the
768 ;; gain from vectorization.
;; vashr<mode>3: per-lane variable arithmetic right shift — negate the
;; shift-amount vector, then use the signed register shift.
769 (define_expand "vashr<mode>3"
770 [(match_operand:VDQ_BHSI 0 "register_operand" "")
771 (match_operand:VDQ_BHSI 1 "register_operand" "")
772 (match_operand:VDQ_BHSI 2 "register_operand" "")]
775 rtx neg = gen_reg_rtx (<MODE>mode);
776 emit (gen_neg<mode>2 (neg, operands[2]));
777 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic shift right for intrinsics: a shift by 64 is
;; clamped to 63 (same result: all sign bits), since the standard
;; ashrdi3 pattern does not accept 64.
783 (define_expand "aarch64_ashr_simddi"
784 [(match_operand:DI 0 "register_operand" "=w")
785 (match_operand:DI 1 "register_operand" "w")
786 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
789 /* An arithmetic shift right by 64 fills the result with copies of the sign
790 bit, just like asr by 63 - however the standard pattern does not handle
792 if (INTVAL (operands[2]) == 64)
793 operands[2] = GEN_INT (63);
794 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; vlshr<mode>3: per-lane variable logical right shift via negated
;; amounts and the unsigned register shift.
799 (define_expand "vlshr<mode>3"
800 [(match_operand:VDQ_BHSI 0 "register_operand" "")
801 (match_operand:VDQ_BHSI 1 "register_operand" "")
802 (match_operand:VDQ_BHSI 2 "register_operand" "")]
805 rtx neg = gen_reg_rtx (<MODE>mode);
806 emit (gen_neg<mode>2 (neg, operands[2]));
807 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical shift right for intrinsics: a shift by 64 yields zero,
;; emitted directly as a move of 0.
812 (define_expand "aarch64_lshr_simddi"
813 [(match_operand:DI 0 "register_operand" "=w")
814 (match_operand:DI 1 "register_operand" "w")
815 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
818 if (INTVAL (operands[2]) == 64)
819 emit_move_insn (operands[0], const0_rtx);
821 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; vec_set expander (integer vectors): converts the lane index into the
;; one-hot mask form expected by aarch64_simd_vec_set<mode>.
826 (define_expand "vec_set<mode>"
827 [(match_operand:VDQ_BHSI 0 "register_operand")
828 (match_operand:<VEL> 1 "register_operand")
829 (match_operand:SI 2 "immediate_operand")]
832 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
833 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
834 GEN_INT (elem), operands[0]));
839 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper).  Big-endian
;; uses SHL because the lane order is reversed.
840 (define_insn "vec_shr_<mode>"
841 [(set (match_operand:VD 0 "register_operand" "=w")
842 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
843 (match_operand:SI 2 "immediate_operand" "i")]
847 if (BYTES_BIG_ENDIAN)
848 return "shl %d0, %d1, %2";
850 return "ushr %d0, %d1, %2";
852 [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: INS from a GP register or from lane 0 of a SIMD
;; register; same one-hot mask / endian-adjust dance as the BHSI insn.
855 (define_insn "aarch64_simd_vec_setv2di"
856 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
859 (match_operand:DI 1 "register_operand" "r,w"))
860 (match_operand:V2DI 3 "register_operand" "0,0")
861 (match_operand:SI 2 "immediate_operand" "i,i")))]
864 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
865 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
866 switch (which_alternative)
869 return "ins\\t%0.d[%p2], %1";
871 return "ins\\t%0.d[%p2], %1.d[0]";
876 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI, mirroring vec_set<mode> above.
879 (define_expand "vec_setv2di"
880 [(match_operand:V2DI 0 "register_operand")
881 (match_operand:DI 1 "register_operand")
882 (match_operand:SI 2 "immediate_operand")]
885 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
886 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
887 GEN_INT (elem), operands[0]));
;; Floating-point lane insert: INS from lane 0 of a SIMD register.
892 (define_insn "aarch64_simd_vec_set<mode>"
893 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
895 (vec_duplicate:VDQF_F16
896 (match_operand:<VEL> 1 "register_operand" "w"))
897 (match_operand:VDQF_F16 3 "register_operand" "0")
898 (match_operand:SI 2 "immediate_operand" "i")))]
901 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
903 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
904 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
906 [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors, same one-hot mask conversion.
909 (define_expand "vec_set<mode>"
910 [(match_operand:VDQF_F16 0 "register_operand" "+w")
911 (match_operand:<VEL> 1 "register_operand" "w")
912 (match_operand:SI 2 "immediate_operand" "")]
915 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
916 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
917 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: op0 = op1 + op2 * op3 (MLA; accumulator tied to
;; the destination via operand 1's "0" constraint).
923 (define_insn "aarch64_mla<mode>"
924 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
925 (plus:VDQ_BHSI (mult:VDQ_BHSI
926 (match_operand:VDQ_BHSI 2 "register_operand" "w")
927 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
928 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
930 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
931 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with a by-element multiplicand (endian-adjusted lane index).
934 (define_insn "*aarch64_mla_elt<mode>"
935 [(set (match_operand:VDQHS 0 "register_operand" "=w")
940 (match_operand:VDQHS 1 "register_operand" "<h_con>")
941 (parallel [(match_operand:SI 2 "immediate_operand")])))
942 (match_operand:VDQHS 3 "register_operand" "w"))
943 (match_operand:VDQHS 4 "register_operand" "0")))]
946 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
947 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
949 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, lane taken from a vector of the swapped width.
952 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
953 [(set (match_operand:VDQHS 0 "register_operand" "=w")
958 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
959 (parallel [(match_operand:SI 2 "immediate_operand")])))
960 (match_operand:VDQHS 3 "register_operand" "w"))
961 (match_operand:VDQHS 4 "register_operand" "0")))]
964 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
965 INTVAL (operands[2])));
966 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
968 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: op0 = op1 - op2 * op3 (MLS).
971 (define_insn "aarch64_mls<mode>"
972 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
973 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
974 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
975 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
977 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
978 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with a by-element multiplicand.
981 (define_insn "*aarch64_mls_elt<mode>"
982 [(set (match_operand:VDQHS 0 "register_operand" "=w")
984 (match_operand:VDQHS 4 "register_operand" "0")
988 (match_operand:VDQHS 1 "register_operand" "<h_con>")
989 (parallel [(match_operand:SI 2 "immediate_operand")])))
990 (match_operand:VDQHS 3 "register_operand" "w"))))]
993 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
994 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
996 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by-element, lane from the swapped-width vector.
999 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1000 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1002 (match_operand:VDQHS 4 "register_operand" "0")
1004 (vec_duplicate:VDQHS
1006 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1007 (parallel [(match_operand:SI 2 "immediate_operand")])))
1008 (match_operand:VDQHS 3 "register_operand" "w"))))]
1011 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1012 INTVAL (operands[2])));
1013 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1015 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1018 ;; Max/Min operations.
;; Signed/unsigned vector max/min via the MAXMIN code iterator.
1019 (define_insn "<su><maxmin><mode>3"
1020 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1021 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1022 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1024 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1025 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: no direct instruction, so it is synthesized via a
;; vector compare + select (aarch64_vcond_internalv2div2di).
1028 (define_expand "<su><maxmin>v2di3"
1029 [(set (match_operand:V2DI 0 "register_operand" "")
1030 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1031 (match_operand:V2DI 2 "register_operand" "")))]
1034 enum rtx_code cmp_operator;
1055 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1056 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1057 operands[2], cmp_fmt, operands[1], operands[2]));
1061 ;; Pairwise Integer Max/Min operations.
1062 (define_insn "aarch64_<maxmin_uns>p<mode>"
1063 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1064 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1065 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1068 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1069 [(set_attr "type" "neon_minmax<q>")]
1072 ;; Pairwise FP Max/Min operations.
1073 (define_insn "aarch64_<maxmin_uns>p<mode>"
1074 [(set (match_operand:VDQF 0 "register_operand" "=w")
1075 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1076 (match_operand:VDQF 2 "register_operand" "w")]
1079 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1080 [(set_attr "type" "neon_minmax<q>")]
1083 ;; vec_concat gives a new vector with the low elements from operand 1, and
1084 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1085 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1086 ;; What that means, is that the RTL descriptions of the below patterns
1087 ;; need to change depending on endianness.
1089 ;; Move to the low architectural bits of the register.
1090 ;; On little-endian this is { operand, zeroes }
1091 ;; On big-endian this is { zeroes, operand }
;; NOTE(review): several patterns in this excerpt are missing interior lines
;; (output templates and some RTL sub-expressions); the comments below
;; describe only what is visible here.

;; Little-endian: write operand 1 into the low 64 bits of the Q register
;; and zero the high half (VQ modes other than the two-element ones).
1093 (define_insn "move_lo_quad_internal_<mode>"
1094 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1096 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1097 (vec_duplicate:<VHALF> (const_int 0))))]
1098 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1103 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1104 (set_attr "simd" "yes,*,yes")
1105 (set_attr "fp" "*,yes,*")
1106 (set_attr "length" "4")]
;; As above, for the two-element (V2DI/V2DF) modes.
;; NOTE(review): the zero half of the vec_concat is not visible in this
;; excerpt — presumably (const_int 0) as in the VQ_NO2E variant; confirm.
1109 (define_insn "move_lo_quad_internal_<mode>"
1110 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1112 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1114 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1119 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1120 (set_attr "simd" "yes,*,yes")
1121 (set_attr "fp" "*,yes,*")
1122 (set_attr "length" "4")]
;; Big-endian counterpart: the vec_concat operands are swapped so that
;; operand 1 still lands in the low architectural bits.
1125 (define_insn "move_lo_quad_internal_be_<mode>"
1126 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1128 (vec_duplicate:<VHALF> (const_int 0))
1129 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1130 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1135 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1136 (set_attr "simd" "yes,*,yes")
1137 (set_attr "fp" "*,yes,*")
1138 (set_attr "length" "4")]
;; Big-endian, two-element modes.
1141 (define_insn "move_lo_quad_internal_be_<mode>"
1142 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1145 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1146 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1151 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1152 (set_attr "simd" "yes,*,yes")
1153 (set_attr "fp" "*,yes,*")
1154 (set_attr "length" "4")]
;; Endianness-dispatching expander: picks the _be_ or plain internal
;; pattern so callers need not care about byte order.
1157 (define_expand "move_lo_quad_<mode>"
1158 [(match_operand:VQ 0 "register_operand")
1159 (match_operand:VQ 1 "register_operand")]
1162 if (BYTES_BIG_ENDIAN)
1163 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1165 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1170 ;; Move operand1 to the high architectural bits of the register, keeping
1171 ;; the low architectural bits of operand2.
1172 ;; For little-endian this is { operand2, operand1 }
1173 ;; For big-endian this is { operand1, operand2 }
;; Insert operand 1 into the high architectural 64 bits of operand 0,
;; preserving the low half (INS %0.d[1], %1.d[0]).  Little-endian form:
;; the kept half is selected by the vect_par_cnst_lo_half parallel.
;; NOTE(review): interior lines are missing from this excerpt.
1175 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1176 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1180 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1181 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1182 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1184 ins\\t%0.d[1], %1.d[0]
1186 [(set_attr "type" "neon_ins")]
;; Big-endian counterpart: vec_concat operand order is reversed, but the
;; emitted instruction is the same.
1189 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1190 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1192 (match_operand:<VHALF> 1 "register_operand" "w,r")
1195 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1196 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1198 ins\\t%0.d[1], %1.d[0]
1200 [(set_attr "type" "neon_ins")]
;; Endianness-dispatching expander.  The parallel P selects the low half
;; (second argument false) that must be preserved.
1203 (define_expand "move_hi_quad_<mode>"
1204 [(match_operand:VQ 0 "register_operand" "")
1205 (match_operand:<VHALF> 1 "register_operand" "")]
1208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1209 if (BYTES_BIG_ENDIAN)
1210 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1213 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1218 ;; Narrowing operations.
;; Narrow each element of a Q-register vector to half width (XTN).
1221 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1222 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1223 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1225 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1226 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Standard-name expander for D-register sources: glue the two inputs into
;; one Q register (endian-aware lo/hi placement), then narrow with XTN.
1229 (define_expand "vec_pack_trunc_<mode>"
1230 [(match_operand:<VNARROWD> 0 "register_operand" "")
1231 (match_operand:VDN 1 "register_operand" "")
1232 (match_operand:VDN 2 "register_operand" "")]
1235 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1236 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1237 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1239 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1240 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1241 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Q-register sources: two-instruction XTN/XTN2 sequence.  The earlyclobber
;; "=&w" prevents the destination overlapping the not-yet-read input of the
;; second instruction; the operand order swaps on big-endian.
1247 (define_insn "vec_pack_trunc_<mode>"
1248 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1249 (vec_concat:<VNARROWQ2>
1250 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1251 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1254 if (BYTES_BIG_ENDIAN)
1255 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1257 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1259 [(set_attr "type" "multiple")
1260 (set_attr "length" "8")]
1263 ;; Widening operations.
;; Widen (sign- or zero-extend) the low half of a Q-register vector via
;; SSHLL/USHLL with shift amount 0.
1265 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1267 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1268 (match_operand:VQW 1 "register_operand" "w")
1269 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1272 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1273 [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half (SSHLL2/USHLL2).
1276 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1278 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1279 (match_operand:VQW 1 "register_operand" "w")
1280 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1283 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1284 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the half-selection parallel (true = high
;; half) and emit the matching insn above.
1287 (define_expand "vec_unpack<su>_hi_<mode>"
1288 [(match_operand:<VWIDE> 0 "register_operand" "")
1289 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1292 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1293 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1299 (define_expand "vec_unpack<su>_lo_<mode>"
1300 [(match_operand:<VWIDE> 0 "register_operand" "")
1301 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1304 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1305 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1311 ;; Widening arithmetic.
;; Widening multiply-accumulate combiner patterns (not standard names,
;; hence the leading '*').  Accumulator is operand 1, tied to output ("0").
;; NOTE(review): interior lines (the plus/minus/mult wrappers) are missing
;; from this excerpt.

;; acc += low-half(op2) * low-half(op4), widened: SMLAL/UMLAL.
1313 (define_insn "*aarch64_<su>mlal_lo<mode>"
1314 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1317 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1318 (match_operand:VQW 2 "register_operand" "w")
1319 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1320 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1321 (match_operand:VQW 4 "register_operand" "w")
1323 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1325 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1326 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: SMLAL2/UMLAL2.
1329 (define_insn "*aarch64_<su>mlal_hi<mode>"
1330 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1333 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1334 (match_operand:VQW 2 "register_operand" "w")
1335 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1336 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1337 (match_operand:VQW 4 "register_operand" "w")
1339 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1341 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1342 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, low half: SMLSL/UMLSL.
1345 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1348 (match_operand:<VWIDE> 1 "register_operand" "0")
1350 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1351 (match_operand:VQW 2 "register_operand" "w")
1352 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1353 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1354 (match_operand:VQW 4 "register_operand" "w")
1357 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1358 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, high half: SMLSL2/UMLSL2.
1361 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1364 (match_operand:<VWIDE> 1 "register_operand" "0")
1366 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1367 (match_operand:VQW 2 "register_operand" "w")
1368 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1369 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370 (match_operand:VQW 4 "register_operand" "w")
1373 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1374 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit) input forms: no half selection needed.
1377 (define_insn "*aarch64_<su>mlal<mode>"
1378 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:VD_BHSI 1 "register_operand" "w"))
1384 (match_operand:VD_BHSI 2 "register_operand" "w")))
1385 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1387 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1388 [(set_attr "type" "neon_mla_<Vetype>_long")]
1391 (define_insn "*aarch64_<su>mlsl<mode>"
1392 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1394 (match_operand:<VWIDE> 1 "register_operand" "0")
1397 (match_operand:VD_BHSI 2 "register_operand" "w"))
1399 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1401 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1402 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: SMULL/UMULL.
1405 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1406 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1407 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1408 (match_operand:VQW 1 "register_operand" "w")
1409 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1410 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1411 (match_operand:VQW 2 "register_operand" "w")
1414 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1415 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the low-half parallel (false) and emit.
1418 (define_expand "vec_widen_<su>mult_lo_<mode>"
1419 [(match_operand:<VWIDE> 0 "register_operand" "")
1420 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1421 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1424 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1425 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; High-half multiply: SMULL2/UMULL2.
1432 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1433 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1434 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1435 (match_operand:VQW 1 "register_operand" "w")
1436 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1437 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1438 (match_operand:VQW 2 "register_operand" "w")
1441 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1442 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; High-half expander (true = high half).
1445 (define_expand "vec_widen_<su>mult_hi_<mode>"
1446 [(match_operand:<VWIDE> 0 "register_operand" "")
1447 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1448 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1451 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1452 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1460 ;; FP vector operations.
1461 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1462 ;; double-precision (64-bit) floating-point data types and arithmetic as
1463 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1464 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1466 ;; Floating-point operations can raise an exception.  Vectorizing such
1467 ;; operations is safe for the reasons explained below.
1469 ;; ARMv8 permits an extension to enable trapped floating-point
1470 ;; exception handling, however this is an optional feature. In the
1471 ;; event of a floating-point exception being raised by vectorised
1473 ;; 1. If trapped floating-point exceptions are available, then a trap
1474 ;; will be taken when any lane raises an enabled exception. A trap
1475 ;; handler may determine which lane raised the exception.
1476 ;; 2. Alternatively a sticky exception flag is set in the
1477 ;; floating-point status register (FPSR). Software may explicitly
1478 ;; test the exception flags, in which case the tests will either
1479 ;; prevent vectorisation, allowing precise identification of the
1480 ;; failing operation, or if tested outside of vectorisable regions
1481 ;; then the specific operation and lane are not of interest.
1483 ;; FP arithmetic operations.
;; Basic vector FP arithmetic, standard pattern names mapping directly to
;; single AdvSIMD instructions.
1485 (define_insn "add<mode>3"
1486 [(set (match_operand:VDQF 0 "register_operand" "=w")
1487 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1488 (match_operand:VDQF 2 "register_operand" "w")))]
1490 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1491 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1494 (define_insn "sub<mode>3"
1495 [(set (match_operand:VDQF 0 "register_operand" "=w")
1496 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1497 (match_operand:VDQF 2 "register_operand" "w")))]
1499 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1500 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1503 (define_insn "mul<mode>3"
1504 [(set (match_operand:VDQF 0 "register_operand" "=w")
1505 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1506 (match_operand:VDQF 2 "register_operand" "w")))]
1508 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1509 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1512 (define_insn "div<mode>3"
1513 [(set (match_operand:VDQF 0 "register_operand" "=w")
1514 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1515 (match_operand:VDQF 2 "register_operand" "w")))]
1517 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1518 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1521 (define_insn "neg<mode>2"
1522 [(set (match_operand:VDQF 0 "register_operand" "=w")
1523 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1525 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1526 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1529 (define_insn "abs<mode>2"
1530 [(set (match_operand:VDQF 0 "register_operand" "=w")
1531 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1533 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1534 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Fused multiply-add: the accumulator (operand 3) is tied to the output.
1537 (define_insn "fma<mode>4"
1538 [(set (match_operand:VDQF 0 "register_operand" "=w")
1539 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1540 (match_operand:VDQF 2 "register_operand" "w")
1541 (match_operand:VDQF 3 "register_operand" "0")))]
1543 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1544 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLA/FMLS by-lane combiner patterns.  Lane numbers are remapped with
;; ENDIAN_LANE_N so the printed lane index is the architectural one.
;; NOTE(review): interior lines (fma/vec_duplicate/vec_select wrappers)
;; are missing from this excerpt.
1547 (define_insn "*aarch64_fma4_elt<mode>"
1548 [(set (match_operand:VDQF 0 "register_operand" "=w")
1552 (match_operand:VDQF 1 "register_operand" "<h_con>")
1553 (parallel [(match_operand:SI 2 "immediate_operand")])))
1554 (match_operand:VDQF 3 "register_operand" "w")
1555 (match_operand:VDQF 4 "register_operand" "0")))]
1558 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1559 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1561 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Lane taken from a vector of the other width (e.g. V2SF lane into V4SF op).
1564 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1565 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1567 (vec_duplicate:VDQSF
1569 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1570 (parallel [(match_operand:SI 2 "immediate_operand")])))
1571 (match_operand:VDQSF 3 "register_operand" "w")
1572 (match_operand:VDQSF 4 "register_operand" "0")))]
1575 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1576 INTVAL (operands[2])));
1577 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1579 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Multiplier broadcast from a scalar register: always lane 0.
1582 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1583 [(set (match_operand:VMUL 0 "register_operand" "=w")
1586 (match_operand:<VEL> 1 "register_operand" "w"))
1587 (match_operand:VMUL 2 "register_operand" "w")
1588 (match_operand:VMUL 3 "register_operand" "0")))]
1590 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1591 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
;; Scalar DF result computed with the by-lane V2DF form.
1594 (define_insn "*aarch64_fma4_elt_to_64v2df"
1595 [(set (match_operand:DF 0 "register_operand" "=w")
1598 (match_operand:V2DF 1 "register_operand" "w")
1599 (parallel [(match_operand:SI 2 "immediate_operand")]))
1600 (match_operand:DF 3 "register_operand" "w")
1601 (match_operand:DF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1605 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1607 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (negated multiplicand): FMLS.
1610 (define_insn "fnma<mode>4"
1611 [(set (match_operand:VDQF 0 "register_operand" "=w")
1613 (match_operand:VDQF 1 "register_operand" "w")
1615 (match_operand:VDQF 2 "register_operand" "w"))
1616 (match_operand:VDQF 3 "register_operand" "0")))]
1618 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1619 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLS by-lane variants, mirroring the FMLA set above.
1622 (define_insn "*aarch64_fnma4_elt<mode>"
1623 [(set (match_operand:VDQF 0 "register_operand" "=w")
1626 (match_operand:VDQF 3 "register_operand" "w"))
1629 (match_operand:VDQF 1 "register_operand" "<h_con>")
1630 (parallel [(match_operand:SI 2 "immediate_operand")])))
1631 (match_operand:VDQF 4 "register_operand" "0")))]
1634 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1635 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1637 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1640 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1641 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1644 (match_operand:VDQSF 3 "register_operand" "w"))
1645 (vec_duplicate:VDQSF
1647 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1648 (parallel [(match_operand:SI 2 "immediate_operand")])))
1649 (match_operand:VDQSF 4 "register_operand" "0")))]
1652 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1653 INTVAL (operands[2])));
1654 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1656 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1659 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1660 [(set (match_operand:VMUL 0 "register_operand" "=w")
1663 (match_operand:VMUL 2 "register_operand" "w"))
1665 (match_operand:<VEL> 1 "register_operand" "w"))
1666 (match_operand:VMUL 3 "register_operand" "0")))]
1668 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1669 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
1672 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1673 [(set (match_operand:DF 0 "register_operand" "=w")
1676 (match_operand:V2DF 1 "register_operand" "w")
1677 (parallel [(match_operand:SI 2 "immediate_operand")]))
1679 (match_operand:DF 3 "register_operand" "w"))
1680 (match_operand:DF 4 "register_operand" "0")))]
1683 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1684 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1686 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1689 ;; Vector versions of the floating-point frint patterns.
1690 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector FP rounding (FRINT*) selected by the frint_pattern iterator.
1691 (define_insn "<frint_pattern><mode>2"
1692 [(set (match_operand:VDQF 0 "register_operand" "=w")
1693 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1696 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1697 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
;; FP -> integer conversion with an explicit rounding mode (FCVT[AMNPZ][SU]).
1702 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1703 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1704 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1705 [(match_operand:VDQF 1 "register_operand" "w")]
1708 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1709 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Combine (x * 2^n) followed by truncating convert into FCVTZ[SU] with an
;; immediate fbits count, when n is in range for the element size.
1712 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1713 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1714 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1716 (match_operand:VDQF 1 "register_operand" "w")
1717 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1720 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1721 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1723 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1725 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1726 output_asm_insn (buf, operands);
1729 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders for fix/fixuns and their truncating forms.
1732 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1733 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1734 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1735 [(match_operand:VDQF 1 "register_operand")]
1740 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1741 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1742 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1743 [(match_operand:VDQF 1 "register_operand")]
1748 (define_expand "ftrunc<VDQF:mode>2"
1749 [(set (match_operand:VDQF 0 "register_operand")
1750 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Integer -> FP conversion: SCVTF/UCVTF.
1755 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1756 [(set (match_operand:VDQF 0 "register_operand" "=w")
1758 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1760 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1764 ;; Conversions between vectors of floats and doubles.
1765 ;; Contains a mix of patterns to match standard pattern names
1766 ;; and those for intrinsics.
1768 ;; Float widening operations.
;; Float-widen the low half of a vector of HF/SF elements: FCVTL.
1770 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1771 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1772 (float_extend:<VWIDE> (vec_select:<VHALF>
1773 (match_operand:VQ_HSF 1 "register_operand" "w")
1774 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1777 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1778 [(set_attr "type" "neon_fp_cvt_widen_s")]
1781 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1782 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1783 ;; the meaning of HI and LO changes depending on the target endianness.
1784 ;; While elsewhere we map the higher numbered elements of a vector to
1785 ;; the lower architectural lanes of the vector, for these patterns we want
1786 ;; to always treat "hi" as referring to the higher architectural lanes.
1787 ;; Consequently, while the patterns below look inconsistent with our
1788 ;; other big-endian patterns their behavior is as required.
;; Standard-name expander for the low-half float widen (false = low half).
1790 (define_expand "vec_unpacks_lo_<mode>"
1791 [(match_operand:<VWIDE> 0 "register_operand" "")
1792 (match_operand:VQ_HSF 1 "register_operand" "")]
1795 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1796 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Float-widen the high half: FCVTL2.
1802 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1803 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1804 (float_extend:<VWIDE> (vec_select:<VHALF>
1805 (match_operand:VQ_HSF 1 "register_operand" "w")
1806 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1809 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1810 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander for the high-half float widen.
;; BUG FIX: this expander built the high-half selection parallel (true)
;; but then emitted the *low*-half insn, whose vect_par_cnst_lo_half
;; predicate rejects that parallel.  It must emit the _hi_ insn, exactly
;; as the integer vec_unpack<su>_hi_<mode> expander does.
1813 (define_expand "vec_unpacks_hi_<mode>"
1814 [(match_operand:<VWIDE> 0 "register_operand" "")
1815 (match_operand:VQ_HSF 1 "register_operand" "")]
1818 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1819 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole D-register float vector (FCVTL on the low/only half).
1824 (define_insn "aarch64_float_extend_lo_<Vwide>"
1825 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1826 (float_extend:<VWIDE>
1827 (match_operand:VDF 1 "register_operand" "w")))]
1829 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1830 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Narrow a Q-register float vector into a D register (FCVTN).
1835 (define_insn "aarch64_float_truncate_lo_<mode>"
1836 [(set (match_operand:VDF 0 "register_operand" "=w")
1838 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1840 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1841 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping operand 1 in the low half
;; (FCVTN2); separate little-/big-endian RTL shapes, same instruction.
1844 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1845 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1847 (match_operand:VDF 1 "register_operand" "0")
1849 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1850 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1851 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1852 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1855 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1856 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1859 (match_operand:<VWIDE> 2 "register_operand" "w"))
1860 (match_operand:VDF 1 "register_operand" "0")))]
1861 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1862 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1863 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endianness-dispatching expander for FCVTN2.
1866 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1867 [(match_operand:<VDBL> 0 "register_operand" "=w")
1868 (match_operand:VDF 1 "register_operand" "0")
1869 (match_operand:<VWIDE> 2 "register_operand" "w")]
1872 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1873 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1874 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1875 emit_insn (gen (operands[0], operands[1], operands[2]));
;; V2DF pair -> V4SF: FCVTN into the low half, FCVTN2 into the high half,
;; with lo/hi operand choice flipped on big-endian.
1880 (define_expand "vec_pack_trunc_v2df"
1881 [(set (match_operand:V4SF 0 "register_operand")
1883 (float_truncate:V2SF
1884 (match_operand:V2DF 1 "register_operand"))
1885 (float_truncate:V2SF
1886 (match_operand:V2DF 2 "register_operand"))
1890 rtx tmp = gen_reg_rtx (V2SFmode);
1891 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1892 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1894 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1895 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1896 tmp, operands[hi]));
;; DF pair -> V2SF: glue the scalars into a V2DF then narrow with FCVTN.
1901 (define_expand "vec_pack_trunc_df"
1902 [(set (match_operand:V2SF 0 "register_operand")
1905 (match_operand:DF 1 "register_operand"))
1907 (match_operand:DF 2 "register_operand"))
1911 rtx tmp = gen_reg_rtx (V2SFmode);
1912 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1913 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1915 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1916 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1917 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1923 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An expression such as:
1925 ;; a = (b < c) ? b : c;
1926 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1927 ;; either explicitly or indirectly via -ffast-math.
1929 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1930 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1931 ;; operand will be returned when both operands are zero (i.e. they may not
1932 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1933 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names map to FMAXNM/FMINNM (NaN-propagation-defined
;; forms), valid because GCC only generates them under no-NaNs assumptions.
1936 (define_insn "<su><maxmin><mode>3"
1937 [(set (match_operand:VDQF 0 "register_operand" "=w")
1938 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1939 (match_operand:VDQF 2 "register_operand" "w")))]
1941 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1942 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Unspec-based max/min for the intrinsics (exact FMAX/FMIN semantics).
1945 (define_insn "<maxmin_uns><mode>3"
1946 [(set (match_operand:VDQF 0 "register_operand" "=w")
1947 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1948 (match_operand:VDQF 2 "register_operand" "w")]
1951 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1952 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions.
1956 (define_insn "<fmaxmin><mode>3"
1957 [(set (match_operand:VDQF 0 "register_operand" "=w")
1958 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1959 (match_operand:VDQF 2 "register_operand" "w")]
1962 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1963 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1966 ;; 'across lanes' add.
;; Across-lanes integer add reduction: reduce into a vector scratch, then
;; extract the architectural lane 0 (endian-remapped) as the scalar result.
1968 (define_expand "reduc_plus_scal_<mode>"
1969 [(match_operand:<VEL> 0 "register_operand" "=w")
1970 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1974 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1975 rtx scratch = gen_reg_rtx (<MODE>mode);
1976 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1977 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; ADDV/ADDP reduction producing the sum in element 0 of a vector register.
1982 (define_insn "aarch64_reduc_plus_internal<mode>"
1983 [(set (match_operand:VDQV 0 "register_operand" "=w")
1984 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1987 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1988 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP suffices.
1991 (define_insn "aarch64_reduc_plus_internalv2si"
1992 [(set (match_operand:V2SI 0 "register_operand" "=w")
1993 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1996 "addp\\t%0.2s, %1.2s, %1.2s"
1997 [(set_attr "type" "neon_reduc_add")]
;; Two-element float reduction: one FADDP produces the scalar directly.
2000 (define_insn "reduc_plus_scal_<mode>"
2001 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2002 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2005 "faddp\\t%<Vetype>0, %1.<Vtype>"
2006 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; Pairwise float add on V4SF; two applications give a full reduction.
2009 (define_insn "aarch64_addpv4sf"
2010 [(set (match_operand:V4SF 0 "register_operand" "=w")
2011 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
2014 "faddp\\t%0.4s, %1.4s, %1.4s"
2015 [(set_attr "type" "neon_fp_reduc_add_s_q")]
;; V4SF sum: FADDP twice, then extract lane 0.
2018 (define_expand "reduc_plus_scal_v4sf"
2019 [(set (match_operand:SF 0 "register_operand")
2020 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2024 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2025 rtx scratch = gen_reg_rtx (V4SFmode);
2026 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
2027 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
2028 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Per-element bit-counting operations.
;; Count leading sign bits (redundant sign bits): CLS.
2032 (define_insn "clrsb<mode>2"
2033 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2034 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2036 "cls\\t%0.<Vtype>, %1.<Vtype>"
2037 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros: CLZ.
2040 (define_insn "clz<mode>2"
2041 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2042 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2044 "clz\\t%0.<Vtype>, %1.<Vtype>"
2045 [(set_attr "type" "neon_cls<q>")]
;; Population count (byte elements only): CNT.
2048 (define_insn "popcount<mode>2"
2049 [(set (match_operand:VB 0 "register_operand" "=w")
2050 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2052 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2053 [(set_attr "type" "neon_cnt<q>")]
2056 ;; 'across lanes' max and min ops.
2058 ;; Template for outputting a scalar, so we can create __builtins which can be
2059 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; Across-lanes max/min reductions: reduce into a vector scratch, then
;; extract the endian-remapped lane 0.  FP variant first.
2060 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2061 [(match_operand:<VEL> 0 "register_operand")
2062 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
2066 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2067 rtx scratch = gen_reg_rtx (<MODE>mode);
2068 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2070 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Likewise for integer cases, signed and unsigned.
2076 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2077 [(match_operand:<VEL> 0 "register_operand")
2078 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2082 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2083 rtx scratch = gen_reg_rtx (<MODE>mode);
2084 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2086 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer reduction insn: [SU](MAX|MIN)V.
2091 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2092 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2093 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2096 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2097 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; use the pairwise instruction.
2100 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2101 [(set (match_operand:V2SI 0 "register_operand" "=w")
2102 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2105 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2106 [(set_attr "type" "neon_reduc_minmax")]
;; FP reduction insn: F(MAX|MIN)(NM)?[VP] depending on mode/operation.
2109 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2110 [(set (match_operand:VDQF 0 "register_operand" "=w")
2111 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
2114 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2115 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
2118 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2120 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2123 ;; Thus our BSL is of the form:
2124 ;; op0 = bsl (mask, op2, op3)
2125 ;; We can use any of:
2128 ;; bsl mask, op1, op2
2129 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2130 ;; bit op0, op2, mask
2131 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2132 ;; bif op0, op1, mask
2134 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2135 ;; Some forms of straight-line code may generate the equivalent form
2136 ;; in *aarch64_simd_bsl<mode>_alt.
2138 (define_insn "aarch64_simd_bsl<mode>_internal"
2139 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2143 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2144 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2145 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2146 (match_dup:<V_cmp_result> 3)
2150 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2151 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2152 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2153 [(set_attr "type" "neon_bsl<q>")]
2156 ;; We need this form in addition to the above pattern to match the case
2157 ;; when combine tries merging three insns such that the second operand of
2158 ;; the outer XOR matches the second operand of the inner XOR rather than
2159 ;; the first. The two are equivalent but since recog doesn't try all
2160 ;; permutations of commutative operations, we have to have a separate pattern.
2162 (define_insn "*aarch64_simd_bsl<mode>_alt"
2163 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2167 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2168 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2169 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2170 (match_dup:VSDQ_I_DI 2)))]
2173 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2174 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2175 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2176 [(set_attr "type" "neon_bsl<q>")]
;; NOTE(review): interior lines are missing from this extract (fused line
;; numbers jump, e.g. 2183->2186); confirm against the full file.
;; Expander for bsl on any VALLDIF mode.  Float modes cannot share operands
;; with the integer-mode internal pattern, so operands are punned to the
;; integer comparison-result mode via gen_lowpart, the internal bsl is
;; emitted on a temporary, and the result is lowpart-moved back.
2179 (define_expand "aarch64_simd_bsl<mode>"
2180 [(match_operand:VALLDIF 0 "register_operand")
2181 (match_operand:<V_cmp_result> 1 "register_operand")
2182 (match_operand:VALLDIF 2 "register_operand")
2183 (match_operand:VALLDIF 3 "register_operand")]
2186 /* We can't alias operands together if they have different modes.  */
2187 rtx tmp = operands[0];
2188 if (FLOAT_MODE_P (<MODE>mode))
2190 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2191 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2192 tmp = gen_reg_rtx (<V_cmp_result>mode);
2194 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2195 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2199 if (tmp != operands[0])
2200 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; NOTE(review): this expander body is incomplete in this extract (many
;; interior lines missing, e.g. the switch statement scaffolding around
;; 2227-2231); read the full file before modifying.
;; Integer vector conditional: emits a compare producing an all-ones/zeros
;; mask, then a bsl to select between operands 1 and 2.  The comparison
;; code may be inverted up front so that the mask can be used directly
;; when the selectees are the -1/0 pair.
2205 (define_expand "aarch64_vcond_internal<mode><mode>"
2206 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2207 (if_then_else:VSDQ_I_DI
2208 (match_operator 3 "comparison_operator"
2209 [(match_operand:VSDQ_I_DI 4 "register_operand")
2210 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2211 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2212 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2215 rtx op1 = operands[1];
2216 rtx op2 = operands[2];
2217 rtx mask = gen_reg_rtx (<MODE>mode);
2218 enum rtx_code code = GET_CODE (operands[3]);
2220 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2221 and desirable for other comparisons if it results in FOO ? -1 : 0
2222 (this allows direct use of the comparison result without a bsl).  */
2225 && op1 == CONST0_RTX (<V_cmp_result>mode)
2226 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
;; Invert the comparison code (each case maps to its logical complement).
2232 case LE: code = GT; break;
2233 case LT: code = GE; break;
2234 case GE: code = LT; break;
2235 case GT: code = LE; break;
2237 case NE: code = EQ; break;
2238 case LTU: code = GEU; break;
2239 case LEU: code = GTU; break;
2240 case GTU: code = LEU; break;
2241 case GEU: code = LTU; break;
2242 default: gcc_unreachable ();
2246 /* Make sure we can handle the last operand.  */
2250 /* Normalized to EQ above.  */
2258 /* These instructions have a form taking an immediate zero.  */
2259 if (operands[5] == CONST0_RTX (<MODE>mode))
2261 /* Fall through, as may need to load into register.  */
2263 if (!REG_P (operands[5]))
2264 operands[5] = force_reg (<MODE>mode, operands[5]);
;; Emit the actual compare; unsigned GT/GE forms swap operands 4 and 5
;; where the hardware only provides one direction.
2271 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2275 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2279 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2283 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2287 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2291 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2295 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2299 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2302 /* NE has been normalized to EQ above.  */
2304 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2311 /* If we have (a = (b CMP c) ? -1 : 0);
2312 Then we can simply move the generated mask.  */
2314 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2315 && op2 == CONST0_RTX (<V_cmp_result>mode))
2316 emit_move_insn (operands[0], mask);
;; Otherwise select between the two (possibly constant) values via bsl.
2320 op1 = force_reg (<MODE>mode, op1);
2322 op2 = force_reg (<MODE>mode, op2);
2323 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; NOTE(review): incomplete in this extract (fused line numbers jump);
;; consult the full file for the omitted switch/case scaffolding.
;; Floating-point vector conditional.  FCM instructions return false for
;; unordered lanes, so inverted comparisons are emitted and the BSL
;; operands swapped (swap_bsl_operands) to get IEEE-correct results for
;; LE/LT/NE/UNEQ/UNORDERED and friends.
2330 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2331 [(set (match_operand:VDQF_COND 0 "register_operand")
2333 (match_operator 3 "comparison_operator"
2334 [(match_operand:VDQF 4 "register_operand")
2335 (match_operand:VDQF 5 "nonmemory_operand")])
2336 (match_operand:VDQF_COND 1 "nonmemory_operand")
2337 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2341 int use_zero_form = 0;
2342 int swap_bsl_operands = 0;
2343 rtx op1 = operands[1];
2344 rtx op2 = operands[2];
2345 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2346 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2348 rtx (*base_comparison) (rtx, rtx, rtx);
2349 rtx (*complimentary_comparison) (rtx, rtx, rtx);
;; First pass: decide whether the compare-against-zero form applies.
2351 switch (GET_CODE (operands[3]))
2358 if (operands[5] == CONST0_RTX (<MODE>mode))
2365 if (!REG_P (operands[5]))
2366 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
;; Second pass: pick the base comparison emitter and its complement.
2369 switch (GET_CODE (operands[3]))
2379 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2380 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2388 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2389 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2394 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2395 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2401 switch (GET_CODE (operands[3]))
2408 /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2409 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2415 Note that there also exist direct comparison against 0 forms,
2416 so catch those as a special case.  */
2420 switch (GET_CODE (operands[3]))
2423 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2426 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2429 /* Do nothing, other zero form cases already have the correct
2436 emit_insn (base_comparison (mask, operands[4], operands[5]));
2438 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2445 /* FCM returns false for lanes which are unordered, so if we use
2446 the inverse of the comparison we actually want to emit, then
2447 swap the operands to BSL, we will end up with the correct result.
2448 Note that a NE NaN and NaN NE b are true for all a, b.
2450 Our transformations are:
2455 a NE b -> !(a EQ b) */
2458 emit_insn (base_comparison (mask, operands[4], operands[5]));
2460 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2462 swap_bsl_operands = 1;
2465 /* We check (a > b || b > a).  combining these comparisons give us
2466 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2467 will then give us (a == b || a UNORDERED b) as intended.  */
2469 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2470 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2471 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2472 swap_bsl_operands = 1;
2475 /* Operands are ORDERED iff (a > b || b >= a).
2476 Swapping the operands to BSL will give the UNORDERED case.  */
2477 swap_bsl_operands = 1;
2480 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2481 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2482 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2488 if (swap_bsl_operands)
2494 /* If we have (a = (b CMP c) ? -1 : 0);
2495 Then we can simply move the generated mask.  */
2497 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2498 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2499 emit_move_insn (operands[0], mask);
2503 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2505 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2506 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond/vcondu expanders.  Each simply forwards all six
;; operands to the matching aarch64_vcond_internal expander above.
;; NOTE(review): some lines are missing from this extract (fused line
;; numbers jump); verify closing forms against the full file.
2513 (define_expand "vcond<mode><mode>"
2514 [(set (match_operand:VALLDI 0 "register_operand")
2515 (if_then_else:VALLDI
2516 (match_operator 3 "comparison_operator"
2517 [(match_operand:VALLDI 4 "register_operand")
2518 (match_operand:VALLDI 5 "nonmemory_operand")])
2519 (match_operand:VALLDI 1 "nonmemory_operand")
2520 (match_operand:VALLDI 2 "nonmemory_operand")))]
2523 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2524 operands[2], operands[3],
2525 operands[4], operands[5]));
;; Variant where the comparison is on a float vector but the selected
;; values are in the integer comparison-result mode.
2529 (define_expand "vcond<v_cmp_result><mode>"
2530 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2531 (if_then_else:<V_cmp_result>
2532 (match_operator 3 "comparison_operator"
2533 [(match_operand:VDQF 4 "register_operand")
2534 (match_operand:VDQF 5 "nonmemory_operand")])
2535 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2536 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2539 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2540 operands[0], operands[1],
2541 operands[2], operands[3],
2542 operands[4], operands[5]));
;; Unsigned variant; the internal expander handles LTU/GEU etc. itself.
2546 (define_expand "vcondu<mode><mode>"
2547 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2548 (if_then_else:VSDQ_I_DI
2549 (match_operator 3 "comparison_operator"
2550 [(match_operand:VSDQ_I_DI 4 "register_operand")
2551 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2552 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2553 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2556 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2557 operands[2], operands[3],
2558 operands[4], operands[5]));
2562 ;; Patterns for AArch64 SIMD Intrinsics.
2564 ;; Lane extraction with sign extension to general purpose register.
;; NOTE(review): interior lines are missing from this extract.
;; Lane indices are remapped for big-endian via ENDIAN_LANE_N before
;; printing, since RTL uses GCC vector-extension (array) indices.
2565 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2566 [(set (match_operand:GPI 0 "register_operand" "=r")
2569 (match_operand:VDQQH 1 "register_operand" "w")
2570 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2573 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2574 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2576 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to a 32-bit GP register (umov).
2579 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2580 [(set (match_operand:SI 0 "register_operand" "=r")
2583 (match_operand:VDQQH 1 "register_operand" "w")
2584 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2587 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2588 return "umov\\t%w0, %1.<Vetype>[%2]";
2590 [(set_attr "type" "neon_to_gp<q>")]
2593 ;; Lane extraction of a value, neither sign nor zero extension
2594 ;; is guaranteed so upper bits should be considered undefined.
2595 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: to GP register (umov), to SIMD register (dup),
;; or directly to memory (st1 of one lane).
2596 (define_insn "aarch64_get_lane<mode>"
2597 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2599 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2600 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2603 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2604 switch (which_alternative)
2607 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2609 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2611 return "st1\\t{%1.<Vetype>}[%2], %0";
2616 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2619 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; NOTE(review): the rest of this sentence and other interior lines are
;; missing from this extract; see the full file.
;; Concatenate a 64-bit vector with zero; separate little- and big-endian
;; patterns because vec_concat operand order is endian-dependent.
2622 (define_insn "*aarch64_combinez<mode>"
2623 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2625 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2626 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2627 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2632 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2633 (set_attr "simd" "yes,*,yes")
2634 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of the above: zero half comes first in the vec_concat.
2637 (define_insn "*aarch64_combinez_be<mode>"
2638 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2640 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2641 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2642 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2647 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2648 (set_attr "simd" "yes,*,yes")
2649 (set_attr "fp" "*,yes,*")]
;; Expander: forwards to aarch64_combine_internal, with an endian-dependent
;; operand setup (body partially missing in this extract).
2652 (define_expand "aarch64_combine<mode>"
2653 [(match_operand:<VDBL> 0 "register_operand")
2654 (match_operand:VDC 1 "register_operand")
2655 (match_operand:VDC 2 "register_operand")]
2659 if (BYTES_BIG_ENDIAN)
2669 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Insn-and-split: after reload, splits into moves via
;; aarch64_split_simd_combine, swapping halves on big-endian.
2674 (define_insn_and_split "aarch64_combine_internal<mode>"
2675 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2676 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2677 (match_operand:VDC 2 "register_operand" "w")))]
2680 "&& reload_completed"
2683 if (BYTES_BIG_ENDIAN)
2684 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2686 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2689 [(set_attr "type" "multiple")]
;; Intrinsic-level combine: builds the double-width result with explicit
;; low/high quad moves.
2692 (define_expand "aarch64_simd_combine<mode>"
2693 [(match_operand:<VDBL> 0 "register_operand")
2694 (match_operand:VDC 1 "register_operand")
2695 (match_operand:VDC 2 "register_operand")]
2698 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2699 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2702 [(set_attr "type" "multiple")]
2705 ;; <su><addsub>l<q>.
;; NOTE(review): interior lines are missing from this extract.
;; Widening add/sub on the high halves of two Q registers ([us]addl2 /
;; [us]subl2); operand 3 selects the hi-half lanes.
2707 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2708 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2709 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2710 (match_operand:VQW 1 "register_operand" "w")
2711 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2712 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2713 (match_operand:VQW 2 "register_operand" "w")
2716 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2717 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the low halves ([us]addl / [us]subl).
2720 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2721 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2722 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2723 (match_operand:VQW 1 "register_operand" "w")
2724 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2725 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2726 (match_operand:VQW 2 "register_operand" "w")
2729 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2730 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four expanders below each build a hi-half lane selector and forward
;; to the matching *_hi_internal pattern.
2734 (define_expand "aarch64_saddl2<mode>"
2735 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2736 (match_operand:VQW 1 "register_operand" "w")
2737 (match_operand:VQW 2 "register_operand" "w")]
2740 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2741 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2746 (define_expand "aarch64_uaddl2<mode>"
2747 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2748 (match_operand:VQW 1 "register_operand" "w")
2749 (match_operand:VQW 2 "register_operand" "w")]
2752 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2753 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2758 (define_expand "aarch64_ssubl2<mode>"
2759 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2760 (match_operand:VQW 1 "register_operand" "w")
2761 (match_operand:VQW 2 "register_operand" "w")]
2764 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2765 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2770 (define_expand "aarch64_usubl2<mode>"
2771 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2772 (match_operand:VQW 1 "register_operand" "w")
2773 (match_operand:VQW 2 "register_operand" "w")]
2776 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2777 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of two full 64-bit vectors (no half selection).
2782 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2783 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2784 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2785 (match_operand:VD_BHSI 1 "register_operand" "w"))
2787 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2789 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2790 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2793 ;; <su><addsub>w<q>.
;; NOTE(review): interior lines are missing from this extract.
;; widen_ssum for Q-register input: sums the low half via saddw, then the
;; high half via saddw2, accumulating into operand 2.
2795 (define_expand "widen_ssum<mode>3"
2796 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2797 (plus:<VDBLW> (sign_extend:<VDBLW>
2798 (match_operand:VQW 1 "register_operand" ""))
2799 (match_operand:<VDBLW> 2 "register_operand" "")))]
2802 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2803 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2805 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2807 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit input variant: a single saddw suffices.
2812 (define_expand "widen_ssum<mode>3"
2813 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2814 (plus:<VWIDE> (sign_extend:<VWIDE>
2815 (match_operand:VD_BHSI 1 "register_operand" ""))
2816 (match_operand:<VWIDE> 2 "register_operand" "")))]
2819 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned twins of the two expanders above (uaddw / uaddw2).
2823 (define_expand "widen_usum<mode>3"
2824 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2825 (plus:<VDBLW> (zero_extend:<VDBLW>
2826 (match_operand:VQW 1 "register_operand" ""))
2827 (match_operand:<VDBLW> 2 "register_operand" "")))]
2830 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2831 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2833 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2835 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2840 (define_expand "widen_usum<mode>3"
2841 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2842 (plus:<VWIDE> (zero_extend:<VWIDE>
2843 (match_operand:VD_BHSI 1 "register_operand" ""))
2844 (match_operand:<VWIDE> 2 "register_operand" "")))]
2847 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; NOTE(review): interior lines are missing from this extract.
;; Widening add/sub of a wide accumulator and a 64-bit vector
;; ([us]addw / [us]subw).
2851 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2852 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2853 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2855 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2857 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2858 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same, taking the low half of a Q-register second operand.
2861 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2863 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2866 (match_operand:VQW 2 "register_operand" "w")
2867 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2869 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2870 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant ([us]addw2 / [us]subw2).
2873 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2875 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2878 (match_operand:VQW 2 "register_operand" "w")
2879 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2881 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2882 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Expanders: build a hi-half selector and forward to *_w2_internal.
2885 (define_expand "aarch64_saddw2<mode>"
2886 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2887 (match_operand:<VWIDE> 1 "register_operand" "w")
2888 (match_operand:VQW 2 "register_operand" "w")]
2891 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2892 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2897 (define_expand "aarch64_uaddw2<mode>"
2898 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2899 (match_operand:<VWIDE> 1 "register_operand" "w")
2900 (match_operand:VQW 2 "register_operand" "w")]
2903 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2904 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2910 (define_expand "aarch64_ssubw2<mode>"
2911 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2912 (match_operand:<VWIDE> 1 "register_operand" "w")
2913 (match_operand:VQW 2 "register_operand" "w")]
2916 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2917 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2922 (define_expand "aarch64_usubw2<mode>"
2923 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2924 (match_operand:<VWIDE> 1 "register_operand" "w")
2925 (match_operand:VQW 2 "register_operand" "w")]
2928 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2929 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2934 ;; <su><r>h<addsub>.
;; NOTE(review): interior lines are missing from this extract.
;; Halving add/sub: [su][r]hadd / [su]hsub via unspec.
2936 (define_insn "aarch64_<sur>h<addsub><mode>"
2937 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2938 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2939 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2942 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2943 [(set_attr "type" "neon_<addsub>_halve<q>")]
2946 ;; <r><addsub>hn<q>.
;; Narrowing halving add/sub: [r]addhn / [r]subhn.
2948 (define_insn "aarch64_<sur><addsub>hn<mode>"
2949 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2950 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2951 (match_operand:VQN 2 "register_operand" "w")]
2954 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2955 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; High-half form ([r]addhn2 / [r]subhn2); operand 1 is tied ("0") because
;; the low half of the destination is preserved.
2958 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2959 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2960 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2961 (match_operand:VQN 2 "register_operand" "w")
2962 (match_operand:VQN 3 "register_operand" "w")]
2965 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2966 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; NOTE(review): interior lines are missing from this extract.
;; Polynomial multiply on byte vectors.
2971 (define_insn "aarch64_pmul<mode>"
2972 [(set (match_operand:VB 0 "register_operand" "=w")
2973 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2974 (match_operand:VB 2 "register_operand" "w")]
2977 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2978 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; fmulx: multiply-extended (IEEE special-case handling differs from fmul).
2983 (define_insn "aarch64_fmulx<mode>"
2984 [(set (match_operand:VALLF 0 "register_operand" "=w")
2985 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
2986 (match_operand:VALLF 2 "register_operand" "w")]
2989 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2990 [(set_attr "type" "neon_fp_mul_<Vetype>")]
2993 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; fmulx by an element of a vector of the swapped width; lane index
;; remapped for endianness before printing.
2995 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
2996 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2998 [(match_operand:VDQSF 1 "register_operand" "w")
2999 (vec_duplicate:VDQSF
3001 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3002 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3006 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3007 INTVAL (operands[3])));
3008 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3010 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3013 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width by-element variant.
3015 (define_insn "*aarch64_mulx_elt<mode>"
3016 [(set (match_operand:VDQF 0 "register_operand" "=w")
3018 [(match_operand:VDQF 1 "register_operand" "w")
3021 (match_operand:VDQF 2 "register_operand" "w")
3022 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3026 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3027 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3029 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; V2DF by a duplicated DF scalar (element [0]).
3034 (define_insn "*aarch64_mulx_elt_to_64v2df"
3035 [(set (match_operand:V2DF 0 "register_operand" "=w")
3037 [(match_operand:V2DF 1 "register_operand" "w")
3039 (match_operand:DF 2 "register_operand" "w"))]
3043 return "fmulx\t%0.2d, %1.2d, %2.d[0]";
3045 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
3048 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3049 ;; vmulxd_lane_f64 == vmulx_lane_f64
3050 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar fmulx taking one operand from a vector lane.
3052 (define_insn "*aarch64_vgetfmulx<mode>"
3053 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3055 [(match_operand:<VEL> 1 "register_operand" "w")
3057 (match_operand:VDQF_DF 2 "register_operand" "w")
3058 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3062 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3063 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3065 [(set_attr "type" "fmul<Vetype>")]
;; NOTE(review): interior lines are missing from this extract.
;; Saturating add/sub: [us]qadd / [us]qsub via the BINQOPS iterator.
3069 (define_insn "aarch64_<su_optab><optab><mode>"
3070 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3071 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3072 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3074 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3075 [(set_attr "type" "neon_<optab><q>")]
3078 ;; suqadd and usqadd
;; Accumulating saturating add of opposite signedness; operand 1 tied to
;; the destination ("0") since the instruction is accumulate-in-place.
3080 (define_insn "aarch64_<sur>qadd<mode>"
3081 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3082 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3083 (match_operand:VSDQ_I 2 "register_operand" "w")]
3086 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3087 [(set_attr "type" "neon_qadd<q>")]
;; Saturating extract-unsigned-narrow (sqxtun).
3092 (define_insn "aarch64_sqmovun<mode>"
3093 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3094 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3097 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3098 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3101 ;; sqmovn and uqmovn
;; Saturating narrow ([su]qxtn).
3103 (define_insn "aarch64_<sur>qmovn<mode>"
3104 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3105 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3108 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3109 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (e.g. sqabs/sqneg) via the <optab> iterator.
3114 (define_insn "aarch64_s<optab><mode>"
3115 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3117 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3119 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3120 [(set_attr "type" "neon_<optab><q>")]
;; NOTE(review): interior lines are missing from this extract.
;; Saturating (rounding) doubling multiply high: sq[r]dmulh.
3125 (define_insn "aarch64_sq<r>dmulh<mode>"
3126 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3128 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3129 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3132 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3133 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; By-element (lane) variants; "<vwx>" restricts the element register per
;; the element-indexable register-file constraint for the element size,
;; and lane numbers are flipped for big-endian via ENDIAN_LANE_N.
3138 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3139 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3141 [(match_operand:VDQHS 1 "register_operand" "w")
3143 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3144 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3148 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3149 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3150 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; laneq: element taken from a 128-bit (<VCONQ>) vector.
3153 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3154 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3156 [(match_operand:VDQHS 1 "register_operand" "w")
3158 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3159 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3163 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3164 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3165 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) lane/laneq variants of the same operation.
3168 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3169 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3171 [(match_operand:SD_HSI 1 "register_operand" "w")
3173 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3174 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3178 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3179 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3180 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3183 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3184 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3186 [(match_operand:SD_HSI 1 "register_operand" "w")
3188 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3189 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3193 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3194 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3195 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; NOTE(review): interior lines are missing from this extract.
;; Saturating rounding doubling multiply-accumulate/subtract high
;; (sqrdmlah / sqrdmlsh); operand 1 is the tied accumulator ("0").
3200 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3201 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3203 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3204 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3205 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3208 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3209 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3212 ;; sqrdml[as]h_lane.
;; Vector by-element variant; lane flipped for endianness.
3214 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3215 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3217 [(match_operand:VDQHS 1 "register_operand" "0")
3218 (match_operand:VDQHS 2 "register_operand" "w")
3220 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3221 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3225 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3227 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3229 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by-element variant.
3232 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3233 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3235 [(match_operand:SD_HSI 1 "register_operand" "0")
3236 (match_operand:SD_HSI 2 "register_operand" "w")
3238 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3239 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3243 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3245 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3247 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3250 ;; sqrdml[as]h_laneq.
;; laneq variants: element from a 128-bit (<VCONQ>) vector.
3252 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3253 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3255 [(match_operand:VDQHS 1 "register_operand" "0")
3256 (match_operand:VDQHS 2 "register_operand" "w")
3258 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3259 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3263 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3265 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3267 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3270 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3271 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3273 [(match_operand:SD_HSI 1 "register_operand" "0")
3274 (match_operand:SD_HSI 2 "register_operand" "w")
3276 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3277 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3281 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3283 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3285 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): interior lines are missing from this extract.
;; Saturating doubling multiply-accumulate/subtract long
;; (sqdmlal / sqdmlsl); operand 1 is the tied wide accumulator ("0").
3290 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3291 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3293 (match_operand:<VWIDE> 1 "register_operand" "0")
3296 (sign_extend:<VWIDE>
3297 (match_operand:VSD_HSI 2 "register_operand" "w"))
3298 (sign_extend:<VWIDE>
3299 (match_operand:VSD_HSI 3 "register_operand" "w")))
3302 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3303 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Vector by-element (lane) variant; lane flipped for endianness.
3308 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3311 (match_operand:<VWIDE> 1 "register_operand" "0")
3314 (sign_extend:<VWIDE>
3315 (match_operand:VD_HSI 2 "register_operand" "w"))
3316 (sign_extend:<VWIDE>
3317 (vec_duplicate:VD_HSI
3319 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3320 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3325 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3327 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3329 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector laneq variant (element from 128-bit <VCONQ> vector).
3332 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3333 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3335 (match_operand:<VWIDE> 1 "register_operand" "0")
3338 (sign_extend:<VWIDE>
3339 (match_operand:VD_HSI 2 "register_operand" "w"))
3340 (sign_extend:<VWIDE>
3341 (vec_duplicate:VD_HSI
3343 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3344 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3349 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3351 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3353 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane variant.
3356 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3357 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3359 (match_operand:<VWIDE> 1 "register_operand" "0")
3362 (sign_extend:<VWIDE>
3363 (match_operand:SD_HSI 2 "register_operand" "w"))
3364 (sign_extend:<VWIDE>
3366 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3367 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3372 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3374 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3376 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq variant.
3379 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3382 (match_operand:<VWIDE> 1 "register_operand" "0")
3385 (sign_extend:<VWIDE>
3386 (match_operand:SD_HSI 2 "register_operand" "w"))
3387 (sign_extend:<VWIDE>
3389 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3390 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3395 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3397 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3399 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Multiply by a duplicated scalar register (element [0] form).
3404 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3405 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3407 (match_operand:<VWIDE> 1 "register_operand" "0")
3410 (sign_extend:<VWIDE>
3411 (match_operand:VD_HSI 2 "register_operand" "w"))
3412 (sign_extend:<VWIDE>
3413 (vec_duplicate:VD_HSI
3414 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3417 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3418 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; -------------------------------------------------------------------------
;; SQDMLAL2 / SQDMLSL2: operate on the HIGH half of 128-bit (VQ_HSI) inputs.
;; The "_internal" insns take an explicit vect_par_cnst_hi_half parallel
;; selecting the high lanes; the define_expands build that parallel with
;; aarch64_simd_vect_par_cnst_half and delegate to the _internal patterns.
;; -------------------------------------------------------------------------

;; High-half vector x vector form; operand 4 is the hi-half lane selector.
3423 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3424 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3426 (match_operand:<VWIDE> 1 "register_operand" "0")
3429 (sign_extend:<VWIDE>
3431 (match_operand:VQ_HSI 2 "register_operand" "w")
3432 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3433 (sign_extend:<VWIDE>
3435 (match_operand:VQ_HSI 3 "register_operand" "w")
3439 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3440 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expander: builds the hi-half selector and emits the accumulate insn.
3443 (define_expand "aarch64_sqdmlal2<mode>"
3444 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3445 (match_operand:<VWIDE> 1 "register_operand" "w")
3446 (match_operand:VQ_HSI 2 "register_operand" "w")
3447 (match_operand:VQ_HSI 3 "register_operand" "w")]
3450 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3451 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3452 operands[2], operands[3], p));

;; Expander: same, for the subtract form.
3456 (define_expand "aarch64_sqdmlsl2<mode>"
3457 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3458 (match_operand:<VWIDE> 1 "register_operand" "w")
3459 (match_operand:VQ_HSI 2 "register_operand" "w")
3460 (match_operand:VQ_HSI 3 "register_operand" "w")]
3463 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3464 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3465 operands[2], operands[3], p));

;; High-half x lane-of-64-bit-vector; operand 4 is the (endian-adjusted)
;; lane index, operand 5 the hi-half selector.
3471 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3472 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3474 (match_operand:<VWIDE> 1 "register_operand" "0")
3477 (sign_extend:<VWIDE>
3479 (match_operand:VQ_HSI 2 "register_operand" "w")
3480 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3481 (sign_extend:<VWIDE>
3482 (vec_duplicate:<VHALF>
3484 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3485 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3490 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3492 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3494 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; High-half x lane-of-128-bit-vector (<VCONQ>).
3497 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3498 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3500 (match_operand:<VWIDE> 1 "register_operand" "0")
3503 (sign_extend:<VWIDE>
3505 (match_operand:VQ_HSI 2 "register_operand" "w")
3506 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3507 (sign_extend:<VWIDE>
3508 (vec_duplicate:<VHALF>
3510 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3511 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3516 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3518 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3520 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders for the four lane/laneq x accumulate/subtract combinations;
;; each builds the hi-half selector and calls the matching _internal insn.
3523 (define_expand "aarch64_sqdmlal2_lane<mode>"
3524 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3525 (match_operand:<VWIDE> 1 "register_operand" "w")
3526 (match_operand:VQ_HSI 2 "register_operand" "w")
3527 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3528 (match_operand:SI 4 "immediate_operand" "i")]
3531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3532 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3533 operands[2], operands[3],

3538 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3539 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3540 (match_operand:<VWIDE> 1 "register_operand" "w")
3541 (match_operand:VQ_HSI 2 "register_operand" "w")
3542 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3543 (match_operand:SI 4 "immediate_operand" "i")]
3546 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3547 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3548 operands[2], operands[3],

3553 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3554 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3555 (match_operand:<VWIDE> 1 "register_operand" "w")
3556 (match_operand:VQ_HSI 2 "register_operand" "w")
3557 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3558 (match_operand:SI 4 "immediate_operand" "i")]
3561 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3562 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3563 operands[2], operands[3],

3568 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3569 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3570 (match_operand:<VWIDE> 1 "register_operand" "w")
3571 (match_operand:VQ_HSI 2 "register_operand" "w")
3572 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3573 (match_operand:SI 4 "immediate_operand" "i")]
3576 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3577 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3578 operands[2], operands[3],

;; High-half x broadcast-scalar (_n) insn; printed as lane [0].
3583 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3584 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3586 (match_operand:<VWIDE> 1 "register_operand" "0")
3589 (sign_extend:<VWIDE>
3591 (match_operand:VQ_HSI 2 "register_operand" "w")
3592 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3593 (sign_extend:<VWIDE>
3594 (vec_duplicate:<VHALF>
3595 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3598 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3599 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; _n expanders for accumulate and subtract.
3602 (define_expand "aarch64_sqdmlal2_n<mode>"
3603 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3604 (match_operand:<VWIDE> 1 "register_operand" "w")
3605 (match_operand:VQ_HSI 2 "register_operand" "w")
3606 (match_operand:<VEL> 3 "register_operand" "w")]
3609 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3610 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3611 operands[2], operands[3],

3616 (define_expand "aarch64_sqdmlsl2_n<mode>"
3617 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3618 (match_operand:<VWIDE> 1 "register_operand" "w")
3619 (match_operand:VQ_HSI 2 "register_operand" "w")
3620 (match_operand:<VEL> 3 "register_operand" "w")]
3623 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3624 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3625 operands[2], operands[3],
;; -------------------------------------------------------------------------
;; SQDMULL: signed saturating doubling multiply long (no accumulator).
;; Same lane/laneq/_n structure as the sqdmlal patterns above, minus the
;; tied accumulator operand.
;; -------------------------------------------------------------------------

;; Vector x vector / scalar x scalar form.
3632 (define_insn "aarch64_sqdmull<mode>"
3633 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3636 (sign_extend:<VWIDE>
3637 (match_operand:VSD_HSI 1 "register_operand" "w"))
3638 (sign_extend:<VWIDE>
3639 (match_operand:VSD_HSI 2 "register_operand" "w")))
3642 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3643 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; Vector x lane-of-64-bit-vector; lane index endian-adjusted.
3648 (define_insn "aarch64_sqdmull_lane<mode>"
3649 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3652 (sign_extend:<VWIDE>
3653 (match_operand:VD_HSI 1 "register_operand" "w"))
3654 (sign_extend:<VWIDE>
3655 (vec_duplicate:VD_HSI
3657 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3658 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3663 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3664 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3666 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Vector x lane-of-128-bit-vector (<VCONQ>).
3669 (define_insn "aarch64_sqdmull_laneq<mode>"
3670 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3673 (sign_extend:<VWIDE>
3674 (match_operand:VD_HSI 1 "register_operand" "w"))
3675 (sign_extend:<VWIDE>
3676 (vec_duplicate:VD_HSI
3678 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3679 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3684 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3685 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3687 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) x lane variant: no vec_duplicate needed.
3690 (define_insn "aarch64_sqdmull_lane<mode>"
3691 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3694 (sign_extend:<VWIDE>
3695 (match_operand:SD_HSI 1 "register_operand" "w"))
3696 (sign_extend:<VWIDE>
3698 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3699 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3704 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3705 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3707 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar x lane-of-128-bit-vector variant.
3710 (define_insn "aarch64_sqdmull_laneq<mode>"
3711 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3714 (sign_extend:<VWIDE>
3715 (match_operand:SD_HSI 1 "register_operand" "w"))
3716 (sign_extend:<VWIDE>
3718 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3719 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3724 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3725 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3727 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Vector x broadcast-scalar (_n); printed as lane [0].
3732 (define_insn "aarch64_sqdmull_n<mode>"
3733 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3736 (sign_extend:<VWIDE>
3737 (match_operand:VD_HSI 1 "register_operand" "w"))
3738 (sign_extend:<VWIDE>
3739 (vec_duplicate:VD_HSI
3740 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3744 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3745 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; -------------------------------------------------------------------------
;; SQDMULL2: saturating doubling multiply long on the HIGH half of 128-bit
;; inputs.  As with sqdmlal2, the _internal insns take an explicit
;; vect_par_cnst_hi_half selector and the expanders construct it.
;; -------------------------------------------------------------------------

;; High-half vector x vector; operand 3 is the hi-half lane selector.
3752 (define_insn "aarch64_sqdmull2<mode>_internal"
3753 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3756 (sign_extend:<VWIDE>
3758 (match_operand:VQ_HSI 1 "register_operand" "w")
3759 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3760 (sign_extend:<VWIDE>
3762 (match_operand:VQ_HSI 2 "register_operand" "w")
3767 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3768 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expander: build the hi-half selector, delegate to the _internal insn.
3771 (define_expand "aarch64_sqdmull2<mode>"
3772 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3773 (match_operand:VQ_HSI 1 "register_operand" "w")
3774 (match_operand:VQ_HSI 2 "register_operand" "w")]
3777 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3778 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; High-half x lane-of-64-bit-vector; lane endian-adjusted before printing.
3785 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3786 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3789 (sign_extend:<VWIDE>
3791 (match_operand:VQ_HSI 1 "register_operand" "w")
3792 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3793 (sign_extend:<VWIDE>
3794 (vec_duplicate:<VHALF>
3796 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3797 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3802 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3803 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3805 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; High-half x lane-of-128-bit-vector (<VCONQ>).
3808 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3812 (sign_extend:<VWIDE>
3814 (match_operand:VQ_HSI 1 "register_operand" "w")
3815 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3816 (sign_extend:<VWIDE>
3817 (vec_duplicate:<VHALF>
3819 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3825 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3826 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3828 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Lane/laneq expanders.
3831 (define_expand "aarch64_sqdmull2_lane<mode>"
3832 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3833 (match_operand:VQ_HSI 1 "register_operand" "w")
3834 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3835 (match_operand:SI 3 "immediate_operand" "i")]
3838 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3839 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3840 operands[2], operands[3],

3845 (define_expand "aarch64_sqdmull2_laneq<mode>"
3846 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3847 (match_operand:VQ_HSI 1 "register_operand" "w")
3848 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3849 (match_operand:SI 3 "immediate_operand" "i")]
3852 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3853 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3854 operands[2], operands[3],

;; High-half x broadcast-scalar (_n); printed as lane [0].
3861 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3865 (sign_extend:<VWIDE>
3867 (match_operand:VQ_HSI 1 "register_operand" "w")
3868 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3869 (sign_extend:<VWIDE>
3870 (vec_duplicate:<VHALF>
3871 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3875 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3876 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; _n expander.
3879 (define_expand "aarch64_sqdmull2_n<mode>"
3880 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3881 (match_operand:VQ_HSI 1 "register_operand" "w")
3882 (match_operand:<VEL> 2 "register_operand" "w")]
3885 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3886 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; -------------------------------------------------------------------------
;; Vector shift patterns.  These are unspec-based because the iterators
;; (<sur>, <r>, <u>, <lr>) cover signed/unsigned, rounding, saturating and
;; left/right insert variants that plain RTL shift codes cannot express.
;; -------------------------------------------------------------------------

;; Register-controlled shift (SSHL/USHL family via <sur>).
3893 (define_insn "aarch64_<sur>shl<mode>"
3894 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3896 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3897 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3900 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3901 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding, via <r>) register-controlled shift.
3907 (define_insn "aarch64_<sur>q<r>shl<mode>"
3908 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3910 [(match_operand:VSDQ_I 1 "register_operand" "w")
3911 (match_operand:VSDQ_I 2 "register_operand" "w")]
3914 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3915 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Shift-left-long by immediate.  When the shift count equals the element
;; bit width the architecture provides the distinct SHLL encoding, so the
;; output routine special-cases that count.
3920 (define_insn "aarch64_<sur>shll_n<mode>"
3921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3922 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3924 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3928 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3929 if (INTVAL (operands[2]) == bit_width)
3931 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3934 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3936 [(set_attr "type" "neon_shift_imm_long")]

;; High-half variant of the above (SHLL2), same element-width special case.
3941 (define_insn "aarch64_<sur>shll2_n<mode>"
3942 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3943 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3944 (match_operand:SI 2 "immediate_operand" "i")]
3948 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3949 if (INTVAL (operands[2]) == bit_width)
3951 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3954 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3956 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (rounding variants via <sur>).
3961 (define_insn "aarch64_<sur>shr_n<mode>"
3962 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3963 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3965 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3968 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3969 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift right and accumulate (SSRA/USRA family); operand 1 is the
;; accumulator, tied to the output.
3974 (define_insn "aarch64_<sur>sra_n<mode>"
3975 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3976 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3977 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3979 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3982 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3983 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift and insert (SLI/SRI via <lr>); operand 1 provides the bits that
;; are preserved, hence the tied "0" constraint.
3988 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3989 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3990 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3991 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3993 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3996 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3997 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift left by immediate (SQSHL/UQSHL/SQSHLU via <sur>/<u>).
4002 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4003 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4004 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4006 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4009 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4010 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating shift right narrow by immediate (SQSHRN/UQSHRN/SQRSHRUN...):
;; result is the narrowed mode <VNARROWQ>.
4016 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4017 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4018 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4020 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4023 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4024 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4028 ;; cm(eq|ge|gt|lt|le)
4029 ;; Note, we have constraints for Dz and Z as different expanders
4030 ;; have different ideas of what should be passed to this pattern.

;; Signed integer vector compare: alternative 1 compares two registers,
;; alternative 2 compares against zero (ZDz constraint) using the #0 form.
4032 (define_insn "aarch64_cm<optab><mode>"
4033 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4035 (COMPARISONS:<V_cmp_result>
4036 (match_operand:VDQ_I 1 "register_operand" "w,w")
4037 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4041 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4042 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4043 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DI-mode signed compare.  If the operands end up in general-purpose
;; registers the split emits a scalar compare + cstoredi_neg sequence
;; (clobbering CC); otherwise it falls through to the *aarch64_cm<optab>di
;; SIMD pattern below, which does not clobber CC.
4046 (define_insn_and_split "aarch64_cm<optab>di"
4047 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4050 (match_operand:DI 1 "register_operand" "w,w,r")
4051 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4053 (clobber (reg:CC CC_REGNUM))]
4057 [(set (match_operand:DI 0 "register_operand")
4060 (match_operand:DI 1 "register_operand")
4061 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4064 /* If we are in the general purpose register file,
4065 we split to a sequence of comparison and store. */
4066 if (GP_REGNUM_P (REGNO (operands[0]))
4067 && GP_REGNUM_P (REGNO (operands[1])))
4069 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4070 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4071 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4072 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4075 /* Otherwise, we expand to a similar pattern which does not
4076 clobber CC_REGNUM. */
4078 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload SIMD-register form of the DI compare (no CC clobber).
4081 (define_insn "*aarch64_cm<optab>di"
4082 [(set (match_operand:DI 0 "register_operand" "=w,w")
4085 (match_operand:DI 1 "register_operand" "w,w")
4086 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4088 "TARGET_SIMD && reload_completed"
4090 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4091 cm<optab>\t%d0, %d1, #0"
4092 [(set_attr "type" "neon_compare, neon_compare_zero")]

;; Unsigned integer vector compare (cmhi/cmhs via UCOMPARISONS); no
;; compare-against-zero alternative here.
4097 (define_insn "aarch64_cm<optab><mode>"
4098 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4100 (UCOMPARISONS:<V_cmp_result>
4101 (match_operand:VDQ_I 1 "register_operand" "w")
4102 (match_operand:VDQ_I 2 "register_operand" "w")
4105 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4106 [(set_attr "type" "neon_compare<q>")]

;; DI-mode unsigned compare: same GP-vs-SIMD split strategy as the signed
;; DI compare above, but using plain CCmode for the scalar compare.
4109 (define_insn_and_split "aarch64_cm<optab>di"
4110 [(set (match_operand:DI 0 "register_operand" "=w,r")
4113 (match_operand:DI 1 "register_operand" "w,r")
4114 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4116 (clobber (reg:CC CC_REGNUM))]
4120 [(set (match_operand:DI 0 "register_operand")
4123 (match_operand:DI 1 "register_operand")
4124 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4127 /* If we are in the general purpose register file,
4128 we split to a sequence of comparison and store. */
4129 if (GP_REGNUM_P (REGNO (operands[0]))
4130 && GP_REGNUM_P (REGNO (operands[1])))
4132 machine_mode mode = CCmode;
4133 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4134 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4135 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4138 /* Otherwise, we expand to a similar pattern which does not
4139 clobber CC_REGNUM. */
4141 [(set_attr "type" "neon_compare,multiple")]

;; Post-reload SIMD-register form of the unsigned DI compare.
4144 (define_insn "*aarch64_cm<optab>di"
4145 [(set (match_operand:DI 0 "register_operand" "=w")
4148 (match_operand:DI 1 "register_operand" "w")
4149 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4151 "TARGET_SIMD && reload_completed"
4152 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4153 [(set_attr "type" "neon_compare")]
4158 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4159 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4160 ;; not (neg (eq (and x y) 0))
4161 ;; which is rewritten by simplify_rtx as
4162 ;; plus (eq (and x y) 0) -1.

;; CMTST (test bits): matched in the simplify_rtx canonical form described
;; above — plus (eq (and x y) 0) -1 — rather than neg (ne ...).
4164 (define_insn "aarch64_cmtst<mode>"
4165 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4166 (plus:<V_cmp_result>
4169 (match_operand:VDQ_I 1 "register_operand" "w")
4170 (match_operand:VDQ_I 2 "register_operand" "w"))
4171 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4172 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4175 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4176 [(set_attr "type" "neon_tst<q>")]

;; DI-mode CMTST: like the cm<optab>di patterns, splits to an AND +
;; compare-with-zero + cstoredi_neg sequence when the operands live in
;; general-purpose registers; otherwise uses the SIMD form below.
4179 (define_insn_and_split "aarch64_cmtstdi"
4180 [(set (match_operand:DI 0 "register_operand" "=w,r")
4184 (match_operand:DI 1 "register_operand" "w,r")
4185 (match_operand:DI 2 "register_operand" "w,r"))
4187 (clobber (reg:CC CC_REGNUM))]
4191 [(set (match_operand:DI 0 "register_operand")
4195 (match_operand:DI 1 "register_operand")
4196 (match_operand:DI 2 "register_operand"))
4199 /* If we are in the general purpose register file,
4200 we split to a sequence of comparison and store. */
4201 if (GP_REGNUM_P (REGNO (operands[0]))
4202 && GP_REGNUM_P (REGNO (operands[1])))
4204 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4205 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4206 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4207 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4208 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4211 /* Otherwise, we expand to a similar pattern which does not
4212 clobber CC_REGNUM. */
4214 [(set_attr "type" "neon_tst,multiple")]

;; Post-reload SIMD-register form of DI-mode CMTST.
4217 (define_insn "*aarch64_cmtstdi"
4218 [(set (match_operand:DI 0 "register_operand" "=w")
4222 (match_operand:DI 1 "register_operand" "w")
4223 (match_operand:DI 2 "register_operand" "w"))
4226 "cmtst\t%d0, %d1, %d2"
4227 [(set_attr "type" "neon_tst")]
4230 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare: register-register, or against zero
;; (YDz constraint) using the literal-0 form.
4232 (define_insn "aarch64_cm<optab><mode>"
4233 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4235 (COMPARISONS:<V_cmp_result>
4236 (match_operand:VALLF 1 "register_operand" "w,w")
4237 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4241 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4242 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4243 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]

4247 ;; Note we can also handle what would be fac(le|lt) by
4248 ;; generating fac(ge|gt).

;; Absolute compare (FACGE/FACGT): compares |op1| with |op2|; the
;; <cmp_1>/<cmp_2> operand selectors swap operands for the le/lt forms.
4250 (define_insn "*aarch64_fac<optab><mode>"
4251 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4253 (FAC_COMPARISONS:<V_cmp_result>
4254 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
4255 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
4258 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4259 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; ADDP: pairwise add on 64-bit integer vectors.
4264 (define_insn "aarch64_addp<mode>"
4265 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4267 [(match_operand:VD_BHSI 1 "register_operand" "w")
4268 (match_operand:VD_BHSI 2 "register_operand" "w")]
4271 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4272 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar ADDP: reduces the two DI elements of a V2DI input to one DI.
4275 (define_insn "aarch64_addpdi"
4276 [(set (match_operand:DI 0 "register_operand" "=w")
4278 [(match_operand:V2DI 1 "register_operand" "w")]
4282 [(set_attr "type" "neon_reduc_add")]

;; Vector square root (FSQRT) for the float vector modes.
4287 (define_insn "sqrt<mode>2"
4288 [(set (match_operand:VDQF 0 "register_operand" "=w")
4289 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
4291 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4292 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
4295 ;; Patterns for vector struct loads and stores.

;; LD2: load two-register structure into an OImode (2x128-bit) register
;; pair; the inner unspec only carries the element mode VQ.
4297 (define_insn "aarch64_simd_ld2<mode>"
4298 [(set (match_operand:OI 0 "register_operand" "=w")
4299 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4300 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4303 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4304 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one struct element and replicate it to all lanes of both
;; destination registers; the memory operand is BLKmode.
4307 (define_insn "aarch64_simd_ld2r<mode>"
4308 [(set (match_operand:OI 0 "register_operand" "=w")
4309 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4310 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4313 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4314 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 single-lane: load into lane %3 of the register pair, other lanes
;; coming from operand 2 (tied to the output); lane endian-adjusted.
4317 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4318 [(set (match_operand:OI 0 "register_operand" "=w")
4319 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4320 (match_operand:OI 2 "register_operand" "0")
4321 (match_operand:SI 3 "immediate_operand" "i")
4322 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4326 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4327 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4329 [(set_attr "type" "neon_load2_one_lane")]

;; Standard-named expander: on big-endian, load into a temp then reverse
;; the register list with aarch64_rev_reglistoi to match GCC lane order.
4332 (define_expand "vec_load_lanesoi<mode>"
4333 [(set (match_operand:OI 0 "register_operand" "=w")
4334 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4335 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4339 if (BYTES_BIG_ENDIAN)
4341 rtx tmp = gen_reg_rtx (OImode);
4342 rtx mask = aarch64_reverse_mask (<MODE>mode);
4343 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4344 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4347 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store two-register structure.
4351 (define_insn "aarch64_simd_st2<mode>"
4352 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4353 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4354 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4357 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4358 [(set_attr "type" "neon_store2_2reg<q>")]

4361 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 single-lane: store lane %2 of the register pair; lane flipped for
;; big-endian at output time only (see comment above).
4362 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4363 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4364 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4365 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4366 (match_operand:SI 2 "immediate_operand" "i")]
4370 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4371 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4373 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Standard-named store expander; mirrors vec_load_lanesoi's big-endian
;; register-list reversal, applied before the store.
4376 (define_expand "vec_store_lanesoi<mode>"
4377 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4378 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4379 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4383 if (BYTES_BIG_ENDIAN)
4385 rtx tmp = gen_reg_rtx (OImode);
4386 rtx mask = aarch64_reverse_mask (<MODE>mode);
4387 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4388 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4391 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3: three-register struct loads/stores using CImode (3x128-bit);
;; structure parallels the LD2/ST2 patterns above.

;; LD3 whole-structure load.
4395 (define_insn "aarch64_simd_ld3<mode>"
4396 [(set (match_operand:CI 0 "register_operand" "=w")
4397 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4398 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4401 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4402 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD3R: load one element, replicate to all lanes of all three registers.
4405 (define_insn "aarch64_simd_ld3r<mode>"
4406 [(set (match_operand:CI 0 "register_operand" "=w")
4407 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4408 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4411 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4412 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; LD3 single-lane load; operand 2 supplies the untouched lanes, lane
;; index endian-adjusted before printing.
4415 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4416 [(set (match_operand:CI 0 "register_operand" "=w")
4417 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4418 (match_operand:CI 2 "register_operand" "0")
4419 (match_operand:SI 3 "immediate_operand" "i")
4420 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4424 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4425 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4427 [(set_attr "type" "neon_load3_one_lane")]

;; Standard-named expander with big-endian register-list reversal.
4430 (define_expand "vec_load_lanesci<mode>"
4431 [(set (match_operand:CI 0 "register_operand" "=w")
4432 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4433 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4437 if (BYTES_BIG_ENDIAN)
4439 rtx tmp = gen_reg_rtx (CImode);
4440 rtx mask = aarch64_reverse_mask (<MODE>mode);
4441 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4442 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4445 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; ST3 whole-structure store.
4449 (define_insn "aarch64_simd_st3<mode>"
4450 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4451 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4452 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4455 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4456 [(set_attr "type" "neon_store3_3reg<q>")]

4459 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 single-lane store; lane flipped for big-endian at output only.
4460 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4461 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4462 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4463 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4464 (match_operand:SI 2 "immediate_operand" "i")]
4468 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4469 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4471 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-named store expander with big-endian reversal before the store.
4474 (define_expand "vec_store_lanesci<mode>"
4475 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4476 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4477 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4481 if (BYTES_BIG_ENDIAN)
4483 rtx tmp = gen_reg_rtx (CImode);
4484 rtx mask = aarch64_reverse_mask (<MODE>mode);
4485 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4486 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4489 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4: load four Q registers (XImode) with 4-way de-interleaving.
;; %S0/%V0 name the first and fourth registers of the list.
4493 (define_insn "aarch64_simd_ld4<mode>"
4494 [(set (match_operand:XI 0 "register_operand" "=w")
4495 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4496 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4499 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4500 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate it to all lanes of
;; each of the four destination registers.  The memory operand is BLK
;; because only the structure itself is accessed.
4503 (define_insn "aarch64_simd_ld4r<mode>"
4504 [(set (match_operand:XI 0 "register_operand" "=w")
4505 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4506 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4509 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4510 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 single-lane form: load one structure into lane %3 of each of
;; the four registers; operand 2 ties the untouched lanes to the
;; destination.  The RTL lane index is converted to the architectural
;; one via ENDIAN_LANE_N only at assembly-output time.
4513 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4514 [(set (match_operand:XI 0 "register_operand" "=w")
4515 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4516 (match_operand:XI 2 "register_operand" "0")
4517 (match_operand:SI 3 "immediate_operand" "i")
4518 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4522 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4523 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4525 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander for interleaved 4-element loads; on
;; big-endian, lane-reverse the loaded register list (cf.
;; vec_load_lanesci) so RTL keeps GCC vector-extension lane order.
4528 (define_expand "vec_load_lanesxi<mode>"
4529 [(set (match_operand:XI 0 "register_operand" "=w")
4530 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4531 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4535 if (BYTES_BIG_ENDIAN)
4537 rtx tmp = gen_reg_rtx (XImode);
4538 rtx mask = aarch64_reverse_mask (<MODE>mode);
4539 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4540 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4543 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; ST4: store four Q registers with 4-way interleaving.
4547 (define_insn "aarch64_simd_st4<mode>"
4548 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4549 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4550 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4553 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4554 [(set_attr "type" "neon_store4_4reg<q>")]
;; ST4 single-lane form; lane index remapped for endianness at output
;; time, as in the store-lane patterns above.
4557 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4558 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4559 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4560 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4561 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4562 (match_operand:SI 2 "immediate_operand" "i")]
4566 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4567 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4569 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander for interleaved 4-element stores; big-endian
;; lane-reverses the source register list before emitting ST4.
4572 (define_expand "vec_store_lanesxi<mode>"
4573 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4574 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4575 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4579 if (BYTES_BIG_ENDIAN)
4581 rtx tmp = gen_reg_rtx (XImode);
4582 rtx mask = aarch64_reverse_mask (<MODE>mode);
4583 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4584 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4587 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the lane order of every register in a VSTRUCT register
;; list, using operand 2 as a V16QI TBL permute mask.  After reload
;; this splits into one TBL per constituent Q register; the earlyclobber
;; ("=&w") keeps the destination list disjoint from the source so the
;; per-register TBLs cannot overwrite inputs that are still needed.
4591 (define_insn_and_split "aarch64_rev_reglist<mode>"
4592 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4594 [(match_operand:VSTRUCT 1 "register_operand" "w")
4595 (match_operand:V16QI 2 "register_operand" "w")]
4596 UNSPEC_REV_REGLIST))]
4599 "&& reload_completed"
4603 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4604 for (i = 0; i < nregs; i++)
4606 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4607 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4608 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4612 [(set_attr "type" "neon_tbl1_q")
4613 (set_attr "length" "<insn_count>")]
4616 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the multi-register structure modes (OI/CI/XI
;; families).  Before reload, force a non-register source into a
;; register when the destination is not a register, so at most one
;; memory operand remains.
4618 (define_expand "mov<mode>"
4619 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4620 (match_operand:VSTRUCT 1 "general_operand" ""))]
4623 if (can_create_pseudo_p ())
4625 if (GET_CODE (operands[0]) != REG)
4626 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian-only structure move: reg-reg (split later, type
;; "multiple"), ST1 of the whole register list, or LD1 into it.
;; Big-endian uses the *aarch64_be_mov patterns below instead.
4630 (define_insn "*aarch64_mov<mode>"
4631 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4632 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4633 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4634 && (register_operand (operands[0], <MODE>mode)
4635 || register_operand (operands[1], <MODE>mode))"
4638 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4639 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4640 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4641 neon_load<nregs>_<nregs>reg_q")
4642 (set_attr "length" "<insn_count>,4,4")]
;; Endianness-safe single-register LD1/ST1.  The unspec keeps these
;; from being treated as ordinary moves, preserving the architectural
;; LD1/ST1 element ordering that big-endian code relies on.
4645 (define_insn "aarch64_be_ld1<mode>"
4646 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4647 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4648 "aarch64_simd_struct_operand" "Utv")]
4651 "ld1\\t{%0<Vmtype>}, %1"
4652 [(set_attr "type" "neon_load1_1reg<q>")]
;; Store counterpart of aarch64_be_ld1.
4655 (define_insn "aarch64_be_st1<mode>"
4656 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4657 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4660 "st1\\t{%1<Vmtype>}, %0"
4661 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian structure moves for the opaque OI/CI/XI modes.  Memory
;; alternatives use Q-register load/store pairs (see "length": one
;; 4-byte insn per alternative beyond reg-reg); reg-reg copies are
;; "multiple" and split after reload into per-TImode moves.
4664 (define_insn "*aarch64_be_movoi"
4665 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4666 (match_operand:OI 1 "general_operand" " w,w,m"))]
4667 "TARGET_SIMD && BYTES_BIG_ENDIAN
4668 && (register_operand (operands[0], OImode)
4669 || register_operand (operands[1], OImode))"
4674 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4675 (set_attr "length" "8,4,4")]
;; CImode (three Q registers): reg-reg copy is three moves (length 12).
4678 (define_insn "*aarch64_be_movci"
4679 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4680 (match_operand:CI 1 "general_operand" " w,w,o"))]
4681 "TARGET_SIMD && BYTES_BIG_ENDIAN
4682 && (register_operand (operands[0], CImode)
4683 || register_operand (operands[1], CImode))"
4685 [(set_attr "type" "multiple")
4686 (set_attr "length" "12,4,4")]
;; XImode (four Q registers): reg-reg copy is four moves (length 16).
4689 (define_insn "*aarch64_be_movxi"
4690 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4691 (match_operand:XI 1 "general_operand" " w,w,o"))]
4692 "TARGET_SIMD && BYTES_BIG_ENDIAN
4693 && (register_operand (operands[0], XImode)
4694 || register_operand (operands[1], XImode))"
4696 [(set_attr "type" "multiple")
4697 (set_attr "length" "16,4,4")]
;; Post-reload splitters for the opaque structure modes.  Reg-reg
;; copies are decomposed into TImode register moves via
;; aarch64_simd_emit_reg_reg_move (2, 3 or 4 of them); big-endian
;; reg<->mem CI/XI copies are decomposed into OImode/TImode subreg
;; moves so the *aarch64_be_mov patterns above can handle each piece.
;; OImode: two TImode moves.
4701 [(set (match_operand:OI 0 "register_operand")
4702 (match_operand:OI 1 "register_operand"))]
4703 "TARGET_SIMD && reload_completed"
4706 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CImode: three TImode moves for reg-reg; otherwise on big-endian,
;; one OImode move for the first 32 bytes plus a V16QI move for the
;; trailing TImode chunk.
4711 [(set (match_operand:CI 0 "nonimmediate_operand")
4712 (match_operand:CI 1 "general_operand"))]
4713 "TARGET_SIMD && reload_completed"
4716 if (register_operand (operands[0], CImode)
4717 && register_operand (operands[1], CImode))
4719 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4722 else if (BYTES_BIG_ENDIAN)
4724 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4725 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4726 emit_move_insn (gen_lowpart (V16QImode,
4727 simplify_gen_subreg (TImode, operands[0],
4729 gen_lowpart (V16QImode,
4730 simplify_gen_subreg (TImode, operands[1],
;; XImode: four TImode moves for reg-reg; otherwise on big-endian,
;; two OImode subreg moves (offsets 0 and 32 bytes).
4739 [(set (match_operand:XI 0 "nonimmediate_operand")
4740 (match_operand:XI 1 "general_operand"))]
4741 "TARGET_SIMD && reload_completed"
4744 if (register_operand (operands[0], XImode)
4745 && register_operand (operands[1], XImode))
4747 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4750 else if (BYTES_BIG_ENDIAN)
4752 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4753 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4754 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4755 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Expander for the ldNr (load-and-replicate) intrinsics.  Builds a
;; BLKmode memory reference from the pointer in operand 1, sizes it to
;; the structure being loaded, and hands off to the matching
;; aarch64_simd_ldNr insn.
4762 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4763 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4764 (match_operand:DI 1 "register_operand" "w")
4765 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4769 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4772 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; ldN_dreg patterns: D-register structure loads whose results are
;; modelled as an OI/CI/XI value with each 64-bit piece zero-extended
;; (concatenated with a zero vector) to fill a 128-bit slot.
;; LD2 of 64-bit vector modes (VD).
4777 (define_insn "aarch64_ld2<mode>_dreg"
4778 [(set (match_operand:OI 0 "register_operand" "=w")
4783 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4785 (vec_duplicate:VD (const_int 0)))
4787 (unspec:VD [(match_dup 1)]
4789 (vec_duplicate:VD (const_int 0)))) 0))]
4791 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4792 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2 of 64-bit scalar modes (DX): no interleaving is observable for
;; one-element "vectors", so plain LD1 of two registers suffices.
4795 (define_insn "aarch64_ld2<mode>_dreg"
4796 [(set (match_operand:OI 0 "register_operand" "=w")
4801 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4805 (unspec:DX [(match_dup 1)]
4807 (const_int 0))) 0))]
4809 "ld1\\t{%S0.1d - %T0.1d}, %1"
4810 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3, vector (VD) variant.
4813 (define_insn "aarch64_ld3<mode>_dreg"
4814 [(set (match_operand:CI 0 "register_operand" "=w")
4820 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4822 (vec_duplicate:VD (const_int 0)))
4824 (unspec:VD [(match_dup 1)]
4826 (vec_duplicate:VD (const_int 0))))
4828 (unspec:VD [(match_dup 1)]
4830 (vec_duplicate:VD (const_int 0)))) 0))]
4832 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4833 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3, scalar (DX) variant — emitted as LD1 of three registers.
4836 (define_insn "aarch64_ld3<mode>_dreg"
4837 [(set (match_operand:CI 0 "register_operand" "=w")
4843 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4847 (unspec:DX [(match_dup 1)]
4851 (unspec:DX [(match_dup 1)]
4853 (const_int 0))) 0))]
4855 "ld1\\t{%S0.1d - %U0.1d}, %1"
4856 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4, vector (VD) variant.
4859 (define_insn "aarch64_ld4<mode>_dreg"
4860 [(set (match_operand:XI 0 "register_operand" "=w")
4866 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4868 (vec_duplicate:VD (const_int 0)))
4870 (unspec:VD [(match_dup 1)]
4872 (vec_duplicate:VD (const_int 0))))
4875 (unspec:VD [(match_dup 1)]
4877 (vec_duplicate:VD (const_int 0)))
4879 (unspec:VD [(match_dup 1)]
4881 (vec_duplicate:VD (const_int 0))))) 0))]
4883 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4884 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4, scalar (DX) variant — emitted as LD1 of four registers.
4887 (define_insn "aarch64_ld4<mode>_dreg"
4888 [(set (match_operand:XI 0 "register_operand" "=w")
4894 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4898 (unspec:DX [(match_dup 1)]
4903 (unspec:DX [(match_dup 1)]
4907 (unspec:DX [(match_dup 1)]
4909 (const_int 0)))) 0))]
4911 "ld1\\t{%S0.1d - %V0.1d}, %1"
4912 [(set_attr "type" "neon_load1_4reg<q>")]
;; Intrinsic expander for structure loads of 64-bit element modes
;; (VDC).  Memory is BLKmode sized to nregs * 8 bytes, then the
;; matching _dreg insn is emitted.
4915 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4916 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4917 (match_operand:DI 1 "register_operand" "r")
4918 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4921 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4922 set_mem_size (mem, <VSTRUCT:nregs> * 8);
4924 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; vld1 intrinsic: a plain move on little-endian, but the dedicated
;; aarch64_be_ld1 insn on big-endian to guarantee LD1 element order.
4928 (define_expand "aarch64_ld1<VALL_F16:mode>"
4929 [(match_operand:VALL_F16 0 "register_operand")
4930 (match_operand:DI 1 "register_operand")]
4933 machine_mode mode = <VALL_F16:MODE>mode;
4934 rtx mem = gen_rtx_MEM (mode, operands[1]);
4936 if (BYTES_BIG_ENDIAN)
4937 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
4939 emit_move_insn (operands[0], mem);
;; Intrinsic expander for structure loads of 128-bit vector modes (VQ).
4943 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4944 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4945 (match_operand:DI 1 "register_operand" "r")
4946 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4949 machine_mode mode = <VSTRUCT:MODE>mode;
4950 rtx mem = gen_rtx_MEM (mode, operands[1]);
4952 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; vldN_lane intrinsics: range-check the lane index (operand 3), size
;; the BLK memory to one structure, and emit the lane-load insn with
;; operand 2 supplying the lanes that are not overwritten.
4956 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
4957 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4958 (match_operand:DI 1 "register_operand" "w")
4959 (match_operand:VSTRUCT 2 "register_operand" "0")
4960 (match_operand:SI 3 "immediate_operand" "i")
4961 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4964 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4965 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4968 aarch64_simd_lane_bounds (operands[3], 0,
4969 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
4971 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
4972 operands[0], mem, operands[2], operands[3]));
4976 ;; Expanders for builtins to extract vector registers from large
4977 ;; opaque integer modes.
;; Extract the D register in position `part' of a structure value.
;; Each constituent occupies a 16-byte slot, hence offset = part * 16;
;; the value is pulled out as its 128-bit double-width mode (VDBL) and
;; then narrowed to the 64-bit result mode via lowpart.
4981 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4982 [(match_operand:VDC 0 "register_operand" "=w")
4983 (match_operand:VSTRUCT 1 "register_operand" "w")
4984 (match_operand:SI 2 "immediate_operand" "i")]
4987 int part = INTVAL (operands[2]);
4988 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4989 int offset = part * 16;
4991 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4992 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Q-register extraction: a direct 16-byte-offset subreg move.
4998 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4999 [(match_operand:VQ 0 "register_operand" "=w")
5000 (match_operand:VSTRUCT 1 "register_operand" "w")
5001 (match_operand:SI 2 "immediate_operand" "i")]
5004 int part = INTVAL (operands[2]);
5005 int offset = part * 16;
5007 emit_move_insn (operands[0],
5008 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5012 ;; Permuted-store expanders for neon intrinsics.
5014 ;; Permute instructions
;; Standard name: constant permutation.  Delegates to
;; aarch64_expand_vec_perm_const, which picks the best sequence
;; (e.g. a single permute insn or a TBL) for the given selector.
5018 (define_expand "vec_perm_const<mode>"
5019 [(match_operand:VALL_F16 0 "register_operand")
5020 (match_operand:VALL_F16 1 "register_operand")
5021 (match_operand:VALL_F16 2 "register_operand")
5022 (match_operand:<V_cmp_result> 3)]
5025 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5026 operands[2], operands[3]))
;; Standard name: variable permutation on byte vectors (VB); expands
;; to a TBL-based sequence via aarch64_expand_vec_perm.
5032 (define_expand "vec_perm<mode>"
5033 [(match_operand:VB 0 "register_operand")
5034 (match_operand:VB 1 "register_operand")
5035 (match_operand:VB 2 "register_operand")
5036 (match_operand:VB 3 "register_operand")]
5039 aarch64_expand_vec_perm (operands[0], operands[1],
5040 operands[2], operands[3]);
;; TBL with a single source register: out-of-range indices give zero.
5044 (define_insn "aarch64_tbl1<mode>"
5045 [(set (match_operand:VB 0 "register_operand" "=w")
5046 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5047 (match_operand:VB 2 "register_operand" "w")]
5050 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5051 [(set_attr "type" "neon_tbl1<q>")]
5054 ;; Two source registers.
;; TBL over a two-register (OImode) table.
5056 (define_insn "aarch64_tbl2v16qi"
5057 [(set (match_operand:V16QI 0 "register_operand" "=w")
5058 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5059 (match_operand:V16QI 2 "register_operand" "w")]
5062 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5063 [(set_attr "type" "neon_tbl2_q")]
;; TBL, two-register table, result/index in any byte-vector mode (VB).
5066 (define_insn "aarch64_tbl3<mode>"
5067 [(set (match_operand:VB 0 "register_operand" "=w")
5068 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5069 (match_operand:VB 2 "register_operand" "w")]
5072 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5073 [(set_attr "type" "neon_tbl3")]
;; TBX, two-register table: out-of-range indices leave the tied
;; destination (operand 1, constraint "0") unchanged instead of
;; zeroing.
5076 (define_insn "aarch64_tbx4<mode>"
5077 [(set (match_operand:VB 0 "register_operand" "=w")
5078 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5079 (match_operand:OI 2 "register_operand" "w")
5080 (match_operand:VB 3 "register_operand" "w")]
5083 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5084 [(set_attr "type" "neon_tbl4")]
5087 ;; Three source registers.
;; TBL over a three-register (CImode) table.
5089 (define_insn "aarch64_qtbl3<mode>"
5090 [(set (match_operand:VB 0 "register_operand" "=w")
5091 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5092 (match_operand:VB 2 "register_operand" "w")]
5095 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5096 [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register table; destination tied to operand 1.
5099 (define_insn "aarch64_qtbx3<mode>"
5100 [(set (match_operand:VB 0 "register_operand" "=w")
5101 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5102 (match_operand:CI 2 "register_operand" "w")
5103 (match_operand:VB 3 "register_operand" "w")]
5106 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5107 [(set_attr "type" "neon_tbl3")]
5110 ;; Four source registers.
;; TBL over a four-register (XImode) table.
5112 (define_insn "aarch64_qtbl4<mode>"
5113 [(set (match_operand:VB 0 "register_operand" "=w")
5114 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5115 (match_operand:VB 2 "register_operand" "w")]
5118 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5119 [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register table; destination tied to operand 1.
5122 (define_insn "aarch64_qtbx4<mode>"
5123 [(set (match_operand:VB 0 "register_operand" "=w")
5124 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5125 (match_operand:XI 2 "register_operand" "w")
5126 (match_operand:VB 3 "register_operand" "w")]
5129 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5130 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into one OImode register list; split
;; after reload into plain register moves by
;; aarch64_split_combinev16qi.
5133 (define_insn_and_split "aarch64_combinev16qi"
5134 [(set (match_operand:OI 0 "register_operand" "=w")
5135 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5136 (match_operand:V16QI 2 "register_operand" "w")]
5140 "&& reload_completed"
5143 aarch64_split_combinev16qi (operands);
5146 [(set_attr "type" "multiple")]
;; Two-source permutes (ZIP/UZP/TRN families via the PERMUTE iterator;
;; perm_insn/perm_hilo select the mnemonic and its 1/2 variant).
5149 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5150 [(set (match_operand:VALL 0 "register_operand" "=w")
5151 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5152 (match_operand:VALL 2 "register_operand" "w")]
5155 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5156 [(set_attr "type" "neon_permute<q>")]
;; EXT: extract a vector from a register pair.  Operand 3 arrives as a
;; lane index and is scaled to the byte offset EXT expects at
;; assembly-output time.
5159 ;; Note immediate (third) operand is lane index not byte index.
5160 (define_insn "aarch64_ext<mode>"
5161 [(set (match_operand:VALL 0 "register_operand" "=w")
5162 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5163 (match_operand:VALL 2 "register_operand" "w")
5164 (match_operand:SI 3 "immediate_operand" "i")]
5168 operands[3] = GEN_INT (INTVAL (operands[3])
5169 * GET_MODE_UNIT_SIZE (<MODE>mode));
5170 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5172 [(set_attr "type" "neon_ext<q>")]
;; REV16/REV32/REV64 element reversal (REVERSE iterator).
5175 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5176 [(set (match_operand:VALL 0 "register_operand" "=w")
5177 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
5180 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5181 [(set_attr "type" "neon_rev<q>")]
;; stN_dreg patterns: D-register structure stores, the store-side
;; counterparts of the ldN_dreg insns.  Each has a vector (VD)
;; variant using the interleaving STn and a 64-bit scalar (DX)
;; variant that degenerates to a multi-register ST1.
;; ST2, vector variant.
5184 (define_insn "aarch64_st2<mode>_dreg"
5185 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5186 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5187 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5190 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5191 [(set_attr "type" "neon_store2_2reg")]
;; ST2, scalar variant.
5194 (define_insn "aarch64_st2<mode>_dreg"
5195 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5196 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5197 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5200 "st1\\t{%S1.1d - %T1.1d}, %0"
5201 [(set_attr "type" "neon_store1_2reg")]
;; ST3, vector variant.
5204 (define_insn "aarch64_st3<mode>_dreg"
5205 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5206 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5207 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5210 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5211 [(set_attr "type" "neon_store3_3reg")]
;; ST3, scalar variant.
5214 (define_insn "aarch64_st3<mode>_dreg"
5215 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5216 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5217 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5220 "st1\\t{%S1.1d - %U1.1d}, %0"
5221 [(set_attr "type" "neon_store1_3reg")]
;; ST4, vector variant.
5224 (define_insn "aarch64_st4<mode>_dreg"
5225 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5226 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5227 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5230 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5231 [(set_attr "type" "neon_store4_4reg")]
;; ST4, scalar variant.
5234 (define_insn "aarch64_st4<mode>_dreg"
5235 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5236 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5237 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240 "st1\\t{%S1.1d - %V1.1d}, %0"
5241 [(set_attr "type" "neon_store1_4reg")]
;; Intrinsic expander for structure stores of 64-bit element modes
;; (VDC): BLKmode memory sized to nregs * 8 bytes, then the _dreg
;; store insn.
5244 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5245 [(match_operand:DI 0 "register_operand" "r")
5246 (match_operand:VSTRUCT 1 "register_operand" "w")
5247 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5251 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5253 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Intrinsic expander for structure stores of 128-bit vector modes (VQ).
5257 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5258 [(match_operand:DI 0 "register_operand" "r")
5259 (match_operand:VSTRUCT 1 "register_operand" "w")
5260 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 machine_mode mode = <VSTRUCT:MODE>mode;
5264 rtx mem = gen_rtx_MEM (mode, operands[0]);
5266 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; vstN_lane intrinsics: BLK memory sized to one structure, then the
;; lane-store insn with the immediate lane index in operand 2.
5270 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5271 [(match_operand:DI 0 "register_operand" "r")
5272 (match_operand:VSTRUCT 1 "register_operand" "w")
5273 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5274 (match_operand:SI 2 "immediate_operand")]
5277 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5278 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5281 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5282 mem, operands[1], operands[2]));
;; vst1 intrinsic: plain move on little-endian, aarch64_be_st1 on
;; big-endian (mirror of aarch64_ld1 above).
5286 (define_expand "aarch64_st1<VALL_F16:mode>"
5287 [(match_operand:DI 0 "register_operand")
5288 (match_operand:VALL_F16 1 "register_operand")]
5291 machine_mode mode = <VALL_F16:MODE>mode;
5292 rtx mem = gen_rtx_MEM (mode, operands[0]);
5294 if (BYTES_BIG_ENDIAN)
5295 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5297 emit_move_insn (mem, operands[1]);
5301 ;; Expander for builtins to insert vector registers into large
5302 ;; opaque integer modes.
5304 ;; Q-register list. We don't need a D-reg inserter as we zero
5305 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole structure (operand 1) into the result, then
;; overwrite the 16-byte slot selected by operand 3 with the Q-register
;; value in operand 2.
5307 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5308 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5309 (match_operand:VSTRUCT 1 "register_operand" "0")
5310 (match_operand:VQ 2 "register_operand" "w")
5311 (match_operand:SI 3 "immediate_operand" "i")]
5314 int part = INTVAL (operands[3]);
5315 int offset = part * 16;
5317 emit_move_insn (operands[0], operands[1]);
5318 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
;; Standard name vec_init: build a vector from operand 1 (a PARALLEL
;; of element values); all the work is done in
;; aarch64_expand_vector_init.
5323 ;; Standard pattern name vec_init<mode>.
5325 (define_expand "vec_init<mode>"
5326 [(match_operand:VALL_F16 0 "register_operand" "")
5327 (match_operand 1 "" "")]
5330 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load a single element from memory and duplicate it to every
;; lane of the destination.
5334 (define_insn "*aarch64_simd_ld1r<mode>"
5335 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5336 (vec_duplicate:VALL_F16
5337 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5339 "ld1r\\t{%0.<Vtype>}, %1"
5340 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: floating-point reciprocal estimate, vector form.
5343 (define_insn "aarch64_frecpe<mode>"
5344 [(set (match_operand:VDQF 0 "register_operand" "=w")
5345 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5348 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5349 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
;; FRECPE/FRECPX on scalar GPF modes; FRECP's frecp_suffix selects the
;; mnemonic variant.
5352 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5353 [(set (match_operand:GPF 0 "register_operand" "=w")
5354 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5357 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5358 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
;; FRECPS: Newton-Raphson reciprocal step.
5361 (define_insn "aarch64_frecps<mode>"
5362 [(set (match_operand:VALLF 0 "register_operand" "=w")
5363 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
5364 (match_operand:VALLF 2 "register_operand" "w")]
5367 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5368 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
;; URECPE: unsigned integer reciprocal estimate.
5371 (define_insn "aarch64_urecpe<mode>"
5372 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5373 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5376 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5377 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5379 ;; Standard pattern name vec_extract<mode>.
;; Extract element operand 2 of vector operand 1 into a scalar;
;; delegates to the aarch64_get_lane pattern.
5381 (define_expand "vec_extract<mode>"
5382 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5383 (match_operand:VALL_F16 1 "register_operand" "")
5384 (match_operand:SI 2 "immediate_operand" "")]
5388 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single-round encrypt/decrypt (aese/aesd, chosen by aes_op).
;; The state operand (1) is tied to the destination; operand 2 is the
;; round key.
5394 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5395 [(set (match_operand:V16QI 0 "register_operand" "=w")
5396 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5397 (match_operand:V16QI 2 "register_operand" "w")]
5399 "TARGET_SIMD && TARGET_CRYPTO"
5400 "aes<aes_op>\\t%0.16b, %2.16b"
5401 [(set_attr "type" "crypto_aese")]
;; AES MixColumns / InverseMixColumns (aesmc/aesimc).  Two
;; alternatives: when AES/AESMC fusion is enabled, only the tied
;; "0" alternative is allowed (via set_attr_alternative "enabled") so
;; the register allocator keeps the AESE result in place and the
;; fused pair can issue back to back; otherwise any source register
;; is acceptable.
5404 ;; When AES/AESMC fusion is enabled we want the register allocation to
5408 ;; So prefer to tie operand 1 to operand 0 when fusing.
5410 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5411 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5412 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5414 "TARGET_SIMD && TARGET_CRYPTO"
5415 "aes<aesmc_op>\\t%0.16b, %1.16b"
5416 [(set_attr "type" "crypto_aesmc")
5417 (set_attr_alternative "enabled"
5418 [(if_then_else (match_test
5419 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5420 (const_string "yes" )
5421 (const_string "no"))
5422 (const_string "yes")])]
;; SHA1H: fixed-rotate of a single 32-bit word.
5427 (define_insn "aarch64_crypto_sha1hsi"
5428 [(set (match_operand:SI 0 "register_operand" "=w")
5429 (unspec:SI [(match_operand:SI 1
5430 "register_operand" "w")]
5432 "TARGET_SIMD && TARGET_CRYPTO"
5434 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1: schedule update, second part.  Operand 1 is tied to the
;; destination.
5437 (define_insn "aarch64_crypto_sha1su1v4si"
5438 [(set (match_operand:V4SI 0 "register_operand" "=w")
5439 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5440 (match_operand:V4SI 2 "register_operand" "w")]
5442 "TARGET_SIMD && TARGET_CRYPTO"
5443 "sha1su1\\t%0.4s, %2.4s"
5444 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1P/SHA1M hash update (selected by sha1_op): 128-bit hash
;; state in %q0 (tied), working word in %s2, schedule in operand 3.
5447 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5448 [(set (match_operand:V4SI 0 "register_operand" "=w")
5449 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5450 (match_operand:SI 2 "register_operand" "w")
5451 (match_operand:V4SI 3 "register_operand" "w")]
5453 "TARGET_SIMD && TARGET_CRYPTO"
5454 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5455 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0: schedule update, first part.
5458 (define_insn "aarch64_crypto_sha1su0v4si"
5459 [(set (match_operand:V4SI 0 "register_operand" "=w")
5460 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5461 (match_operand:V4SI 2 "register_operand" "w")
5462 (match_operand:V4SI 3 "register_operand" "w")]
5464 "TARGET_SIMD && TARGET_CRYPTO"
5465 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5466 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash update (selected by sha256_op); hash state in
;; %q0 (tied to operand 1).
5471 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5472 [(set (match_operand:V4SI 0 "register_operand" "=w")
5473 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5474 (match_operand:V4SI 2 "register_operand" "w")
5475 (match_operand:V4SI 3 "register_operand" "w")]
5477 "TARGET_SIMD && TARGET_CRYPTO"
5478 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5479 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0: schedule update, first part.  Operand 1 is tied to the
;; destination.
5482 (define_insn "aarch64_crypto_sha256su0v4si"
5483 [(set (match_operand:V4SI 0 "register_operand" "=w")
5484 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5485 (match_operand:V4SI 2 "register_operand" "w")]
5487 "TARGET_SIMD && TARGET_CRYPTO"
5488 "sha256su0\\t%0.4s, %2.4s"
5489 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1: schedule update, second part.  Operand 1 is tied to the
;; destination.
5492 (define_insn "aarch64_crypto_sha256su1v4si"
5493 [(set (match_operand:V4SI 0 "register_operand" "=w")
5494 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5495 (match_operand:V4SI 2 "register_operand" "w")
5496 (match_operand:V4SI 3 "register_operand" "w")]
5498 "TARGET_SIMD && TARGET_CRYPTO"
5499 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5500 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: 64x64 -> 128-bit carry-less (polynomial) multiply of the
;; low halves.
5505 (define_insn "aarch64_crypto_pmulldi"
5506 [(set (match_operand:TI 0 "register_operand" "=w")
5507 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5508 (match_operand:DI 2 "register_operand" "w")]
5510 "TARGET_SIMD && TARGET_CRYPTO"
5511 "pmull\\t%0.1q, %1.1d, %2.1d"
5512 [(set_attr "type" "neon_mul_d_long")]
5515 (define_insn "aarch64_crypto_pmullv2di"
5516 [(set (match_operand:TI 0 "register_operand" "=w")
5517 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5518 (match_operand:V2DI 2 "register_operand" "w")]
5520 "TARGET_SIMD && TARGET_CRYPTO"
5521 "pmull2\\t%0.1q, %1.2d, %2.2d"
5522 [(set_attr "type" "neon_mul_d_long")]