1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this extract drops many original lines (gaps in the
;; embedded numbering), so the patterns below are incomplete as shown.
;; Vector move expander: mem-to-mem moves are invalid RTL, so when the
;; destination is memory the source is forced into a register first.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 	(match_operand:VALL_F16 1 "general_operand" ""))]
26     if (GET_CODE (operands[0]) == MEM)
27       operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  The comment below explains the
;; force_reg: this pattern may not fail, so a non-register source is
;; registerized when the destination is also not a register.
31 (define_expand "movmisalign<mode>"
32   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33         (match_operand:VALL 1 "general_operand" ""))]
36   /* This pattern is not permitted to fail during expansion: if both arguments
37      are non-registers (e.g. memory := constant, which can be created by the
38      auto-vectorizer), force operand 1 into a register.  */
39   if (!register_operand (operands[0], <MODE>mode)
40       && !register_operand (operands[1], <MODE>mode))
41     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast a scalar into every lane of an integer vector.  Two
;; alternatives: source in a GP register (DUP from Rn) or in a SIMD
;; register (DUP from element 0).
44 (define_insn "aarch64_simd_dup<mode>"
45   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 	(match_operand:<VEL> 1 "register_operand" "r, w")))]
50    dup\\t%0.<Vtype>, %<vw>1
51    dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52   [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant: broadcast element 0 of a SIMD register.
55 (define_insn "aarch64_simd_dup<mode>"
56   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 	(vec_duplicate:VDQF_F16
58 	  (match_operand:<VEL> 1 "register_operand" "w")))]
60   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61   [(set_attr "type" "neon_dup<q>")]
;; Broadcast a selected lane of a vector.  The lane index is remapped
;; for big-endian lane numbering via ENDIAN_LANE_N before printing.
64 (define_insn "aarch64_dup_lane<mode>"
65   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 	(vec_duplicate:VALL_F16
68 	  (match_operand:VALL_F16 1 "register_operand" "w")
69 	  (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76   [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the opposite width (<VSWAP_WIDTH>),
;; so the lane index is remapped in the source vector's mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 	(vec_duplicate:VALL_F16
83 	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 	  (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 					  INTVAL (operands[2])));
90     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92   [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move.  Alternatives cover: load, store, SIMD
;; reg-reg (ORR), SIMD->GP (UMOV), GP->SIMD (INS), GP-GP (MOV) and a
;; valid vector immediate (Dn).  At least one operand must be a register.
95 (define_insn "*aarch64_simd_mov<mode>"
96   [(set (match_operand:VD 0 "nonimmediate_operand"
97 		"=w, m,  w, ?r, ?w, ?r, w")
98 	(match_operand:VD 1 "general_operand"
99 		"m,  w,  w,  w,  r,  r, Dn"))]
101    && (register_operand (operands[0], <MODE>mode)
102        || register_operand (operands[1], <MODE>mode))"
104    switch (which_alternative)
106      case 0: return "ldr\\t%d0, %1";
107      case 1: return "str\\t%d1, %0";
108      case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109      case 3: return "umov\t%0, %1.d[0]";
110      case 4: return "ins\t%0.d[0], %1";
111      case 5: return "mov\t%0, %1";
113        return aarch64_output_simd_mov_immediate (operands[1],
115      default: gcc_unreachable ();
118   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 		     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 		     mov_reg, neon_move<q>")]
;; 128-bit (VQ) vector move.  GP<->SIMD and GP-GP alternatives take two
;; instructions (length 8) and are split later; hence type "multiple".
123 (define_insn "*aarch64_simd_mov<mode>"
124   [(set (match_operand:VQ 0 "nonimmediate_operand"
125 		"=w, m,  w, ?r, ?w, ?r, w")
126 	(match_operand:VQ 1 "general_operand"
127 		"m,  w,  w,  w,  r,  r, Dn"))]
129    && (register_operand (operands[0], <MODE>mode)
130        || register_operand (operands[1], <MODE>mode))"
132   switch (which_alternative)
135 	return "ldr\\t%q0, %1";
137 	return "str\\t%q1, %0";
139 	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 		     neon_logic<q>, multiple, multiple, multiple,\
153    (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of 64-bit vectors with one LDP.  The (partially elided)
;; condition requires the second address to equal the first plus the
;; vector size, i.e. the two loads are adjacent in memory.
156 (define_insn "load_pair<mode>"
157   [(set (match_operand:VD 0 "register_operand" "=w")
158 	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159    (set (match_operand:VD 2 "register_operand" "=w")
160 	(match_operand:VD 3 "memory_operand" "m"))]
162    && rtx_equal_p (XEXP (operands[3], 0),
163 		   plus_constant (Pmode,
164 				  XEXP (operands[1], 0),
165 				  GET_MODE_SIZE (<MODE>mode)))"
167   [(set_attr "type" "neon_ldp")]
;; Store a pair of 64-bit vectors with one STP; same adjacency check on
;; the destination addresses.
170 (define_insn "store_pair<mode>"
171   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 	(match_operand:VD 1 "register_operand" "w"))
173    (set (match_operand:VD 2 "memory_operand" "=m")
174 	(match_operand:VD 3 "register_operand" "w"))]
176    && rtx_equal_p (XEXP (operands[2], 0),
177 		   plus_constant (Pmode,
178 				  XEXP (operands[0], 0),
179 				  GET_MODE_SIZE (<MODE>mode)))"
181   [(set_attr "type" "neon_stp")]
;; NOTE(review): the (define_split ...) header lines for the next two
;; patterns are missing from this extract.
;; Split a 128-bit move between two GP register pairs into two DImode
;; moves after reload.
185   [(set (match_operand:VQ 0 "register_operand" "")
186 	(match_operand:VQ 1 "register_operand" ""))]
187   "TARGET_SIMD && reload_completed
188    && GP_REGNUM_P (REGNO (operands[0]))
189    && GP_REGNUM_P (REGNO (operands[1]))"
192   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Split a 128-bit move that crosses the GP/FP register files.
197   [(set (match_operand:VQ 0 "register_operand" "")
198 	(match_operand:VQ 1 "register_operand" ""))]
199   "TARGET_SIMD && reload_completed
200    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204   aarch64_split_simd_move (operands[0], operands[1]);
;; Helper expander used by the split above: move a 128-bit value half at
;; a time.  GP source: write low then high half of the SIMD register.
;; Otherwise: extract low and high halves of the SIMD source.
208 (define_expand "aarch64_split_simd_mov<mode>"
209   [(set (match_operand:VQ 0)
210 	(match_operand:VQ 1))]
213     rtx dst = operands[0];
214     rtx src = operands[1];
216     if (GP_REGNUM_P (REGNO (src)))
218 	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 	rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 	  (gen_move_lo_quad_<mode> (dst, src_low_part));
224 	  (gen_move_hi_quad_<mode> (dst, src_high_part));
229 	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Extract the low half of a 128-bit vector into a GP register.
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 			(match_operand:VQ 1 "register_operand" "w")
247 			(match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248   "TARGET_SIMD && reload_completed"
250   [(set_attr "type" "neon_to_gp<q>")
251    (set_attr "length" "4")
;; Extract the high half of a 128-bit vector into a GP register.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 			(match_operand:VQ 1 "register_operand" "w")
258 			(match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259   "TARGET_SIMD && reload_completed"
261   [(set_attr "type" "neon_to_gp<q>")
262    (set_attr "length" "4")
;; OR-NOT: note ORN negates its second assembler operand, so operand 1
;; (the inverted one in RTL) is printed last.
265 (define_insn "orn<mode>3"
266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 		(match_operand:VDQ_I 2 "register_operand" "w")))]
270   "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271   [(set_attr "type" "neon_logic<q>")]
;; AND-NOT (bit clear); operand order swapped for the same reason.
274 (define_insn "bic<mode>3"
275  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 		(match_operand:VDQ_I 2 "register_operand" "w")))]
279   "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280   [(set_attr "type" "neon_logic<q>")]
;; Element-wise integer addition.
283 (define_insn "add<mode>3"
284   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 		  (match_operand:VDQ_I 2 "register_operand" "w")))]
288   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289   [(set_attr "type" "neon_add<q>")]
;; Element-wise integer subtraction.
292 (define_insn "sub<mode>3"
293   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 	(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
297   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298   [(set_attr "type" "neon_sub<q>")]
;; Element-wise integer multiply (no 64-bit-element MUL, hence VDQ_BHSI).
301 (define_insn "mul<mode>3"
302   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
310 (define_insn "bswap<mode>2"
311   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte.
318 (define_insn "aarch64_rbit<mode>"
319   [(set (match_operand:VB 0 "register_operand" "=w")
320 	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: byte-swap each element, bit-reverse the bytes
;; (via a QI-element subreg view), then count leading zeros.
327 (define_expand "ctz<mode>2"
328   [(set (match_operand:VS 0 "register_operand")
329         (ctz:VS (match_operand:VS 1 "register_operand")))]
332      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply a vector by a broadcast lane of another vector
;; (MUL/FMUL by-element form); lane index remapped for endianness.
341 (define_insn "*aarch64_mul3_elt<mode>"
342  [(set (match_operand:VMUL 0 "register_operand" "=w")
346 	  (match_operand:VMUL 1 "register_operand" "<h_con>")
347 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
348       (match_operand:VMUL 3 "register_operand" "w")))]
351     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Same, with the lane taken from the opposite-width vector mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359      (mult:VMUL_CHANGE_NLANES
360        (vec_duplicate:VMUL_CHANGE_NLANES
362 	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
364       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 					  INTVAL (operands[2])));
369     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; V2DF multiply by a broadcast DF scalar (element 0 of its register).
374 (define_insn "*aarch64_mul3_elt_to_128df"
375   [(set (match_operand:V2DF 0 "register_operand" "=w")
378        (match_operand:DF 2 "register_operand" "w"))
379       (match_operand:V2DF 1 "register_operand" "w")))]
381   "fmul\\t%0.2d, %1.2d, %2.d[0]"
382   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Reciprocal square root estimate (FRSQRTE).
385 (define_insn "aarch64_rsqrte_<mode>2"
386   [(set (match_operand:VALLF 0 "register_operand" "=w")
387 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
390   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391   [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
;; Newton-Raphson step for reciprocal square root (FRSQRTS).
393 (define_insn "aarch64_rsqrts_<mode>3"
394   [(set (match_operand:VALLF 0 "register_operand" "=w")
395 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
396 	       (match_operand:VALLF 2 "register_operand" "w")]
399   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400   [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
;; Standard-name rsqrt expander: emits the approximation sequence.
402 (define_expand "rsqrt<mode>2"
403   [(set (match_operand:VALLF 0 "register_operand" "=w")
404 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408   aarch64_emit_approx_rsqrt (operands[0], operands[1]);
;; DF result from a V2DF lane times a DF scalar (by-element FMUL).
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413   [(set (match_operand:DF 0 "register_operand" "=w")
416 	  (match_operand:V2DF 1 "register_operand" "w")
417 	  (parallel [(match_operand:SI 2 "immediate_operand")]))
418 	(match_operand:DF 3 "register_operand" "w")))]
421     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Element-wise integer negation.
427 (define_insn "neg<mode>2"
428   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431   "neg\t%0.<Vtype>, %1.<Vtype>"
432   [(set_attr "type" "neon_neg<q>")]
;; Element-wise integer absolute value (RTL abs form; combinable).
435 (define_insn "abs<mode>2"
436   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439   "abs\t%0.<Vtype>, %1.<Vtype>"
440   [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept as an unspec (see comment above) so combine cannot
;; merge it into instructions with a built-in ABS step.
446 (define_insn "aarch64_abs<mode>"
447   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453   [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| maps onto SABD.
456 (define_insn "abd<mode>_3"
457   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 	(abs:VDQ_BHSI (minus:VDQ_BHSI
459 		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463   [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate (SABA); operand 3 ties to the
;; destination ("0" constraint) as the accumulator.
466 (define_insn "aba<mode>_3"
467   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474   [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector form.
477 (define_insn "fabd<mode>_3"
478   [(set (match_operand:VDQF 0 "register_operand" "=w")
479 	(abs:VDQF (minus:VDQF
480 		   (match_operand:VDQF 1 "register_operand" "w")
481 		   (match_operand:VDQF 2 "register_operand" "w"))))]
483   "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
484   [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Scalar FABD on SF/DF values held in SIMD registers.
487 (define_insn "*fabd_scalar<mode>3"
488   [(set (match_operand:GPF 0 "register_operand" "=w")
490 		  (match_operand:GPF 1 "register_operand" "w")
491 		  (match_operand:GPF 2 "register_operand" "w"))))]
493   "fabd\t%<s>0, %<s>1, %<s>2"
494   [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Bitwise AND.
497 (define_insn "and<mode>3"
498   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
502   "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503   [(set_attr "type" "neon_logic<q>")]
;; Bitwise OR.
506 (define_insn "ior<mode>3"
507   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
511   "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512   [(set_attr "type" "neon_logic<q>")]
;; Bitwise XOR.
515 (define_insn "xor<mode>3"
516   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
518 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
520   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
521   [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT.
524 (define_insn "one_cmpl<mode>2"
525   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
526         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
528   "not\t%0.<Vbtype>, %1.<Vbtype>"
529   [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane.  Operand 2 is a one-hot mask; its
;; log2 gives the lane, remapped for endianness.  Alternatives: from a
;; GP register (INS), from a SIMD register (INS element), or a lane
;; load directly from memory (LD1).
532 (define_insn "aarch64_simd_vec_set<mode>"
533   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
535 	    (vec_duplicate:VDQ_BHSI
536 		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
537 	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
538 	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
541    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
542    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
543    switch (which_alternative)
546 	return "ins\\t%0.<Vetype>[%p2], %w1";
548 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
550 	return "ld1\\t{%0.<Vetype>}[%p2], %1";
555   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Logical shift right by an immediate vector (all-equal lanes, Dr).
558 (define_insn "aarch64_simd_lshr<mode>"
559  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
560        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
561 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
563   "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
564   [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector.
567 (define_insn "aarch64_simd_ashr<mode>"
568  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
569        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
570 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
572   "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
573   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (Dl).
576 (define_insn "aarch64_simd_imm_shl<mode>"
577  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
578        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
579 		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
581   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
582   [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by per-lane register amounts (SSHL with non-negative
;; amounts behaves as a plain left shift).
585 (define_insn "aarch64_simd_reg_sshl<mode>"
586  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
588 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
590   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
591   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by register: shifts left for positive amounts, logically right
;; for negative ones — kept as an unspec since RTL has no such op.
594 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
595  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
596        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
597 		    (match_operand:VDQ_I 2 "register_operand" "w")]
598 		   UNSPEC_ASHIFT_UNSIGNED))]
600   "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
601   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by register: as above but arithmetic for negative amounts.
604 (define_insn "aarch64_simd_reg_shl<mode>_signed"
605  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
607 		    (match_operand:VDQ_I 2 "register_operand" "w")]
608 		   UNSPEC_ASHIFT_SIGNED))]
610   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
611   [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name left-shift expander.  A constant in-range amount uses
;; the immediate SHL pattern; otherwise the SImode amount is forced to a
;; register, broadcast with DUP, and SSHL is used.
614 (define_expand "ashl<mode>3"
615   [(match_operand:VDQ_I 0 "register_operand" "")
616    (match_operand:VDQ_I 1 "register_operand" "")
617    (match_operand:SI 2 "general_operand" "")]
620   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
623   if (CONST_INT_P (operands[2]))
625       shift_amount = INTVAL (operands[2]);
626       if (shift_amount >= 0 && shift_amount < bit_width)
628 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
630 	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
637 	  operands[2] = force_reg (SImode, operands[2]);
640   else if (MEM_P (operands[2]))
642       operands[2] = force_reg (SImode, operands[2]);
645   if (REG_P (operands[2]))
647       rtx tmp = gen_reg_rtx (<MODE>mode);
648       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
649 					     convert_to_mode (<VEL>mode,
652       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Standard-name logical right-shift expander.  Constant amounts use the
;; immediate USHR pattern; otherwise the amount is negated (USHL shifts
;; right for negative amounts), broadcast, and USHL is used.
661 (define_expand "lshr<mode>3"
662   [(match_operand:VDQ_I 0 "register_operand" "")
663    (match_operand:VDQ_I 1 "register_operand" "")
664    (match_operand:SI 2 "general_operand" "")]
667   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
670   if (CONST_INT_P (operands[2]))
672       shift_amount = INTVAL (operands[2]);
673       if (shift_amount > 0 && shift_amount <= bit_width)
675 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
677           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
683           operands[2] = force_reg (SImode, operands[2]);
685   else if (MEM_P (operands[2]))
687       operands[2] = force_reg (SImode, operands[2]);
690   if (REG_P (operands[2]))
692       rtx tmp = gen_reg_rtx (SImode);
693       rtx tmp1 = gen_reg_rtx (<MODE>mode);
694       emit_insn (gen_negsi2 (tmp, operands[2]));
695       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
696 					     convert_to_mode (<VEL>mode,
698       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Standard-name arithmetic right-shift expander.  Mirrors lshr<mode>3
;; but uses SSHR for constants and negated-amount SSHL otherwise.
708 (define_expand "ashr<mode>3"
709   [(match_operand:VDQ_I 0 "register_operand" "")
710    (match_operand:VDQ_I 1 "register_operand" "")
711    (match_operand:SI 2 "general_operand" "")]
714   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
717   if (CONST_INT_P (operands[2]))
719       shift_amount = INTVAL (operands[2]);
720       if (shift_amount > 0 && shift_amount <= bit_width)
722 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
724           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
730           operands[2] = force_reg (SImode, operands[2]);
732   else if (MEM_P (operands[2]))
734       operands[2] = force_reg (SImode, operands[2]);
737   if (REG_P (operands[2]))
739       rtx tmp = gen_reg_rtx (SImode);
740       rtx tmp1 = gen_reg_rtx (<MODE>mode);
741       emit_insn (gen_negsi2 (tmp, operands[2]));
742       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
743 					     convert_to_mode (<VEL>mode,
745       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: maps directly onto SSHL.
755 (define_expand "vashl<mode>3"
756  [(match_operand:VDQ_I 0 "register_operand" "")
757   (match_operand:VDQ_I 1 "register_operand" "")
758   (match_operand:VDQ_I 2 "register_operand" "")]
761   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
766 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
767 ;; Negating individual lanes most certainly offsets the
768 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the amounts and use
;; signed SSHL (right shift = left shift by a negative amount).
769 (define_expand "vashr<mode>3"
770  [(match_operand:VDQ_BHSI 0 "register_operand" "")
771   (match_operand:VDQ_BHSI 1 "register_operand" "")
772   (match_operand:VDQ_BHSI 2 "register_operand" "")]
775   rtx neg = gen_reg_rtx (<MODE>mode);
776   emit (gen_neg<mode>2 (neg, operands[2]));
777   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode intrinsic arithmetic shift.  A shift by 64 is clamped to 63
;; (same result: all sign bits), since the standard pattern rejects 64.
783 (define_expand "aarch64_ashr_simddi"
784   [(match_operand:DI 0 "register_operand" "=w")
785    (match_operand:DI 1 "register_operand" "w")
786    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
789     /* An arithmetic shift right by 64 fills the result with copies of the sign
790        bit, just like asr by 63 - however the standard pattern does not handle
792     if (INTVAL (operands[2]) == 64)
793       operands[2] = GEN_INT (63);
794     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical right shift, via negated-amount USHL.
799 (define_expand "vlshr<mode>3"
800  [(match_operand:VDQ_BHSI 0 "register_operand" "")
801   (match_operand:VDQ_BHSI 1 "register_operand" "")
802   (match_operand:VDQ_BHSI 2 "register_operand" "")]
805   rtx neg = gen_reg_rtx (<MODE>mode);
806   emit (gen_neg<mode>2 (neg, operands[2]));
807   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode intrinsic logical shift: a shift by 64 yields zero directly.
812 (define_expand "aarch64_lshr_simddi"
813   [(match_operand:DI 0 "register_operand" "=w")
814    (match_operand:DI 1 "register_operand" "w")
815    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
818     if (INTVAL (operands[2]) == 64)
819       emit_move_insn (operands[0], const0_rtx);
821       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; vec_set expander: converts the lane index into the one-hot mask the
;; aarch64_simd_vec_set insn expects.
826 (define_expand "vec_set<mode>"
827   [(match_operand:VDQ_BHSI 0 "register_operand")
828    (match_operand:<VEL> 1 "register_operand")
829    (match_operand:SI 2 "immediate_operand")]
832   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
833   emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
834 					    GEN_INT (elem), operands[0]));
839 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Shift the 64-bit vector's contents towards element 0 by whole bytes;
;; direction of the underlying scalar shift depends on endianness.
840 (define_insn "vec_shr_<mode>"
841   [(set (match_operand:VD 0 "register_operand" "=w")
842         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
843 		    (match_operand:SI 2 "immediate_operand" "i")]
847   if (BYTES_BIG_ENDIAN)
848     return "shl %d0, %d1, %2";
850     return "ushr %d0, %d1, %2";
852   [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: from a GP register or a SIMD register, via INS.
855 (define_insn "aarch64_simd_vec_setv2di"
856   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
859 	    (match_operand:DI 1 "register_operand" "r,w"))
860 	  (match_operand:V2DI 3 "register_operand" "0,0")
861 	  (match_operand:SI 2 "immediate_operand" "i,i")))]
864   int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
865   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
866   switch (which_alternative)
869       return "ins\\t%0.d[%p2], %1";
871       return "ins\\t%0.d[%p2], %1.d[0]";
876   [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set for V2DI: same one-hot-mask conversion as above.
879 (define_expand "vec_setv2di"
880   [(match_operand:V2DI 0 "register_operand")
881    (match_operand:DI 1 "register_operand")
882    (match_operand:SI 2 "immediate_operand")]
885   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
886   emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
887 					  GEN_INT (elem), operands[0]));
;; FP lane insert: element 0 of the scalar's SIMD register via INS.
892 (define_insn "aarch64_simd_vec_set<mode>"
893   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
895 	    (vec_duplicate:VDQF_F16
896 	      (match_operand:<VEL> 1 "register_operand" "w"))
897 	    (match_operand:VDQF_F16 3 "register_operand" "0")
898 	    (match_operand:SI 2 "immediate_operand" "i")))]
901     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
903     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
904     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
906   [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors: one-hot-mask conversion again.
909 (define_expand "vec_set<mode>"
910   [(match_operand:VDQF_F16 0 "register_operand" "+w")
911    (match_operand:<VEL> 1 "register_operand" "w")
912    (match_operand:SI 2 "immediate_operand" "")]
915   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
916   emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
917 					  GEN_INT (elem), operands[0]));
;; Multiply-accumulate: operand 1 is the accumulator (tied to dest).
923 (define_insn "aarch64_mla<mode>"
924  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
925        (plus:VDQ_BHSI (mult:VDQ_BHSI
926 			(match_operand:VDQ_BHSI 2 "register_operand" "w")
927 			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
928 		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
930  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
931   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from a lane (by-element form);
;; lane index remapped for endianness.
934 (define_insn "*aarch64_mla_elt<mode>"
935  [(set (match_operand:VDQHS 0 "register_operand" "=w")
940 	      (match_operand:VDQHS 1 "register_operand" "<h_con>")
941 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
942 	  (match_operand:VDQHS 3 "register_operand" "w"))
943 	(match_operand:VDQHS 4 "register_operand" "0")))]
946     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
947     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
949   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Same, with the lane sourced from the opposite-width vector mode.
952 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
953  [(set (match_operand:VDQHS 0 "register_operand" "=w")
958 	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
959 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
960 	  (match_operand:VDQHS 3 "register_operand" "w"))
961 	(match_operand:VDQHS 4 "register_operand" "0")))]
964     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
965 					  INTVAL (operands[2])));
966     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
968   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: operand 1 is the accumulator (tied to dest).
971 (define_insn "aarch64_mls<mode>"
972  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
973        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
974 		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
975 			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
977  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
978   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by-element form; lane index remapped for endianness.
981 (define_insn "*aarch64_mls_elt<mode>"
982  [(set (match_operand:VDQHS 0 "register_operand" "=w")
984 	(match_operand:VDQHS 4 "register_operand" "0")
988 	      (match_operand:VDQHS 1 "register_operand" "<h_con>")
989 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
990 	  (match_operand:VDQHS 3 "register_operand" "w"))))]
993     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
994     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
996   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by-element, lane from the opposite-width vector mode.
999 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1000  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1002 	(match_operand:VDQHS 4 "register_operand" "0")
1004 	    (vec_duplicate:VDQHS
1006 	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1007 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
1008 	  (match_operand:VDQHS 3 "register_operand" "w"))))]
1011     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1012 					  INTVAL (operands[2])));
1013     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1015   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1018 ;; Max/Min operations.
;; Signed/unsigned element-wise max/min via the MAXMIN code iterator.
1019 (define_insn "<su><maxmin><mode>3"
1020  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1021        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1022 		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1024   "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1025   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min expander: emitted as a compare plus vcond selection,
;; via gen_aarch64_vcond_internalv2div2di (elided lines choose
;; cmp_operator from the MAXMIN code).
1028 (define_expand "<su><maxmin>v2di3"
1029  [(set (match_operand:V2DI 0 "register_operand" "")
1030        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1031 		    (match_operand:V2DI 2 "register_operand" "")))]
1034   enum rtx_code cmp_operator;
1055   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1056   emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1057 	      operands[2], cmp_fmt, operands[1], operands[2]));
1061 ;; Pairwise Integer Max/Min operations.
;; SMAXP/SMINP/UMAXP/UMINP over adjacent element pairs.
1062 (define_insn "aarch64_<maxmin_uns>p<mode>"
1063  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1064        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1065 			(match_operand:VDQ_BHSI 2 "register_operand" "w")]
1068   "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1069   [(set_attr "type" "neon_minmax<q>")]
1072 ;; Pairwise FP Max/Min operations.
1073 (define_insn "aarch64_<maxmin_uns>p<mode>"
1074  [(set (match_operand:VDQF 0 "register_operand" "=w")
1075        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1076 		    (match_operand:VDQF 2 "register_operand" "w")]
1079   "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1080   [(set_attr "type" "neon_minmax<q>")]
1083 ;; vec_concat gives a new vector with the low elements from operand 1, and
1084 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1085 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1086 ;; What that means, is that the RTL descriptions of the below patterns
1087 ;; need to change depending on endianness.
1089 ;; Move to the low architectural bits of the register.
1090 ;; On little-endian this is { operand, zeroes }
1091 ;; On big-endian this is { zeroes, operand }
;; Little-endian, modes with more than two elements.
1093 (define_insn "move_lo_quad_internal_<mode>"
1094   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1096 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1097 	  (vec_duplicate:<VHALF> (const_int 0))))]
1098   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1103   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1104    (set_attr "simd" "yes,*,yes")
1105    (set_attr "fp" "*,yes,*")
1106    (set_attr "length" "4")]
;; Little-endian, two-element modes (zero half is a plain const_int 0,
;; per the elided vec_concat operand).
1109 (define_insn "move_lo_quad_internal_<mode>"
1110   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1112 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1114   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1119   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1120    (set_attr "simd" "yes,*,yes")
1121    (set_attr "fp" "*,yes,*")
1122    (set_attr "length" "4")]
;; Big-endian, modes with more than two elements: operands of the
;; vec_concat are swapped relative to the little-endian pattern.
1125 (define_insn "move_lo_quad_internal_be_<mode>"
1126   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1128 	  (vec_duplicate:<VHALF> (const_int 0))
1129 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1130   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1135   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1136    (set_attr "simd" "yes,*,yes")
1137    (set_attr "fp" "*,yes,*")
1138    (set_attr "length" "4")]
;; Big-endian, two-element modes.
1141 (define_insn "move_lo_quad_internal_be_<mode>"
1142   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1145 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1146   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1151   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1152    (set_attr "simd" "yes,*,yes")
1153    (set_attr "fp" "*,yes,*")
1154    (set_attr "length" "4")]
;; Dispatch to the endian-specific internal pattern.
1157 (define_expand "move_lo_quad_<mode>"
1158   [(match_operand:VQ 0 "register_operand")
1159    (match_operand:VQ 1 "register_operand")]
1162   if (BYTES_BIG_ENDIAN)
1163     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1165     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1170 ;; Move operand1 to the high architectural bits of the register, keeping
1171 ;; the low architectural bits of operand2.
1172 ;; For little-endian this is { operand2, operand1 }
1173 ;; For big-endian this is { operand1, operand2 }
;; Little-endian insn: vec_select with a lo-half PARALLEL (operand 2)
;; keeps the existing low half; operand 1 becomes the high half.
1175 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1176 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1180 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1181 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1182 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1184 ins\\t%0.d[1], %1.d[0]
1186 [(set_attr "type" "neon_ins")]
;; Big-endian insn: same INS, vec_concat arms swapped to match the
;; reversed RTL lane order.
1189 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1190 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1192 (match_operand:<VHALF> 1 "register_operand" "w,r")
1195 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1196 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1198 ins\\t%0.d[1], %1.d[0]
1200 [(set_attr "type" "neon_ins")]
;; Expander: build the lo-half PARALLEL ('false') and dispatch to the
;; endian-specific insn above.
1203 (define_expand "move_hi_quad_<mode>"
1204 [(match_operand:VQ 0 "register_operand" "")
1205 (match_operand:<VHALF> 1 "register_operand" "")]
1208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1209 if (BYTES_BIG_ENDIAN)
1210 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1213 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1218 ;; Narrowing operations.
;; XTN: truncate each element of a Q-reg vector to half width.
1221 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1222 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1223 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1225 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1226 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; D-reg pack: concatenate the two inputs into a double-width temp via
;; the move_lo/hi_quad expanders, then narrow with XTN.  On big-endian
;; the operands are swapped (lo/hi below) to preserve element order.
1229 (define_expand "vec_pack_trunc_<mode>"
1230 [(match_operand:<VNARROWD> 0 "register_operand" "")
1231 (match_operand:VDN 1 "register_operand" "")
1232 (match_operand:VDN 2 "register_operand" "")]
1235 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1236 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1237 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1239 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1240 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1241 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Q-reg pack: XTN the first input into the low half, XTN2 the second
;; into the high half; the operand order flips for big-endian.
;; Earlyclobber (=&w) because operand 0 is written before both reads.
1247 (define_insn "vec_pack_trunc_<mode>"
1248 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1249 (vec_concat:<VNARROWQ2>
1250 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1251 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1254 if (BYTES_BIG_ENDIAN)
1255 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1257 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1259 [(set_attr "type" "multiple")
1260 (set_attr "length" "8")]
1263 ;; Widening operations.
;; SHLL #0 / USHLL #0: extend the low half of a Q-reg vector to full width.
1265 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1267 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1268 (match_operand:VQW 1 "register_operand" "w")
1269 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1272 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1273 [(set_attr "type" "neon_shift_imm_long")]
;; SHLL2/USHLL2 #0: same for the high half.
1276 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1278 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1279 (match_operand:VQW 1 "register_operand" "w")
1280 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1283 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1284 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo half PARALLEL and emit the
;; matching insn above.
1287 (define_expand "vec_unpack<su>_hi_<mode>"
1288 [(match_operand:<VWIDE> 0 "register_operand" "")
1289 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1292 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1293 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1299 (define_expand "vec_unpack<su>_lo_<mode>"
1300 [(match_operand:<VWIDE> 0 "register_operand" "")
1301 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1304 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1305 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1311 ;; Widening arithmetic.
;; SMLAL/UMLAL (low half): acc (operand 1, tied to dest) plus the
;; widening product of the low halves of operands 2 and 4.
1313 (define_insn "*aarch64_<su>mlal_lo<mode>"
1314 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1317 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1318 (match_operand:VQW 2 "register_operand" "w")
1319 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1320 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1321 (match_operand:VQW 4 "register_operand" "w")
1323 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1325 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1326 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLAL2/UMLAL2: same, selecting the high halves.
1329 (define_insn "*aarch64_<su>mlal_hi<mode>"
1330 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1333 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1334 (match_operand:VQW 2 "register_operand" "w")
1335 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1336 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1337 (match_operand:VQW 4 "register_operand" "w")
1339 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1341 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1342 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLSL/UMLSL (low half): acc minus the widening product.
1345 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1348 (match_operand:<VWIDE> 1 "register_operand" "0")
1350 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1351 (match_operand:VQW 2 "register_operand" "w")
1352 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1353 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1354 (match_operand:VQW 4 "register_operand" "w")
1357 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1358 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLSL2/UMLSL2: high-half variant.
1361 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1364 (match_operand:<VWIDE> 1 "register_operand" "0")
1366 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1367 (match_operand:VQW 2 "register_operand" "w")
1368 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1369 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370 (match_operand:VQW 4 "register_operand" "w")
1373 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1374 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit) MLAL: no half-selection needed.
1377 (define_insn "*aarch64_<su>mlal<mode>"
1378 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:VD_BHSI 1 "register_operand" "w"))
1384 (match_operand:VD_BHSI 2 "register_operand" "w")))
1385 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1387 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1388 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register MLSL.
1391 (define_insn "*aarch64_<su>mlsl<mode>"
1392 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1394 (match_operand:<VWIDE> 1 "register_operand" "0")
1397 (match_operand:VD_BHSI 2 "register_operand" "w"))
1399 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1401 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1402 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMULL/UMULL: widening multiply of the low halves.
1405 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1406 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1407 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1408 (match_operand:VQW 1 "register_operand" "w")
1409 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1410 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1411 (match_operand:VQW 2 "register_operand" "w")
1414 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1415 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the lo-half PARALLEL ('false') and emit
;; the insn above.
1418 (define_expand "vec_widen_<su>mult_lo_<mode>"
1419 [(match_operand:<VWIDE> 0 "register_operand" "")
1420 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1421 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1424 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1425 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; SMULL2/UMULL2: widening multiply of the high halves.
1432 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1433 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1434 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1435 (match_operand:VQW 1 "register_operand" "w")
1436 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1437 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1438 (match_operand:VQW 2 "register_operand" "w")
1441 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1442 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Hi-half expander ('true').
1445 (define_expand "vec_widen_<su>mult_hi_<mode>"
1446 [(match_operand:<VWIDE> 0 "register_operand" "")
1447 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1448 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1451 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1452 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1460 ;; FP vector operations.
1461 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1462 ;; double-precision (64-bit) floating-point data types and arithmetic as
1463 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1464 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1466 ;; Floating-point operations can raise an exception. Vectorizing such
1467 ;; operations are safe because of reasons explained below.
1469 ;; ARMv8 permits an extension to enable trapped floating-point
1470 ;; exception handling, however this is an optional feature. In the
1471 ;; event of a floating-point exception being raised by vectorised
1473 ;; 1. If trapped floating-point exceptions are available, then a trap
1474 ;; will be taken when any lane raises an enabled exception. A trap
1475 ;; handler may determine which lane raised the exception.
1476 ;; 2. Alternatively a sticky exception flag is set in the
1477 ;; floating-point status register (FPSR). Software may explicitly
1478 ;; test the exception flags, in which case the tests will either
1479 ;; prevent vectorisation, allowing precise identification of the
1480 ;; failing operation, or if tested outside of vectorisable regions
1481 ;; then the specific operation and lane are not of interest.
1483 ;; FP arithmetic operations.
;; Vector FP add (FADD), element-wise.
1485 (define_insn "add<mode>3"
1486 [(set (match_operand:VDQF 0 "register_operand" "=w")
1487 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1488 (match_operand:VDQF 2 "register_operand" "w")))]
1490 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1491 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector FP subtract (FSUB).
1494 (define_insn "sub<mode>3"
1495 [(set (match_operand:VDQF 0 "register_operand" "=w")
1496 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1497 (match_operand:VDQF 2 "register_operand" "w")))]
1499 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1500 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector FP multiply (FMUL).
1503 (define_insn "mul<mode>3"
1504 [(set (match_operand:VDQF 0 "register_operand" "=w")
1505 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1506 (match_operand:VDQF 2 "register_operand" "w")))]
1508 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1509 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; Vector FP divide (FDIV).
1512 (define_insn "div<mode>3"
1513 [(set (match_operand:VDQF 0 "register_operand" "=w")
1514 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1515 (match_operand:VDQF 2 "register_operand" "w")))]
1517 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1518 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
;; Vector FP negate (FNEG).
1521 (define_insn "neg<mode>2"
1522 [(set (match_operand:VDQF 0 "register_operand" "=w")
1523 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1525 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1526 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
;; Vector FP absolute value (FABS).
1529 (define_insn "abs<mode>2"
1530 [(set (match_operand:VDQF 0 "register_operand" "=w")
1531 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1533 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1534 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Fused multiply-add (FMLA): operand 3 is the accumulator, tied to the
;; destination ("0" constraint).
1537 (define_insn "fma<mode>4"
1538 [(set (match_operand:VDQF 0 "register_operand" "=w")
1539 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1540 (match_operand:VDQF 2 "register_operand" "w")
1541 (match_operand:VDQF 3 "register_operand" "0")))]
1543 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1544 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLA by element: one multiplicand is a duplicated lane of operand 1;
;; the lane index is remapped for endianness at output time.
1547 (define_insn "*aarch64_fma4_elt<mode>"
1548 [(set (match_operand:VDQF 0 "register_operand" "=w")
1552 (match_operand:VDQF 1 "register_operand" "<h_con>")
1553 (parallel [(match_operand:SI 2 "immediate_operand")])))
1554 (match_operand:VDQF 3 "register_operand" "w")
1555 (match_operand:VDQF 4 "register_operand" "0")))]
1558 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1559 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1561 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA by element where the lane source has the opposite width
;; (V2SF<->V4SF); lane index remapped in the source vector's mode.
1564 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1565 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1567 (vec_duplicate:VDQSF
1569 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1570 (parallel [(match_operand:SI 2 "immediate_operand")])))
1571 (match_operand:VDQSF 3 "register_operand" "w")
1572 (match_operand:VDQSF 4 "register_operand" "0")))]
1575 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1576 INTVAL (operands[2])));
1577 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1579 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLA with a scalar DF multiplicand broadcast to both lanes.
1582 (define_insn "*aarch64_fma4_elt_to_128df"
1583 [(set (match_operand:V2DF 0 "register_operand" "=w")
1586 (match_operand:DF 1 "register_operand" "w"))
1587 (match_operand:V2DF 2 "register_operand" "w")
1588 (match_operand:V2DF 3 "register_operand" "0")))]
1590 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1591 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMLA taking one factor from a V2DF lane.
1594 (define_insn "*aarch64_fma4_elt_to_64v2df"
1595 [(set (match_operand:DF 0 "register_operand" "=w")
1598 (match_operand:V2DF 1 "register_operand" "w")
1599 (parallel [(match_operand:SI 2 "immediate_operand")]))
1600 (match_operand:DF 3 "register_operand" "w")
1601 (match_operand:DF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1605 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1607 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (FMLS): fma with one operand negated;
;; operand 3 is the accumulator tied to the destination.
1610 (define_insn "fnma<mode>4"
1611 [(set (match_operand:VDQF 0 "register_operand" "=w")
1613 (match_operand:VDQF 1 "register_operand" "w")
1615 (match_operand:VDQF 2 "register_operand" "w"))
1616 (match_operand:VDQF 3 "register_operand" "0")))]
1618 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1619 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLS by element; lane index remapped for endianness.
1622 (define_insn "*aarch64_fnma4_elt<mode>"
1623 [(set (match_operand:VDQF 0 "register_operand" "=w")
1626 (match_operand:VDQF 3 "register_operand" "w"))
1629 (match_operand:VDQF 1 "register_operand" "<h_con>")
1630 (parallel [(match_operand:SI 2 "immediate_operand")])))
1631 (match_operand:VDQF 4 "register_operand" "0")))]
1634 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1635 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1637 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by element, lane sourced from the opposite-width vector mode.
1640 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1641 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1644 (match_operand:VDQSF 3 "register_operand" "w"))
1645 (vec_duplicate:VDQSF
1647 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1648 (parallel [(match_operand:SI 2 "immediate_operand")])))
1649 (match_operand:VDQSF 4 "register_operand" "0")))]
1652 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1653 INTVAL (operands[2])));
1654 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1656 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLS with a scalar DF multiplicand broadcast to both lanes.
1659 (define_insn "*aarch64_fnma4_elt_to_128df"
1660 [(set (match_operand:V2DF 0 "register_operand" "=w")
1663 (match_operand:V2DF 2 "register_operand" "w"))
1665 (match_operand:DF 1 "register_operand" "w"))
1666 (match_operand:V2DF 3 "register_operand" "0")))]
1668 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1669 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMLS taking one factor from a V2DF lane.
1672 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1673 [(set (match_operand:DF 0 "register_operand" "=w")
1676 (match_operand:V2DF 1 "register_operand" "w")
1677 (parallel [(match_operand:SI 2 "immediate_operand")]))
1679 (match_operand:DF 3 "register_operand" "w"))
1680 (match_operand:DF 4 "register_operand" "0")))]
1683 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1684 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1686 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1689 ;; Vector versions of the floating-point frint patterns.
1690 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1691 (define_insn "<frint_pattern><mode>2"
1692 [(set (match_operand:VDQF 0 "register_operand" "=w")
1693 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1696 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1697 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1700 ;; Vector versions of the fcvt standard patterns.
1701 ;; Expands to lbtrunc, lround, lceil, lfloor
1702 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1703 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1704 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1705 [(match_operand:VDQF 1 "register_operand" "w")]
1708 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1709 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Combine a multiply by a power of 2 with the conversion into a single
;; FCVTZ with an fbits immediate; the condition bounds fbits to the
;; element width.
1712 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1713 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1714 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1716 (match_operand:VDQF 1 "register_operand" "w")
1717 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1720 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1721 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1723 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1725 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1726 output_asm_insn (buf, operands);
1729 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns and their trunc variants
;; onto the unspec-based insns above.
1732 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1733 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1734 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1735 [(match_operand:VDQF 1 "register_operand")]
1740 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1741 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1742 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1743 [(match_operand:VDQF 1 "register_operand")]
1748 (define_expand "ftrunc<VDQF:mode>2"
1749 [(set (match_operand:VDQF 0 "register_operand")
1750 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Int-to-FP conversion (SCVTF/UCVTF).
1755 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1756 [(set (match_operand:VDQF 0 "register_operand" "=w")
1758 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1760 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1764 ;; Conversions between vectors of floats and doubles.
1765 ;; Contains a mix of patterns to match standard pattern names
1766 ;; and those for intrinsics.
1768 ;; Float widening operations.
;; FCVTL: float-extend the low half of a V8HF/V4SF vector to the wider mode.
1770 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1771 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1772 (float_extend:<VWIDE> (vec_select:<VHALF>
1773 (match_operand:VQ_HSF 1 "register_operand" "w")
1774 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1777 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1778 [(set_attr "type" "neon_fp_cvt_widen_s")]
1781 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1782 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1783 ;; the meaning of HI and LO changes depending on the target endianness.
1784 ;; While elsewhere we map the higher numbered elements of a vector to
1785 ;; the lower architectural lanes of the vector, for these patterns we want
1786 ;; to always treat "hi" as referring to the higher architectural lanes.
1787 ;; Consequently, while the patterns below look inconsistent with our
1788 ;; other big-endian patterns their behavior is as required.
;; Expander: build the lo-half PARALLEL ('false') and emit the lo insn.
1790 (define_expand "vec_unpacks_lo_<mode>"
1791 [(match_operand:<VWIDE> 0 "register_operand" "")
1792 (match_operand:VQ_HSF 1 "register_operand" "")]
1795 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1796 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; FCVTL2: float-extend the high half.
1802 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1803 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1804 (float_extend:<VWIDE> (vec_select:<VHALF>
1805 (match_operand:VQ_HSF 1 "register_operand" "w")
1806 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1809 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1810 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander: float-extend the high-numbered elements of operand 1 into
;; the wider-mode operand 0 (see the endianness note above: "hi" always
;; means the higher-numbered elements for the unpack patterns).
1813 (define_expand "vec_unpacks_hi_<mode>"
1814 [(match_operand:<VWIDE> 0 "register_operand" "")
1815 (match_operand:VQ_HSF 1 "register_operand" "")]
1818 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
;; Fix: this previously emitted gen_aarch64_simd_vec_unpacks_lo_<mode>.
;; The lo insn's vect_par_cnst_lo_half predicate rejects the hi-half
;; PARALLEL built above, so the emitted insn could never be recognized;
;; the hi insn (vect_par_cnst_hi_half, FCVTL2) is the correct target,
;; mirroring the integer vec_unpack<su>_hi_<mode> expander.
1819 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; FCVTL on a D-reg source: extend all elements (no half-selection).
1824 (define_insn "aarch64_float_extend_lo_<Vwide>"
1825 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1826 (float_extend:<VWIDE>
1827 (match_operand:VDF 1 "register_operand" "w")))]
1829 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1830 [(set_attr "type" "neon_fp_cvt_widen_s")]
1833 ;; Float narrowing operations.
;; FCVTN: truncate each wider element into a D-reg result.
1835 (define_insn "aarch64_float_truncate_lo_<mode>"
1836 [(set (match_operand:VDF 0 "register_operand" "=w")
1838 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1840 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1841 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, little-endian: keep operand 1 (tied to dest) as the low half,
;; write the truncation of operand 2 into the high half.
1844 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1845 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1847 (match_operand:VDF 1 "register_operand" "0")
1849 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1850 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1851 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1852 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, big-endian: vec_concat arms swapped for the reversed lane order.
1855 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1856 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1859 (match_operand:<VWIDE> 2 "register_operand" "w"))
1860 (match_operand:VDF 1 "register_operand" "0")))]
1861 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1862 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1863 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Expander: pick the endian-specific insn via a function pointer.
1866 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1867 [(match_operand:<VDBL> 0 "register_operand" "=w")
1868 (match_operand:VDF 1 "register_operand" "0")
1869 (match_operand:<VWIDE> 2 "register_operand" "w")]
1872 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1873 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1874 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1875 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF into one V4SF: FCVTN the first pair into a V2SF temp
;; (the eventual low half), then FCVTN2 the second pair into the high
;; half.  lo/hi swap for big-endian to preserve element order.
1880 (define_expand "vec_pack_trunc_v2df"
1881 [(set (match_operand:V4SF 0 "register_operand")
1883 (float_truncate:V2SF
1884 (match_operand:V2DF 1 "register_operand"))
1885 (float_truncate:V2SF
1886 (match_operand:V2DF 2 "register_operand"))
1890 rtx tmp = gen_reg_rtx (V2SFmode);
1891 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1892 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1894 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1895 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1896 tmp, operands[hi]));
;; Pack two scalar DFs into one V2SF: concatenate them into a V2DF temp
;; via move_lo_quad/move_hi_quad, then narrow with FCVTN.  lo/hi swap
;; for big-endian to preserve element order.
1901 (define_expand "vec_pack_trunc_df"
1902 [(set (match_operand:V2SF 0 "register_operand")
1905 (match_operand:DF 1 "register_operand"))
1907 (match_operand:DF 2 "register_operand"))
;; Fix: tmp was allocated in V2SFmode, but it is the *destination* of
;; gen_move_lo_quad_v2df / gen_move_hi_quad_v2df and the V2DF *input*
;; of gen_aarch64_float_truncate_lo_v2sf, so it must be V2DF.  (The
;; V2SF temp in vec_pack_trunc_v2df above is a different role: there
;; the temp holds the already-narrowed low half.)
1911 rtx tmp = gen_reg_rtx (V2DFmode);
1912 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1913 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1915 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1916 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1917 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1923 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1925 ;; a = (b < c) ? b : c;
1926 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1927 ;; either explicitly or indirectly via -ffast-math.
1929 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1930 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1931 ;; operand will be returned when both operands are zero (i.e. they may not
1932 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1933 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names -> FMAXNM/FMINNM (see the fast-math note above).
1936 (define_insn "<su><maxmin><mode>3"
1937 [(set (match_operand:VDQF 0 "register_operand" "=w")
1938 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1939 (match_operand:VDQF 2 "register_operand" "w")))]
1941 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1942 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Unspec-based max/min for the intrinsics (exact FMAX/FMIN semantics).
1945 (define_insn "<maxmin_uns><mode>3"
1946 [(set (match_operand:VDQF 0 "register_operand" "=w")
1947 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1948 (match_operand:VDQF 2 "register_operand" "w")]
1951 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1952 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1955 ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
1956 (define_insn "<fmaxmin><mode>3"
1957 [(set (match_operand:VDQF 0 "register_operand" "=w")
1958 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1959 (match_operand:VDQF 2 "register_operand" "w")]
1962 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1963 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1966 ;; 'across lanes' add.
;; Integer reduction: ADDV into a scratch vector, then extract lane 0
;; (endian-adjusted via ENDIAN_LANE_N).
1968 (define_expand "reduc_plus_scal_<mode>"
1969 [(match_operand:<VEL> 0 "register_operand" "=w")
1970 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1974 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1975 rtx scratch = gen_reg_rtx (<MODE>mode);
1976 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1977 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Same shape for the two-element FP modes (FADDP-based internal).
1982 (define_expand "reduc_plus_scal_<mode>"
1983 [(match_operand:<VEL> 0 "register_operand" "=w")
1984 (match_operand:V2F 1 "register_operand" "w")]
1987 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1988 rtx scratch = gen_reg_rtx (<MODE>mode);
1989 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1990 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; ADDV/ADDP across-lanes add producing a scalar in element 0.
1995 (define_insn "aarch64_reduc_plus_internal<mode>"
1996 [(set (match_operand:VDQV 0 "register_operand" "=w")
1997 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2000 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2001 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; pairwise ADDP of the vector with itself.
2004 (define_insn "aarch64_reduc_plus_internalv2si"
2005 [(set (match_operand:V2SI 0 "register_operand" "=w")
2006 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2009 "addp\\t%0.2s, %1.2s, %1.2s"
2010 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP reduction via scalar FADDP.
2013 (define_insn "aarch64_reduc_plus_internal<mode>"
2014 [(set (match_operand:V2F 0 "register_operand" "=w")
2015 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
2018 "faddp\\t%<Vetype>0, %1.<Vtype>"
2019 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; Vector pairwise FADDP on V4SF, used twice to reduce four lanes.
2022 (define_insn "aarch64_addpv4sf"
2023 [(set (match_operand:V4SF 0 "register_operand" "=w")
2024 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
2027 "faddp\\t%0.4s, %1.4s, %1.4s"
2028 [(set_attr "type" "neon_fp_reduc_add_s_q")]
;; V4SF reduction: two pairwise FADDPs, then extract lane 0.
2031 (define_expand "reduc_plus_scal_v4sf"
2032 [(set (match_operand:SF 0 "register_operand")
2033 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2037 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2038 rtx scratch = gen_reg_rtx (V4SFmode);
2039 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
2040 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
2041 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; CLS: count leading sign bits (clrsb standard name).
2045 (define_insn "clrsb<mode>2"
2046 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2047 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2049 "cls\\t%0.<Vtype>, %1.<Vtype>"
2050 [(set_attr "type" "neon_cls<q>")]
;; CLZ: count leading zeros.
2053 (define_insn "clz<mode>2"
2054 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2055 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2057 "clz\\t%0.<Vtype>, %1.<Vtype>"
2058 [(set_attr "type" "neon_cls<q>")]
;; CNT: per-byte population count (byte vector modes only).
2061 (define_insn "popcount<mode>2"
2062 [(set (match_operand:VB 0 "register_operand" "=w")
2063 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2065 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2066 [(set_attr "type" "neon_cnt<q>")]
2069 ;; 'across lanes' max and min ops.
2071 ;; Template for outputting a scalar, so we can create __builtins which can be
2072 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
2073 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2074 [(match_operand:<VEL> 0 "register_operand")
2075 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
2079 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2080 rtx scratch = gen_reg_rtx (<MODE>mode);
2081 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2083 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt))
2088 ;; Likewise for integer cases, signed and unsigned.
2089 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2090 [(match_operand:<VEL> 0 "register_operand")
2091 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2095 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2096 rtx scratch = gen_reg_rtx (<MODE>mode);
2097 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2099 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; SMAXV/UMAXV/SMINV/UMINV across-lanes integer max/min.
2104 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2105 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2106 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2109 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2110 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; pairwise op of the vector with itself.
2113 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2114 [(set (match_operand:V2SI 0 "register_operand" "=w")
2115 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2118 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2119 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (FMAXNMV etc. via <maxmin_uns_op><vp>).
2122 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2123 [(set (match_operand:VDQF 0 "register_operand" "=w")
2124 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
2127 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2128 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
2131 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2133 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2136 ;; Thus our BSL is of the form:
2137 ;; op0 = bsl (mask, op2, op3)
2138 ;; We can use any of:
2141 ;; bsl mask, op1, op2
2142 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2143 ;; bit op0, op2, mask
2144 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2145 ;; bif op0, op1, mask
2147 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2148 ;; Some forms of straight-line code may generate the equivalent form
2149 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bitwise select expressed canonically as ((op3 ^ op2) & mask) ^ op3;
;; the three alternatives tie a different operand to the destination so
;; the right one of BSL/BIT/BIF can be emitted (see comment above).
2151 (define_insn "aarch64_simd_bsl<mode>_internal"
2152 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2156 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2157 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2158 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2159 (match_dup:<V_cmp_result> 3)
2163 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2164 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2165 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2166 [(set_attr "type" "neon_bsl<q>")]
2169 ;; We need this form in addition to the above pattern to match the case
2170 ;; when combine tries merging three insns such that the second operand of
2171 ;; the outer XOR matches the second operand of the inner XOR rather than
2172 ;; the first. The two are equivalent but since recog doesn't try all
2173 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted form of aarch64_simd_bsl<mode>_internal: the inner XOR's
;; operands appear in the opposite order, which combine can produce but
;; recog will not canonicalize (see comment above).  Note the operand
;; roles of 2 and 3 are swapped relative to the pattern above, so the
;; output templates swap them back.
2175 (define_insn "*aarch64_simd_bsl<mode>_alt"
2176 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2180 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2181 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")
2182 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")
2183 (match_dup:VSDQ_I_DI 2)))]
2186 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2187 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2188 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2189 [(set_attr "type" "neon_bsl<q>")]
;; Expander for bitwise select over all vector modes including FP.
;; The internal insn works on integer modes only, so for FP modes the
;; operands are punned to the integer <V_cmp_result> mode via
;; gen_lowpart, a fresh integer-mode temp is used as the destination,
;; and the result is moved (lowpart-punned back) into operand 0.
2192 (define_expand "aarch64_simd_bsl<mode>"
2193 [(match_operand:VALLDIF 0 "register_operand")
2194 (match_operand:<V_cmp_result> 1 "register_operand")
2195 (match_operand:VALLDIF 2 "register_operand")
2196 (match_operand:VALLDIF 3 "register_operand")]
2199 /* We can't alias operands together if they have different modes.  */
2200 rtx tmp = operands[0];
2201 if (FLOAT_MODE_P (<MODE>mode))
2203 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2204 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
;; For FP modes the BSL must be done on a same-mode integer temp.
2205 tmp = gen_reg_rtx (<V_cmp_result>mode);
2207 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2208 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
;; Copy the temp back only when one was created (FP case).
2212 if (tmp != operands[0])
2213 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Integer vector conditional select: op0 = (op4 CMP op5) ? op1 : op2.
;; Strategy: build an all-ones/all-zeros mask with a CM* compare insn,
;; then BSL the two value operands with it.  Some comparisons are
;; inverted first (with op1/op2 conceptually swapped) either because
;; the hardware lacks the direct form (NE -> CMEQ) or because doing so
;; lets the mask itself be the result (FOO ? -1 : 0).
;; NOTE(review): several interior lines are elided in this chunk (the
;; inversion condition, switch heads, case labels); code left untouched.
2218 (define_expand "aarch64_vcond_internal<mode><mode>"
2219 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2220 (if_then_else:VSDQ_I_DI
2221 (match_operator 3 "comparison_operator"
2222 [(match_operand:VSDQ_I_DI 4 "register_operand")
2223 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2224 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2225 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2228 rtx op1 = operands[1];
2229 rtx op2 = operands[2];
2230 rtx mask = gen_reg_rtx (<MODE>mode);
2231 enum rtx_code code = GET_CODE (operands[3]);
2233 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2234 and desirable for other comparisons if it results in FOO ? -1 : 0
2235 (this allows direct use of the comparison result without a bsl).  */
2238 && op1 == CONST0_RTX (<V_cmp_result>mode)
2239 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
;; Invert the comparison (operands 1/2 are conceptually swapped).
2245 case LE: code = GT; break;
2246 case LT: code = GE; break;
2247 case GE: code = LT; break;
2248 case GT: code = LE; break;
2250 case NE: code = EQ; break;
2251 case LTU: code = GEU; break;
2252 case LEU: code = GTU; break;
2253 case GTU: code = LEU; break;
2254 case GEU: code = LTU; break;
2255 default: gcc_unreachable ();
2259 /* Make sure we can handle the last operand.  */
2263 /* Normalized to EQ above.  */
2271 /* These instructions have a form taking an immediate zero.  */
2272 if (operands[5] == CONST0_RTX (<MODE>mode))
2274 /* Fall through, as may need to load into register.  */
2276 if (!REG_P (operands[5]))
2277 operands[5] = force_reg (<MODE>mode, operands[5]);
;; Emit the comparison that materializes the selection mask.  Unsigned
;; LT/LE are handled by swapping operands into CMGTU/CMGEU.
2284 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2288 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2292 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2296 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2300 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2304 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2308 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2312 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2315 /* NE has been normalized to EQ above.  */
2317 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2324 /* If we have (a = (b CMP c) ? -1 : 0);
2325 Then we can simply move the generated mask.  */
2327 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2328 && op2 == CONST0_RTX (<V_cmp_result>mode))
2329 emit_move_insn (operands[0], mask);
;; General case: force value operands into registers and BSL.
2333 op1 = force_reg (<MODE>mode, op1);
2335 op2 = force_reg (<MODE>mode, op2);
2336 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point vector conditional select with a possibly different
;; (but equal-sized) mode for the selected values (VDQF_COND) than for
;; the compared values (VDQF).  FCM* compares return false for
;; unordered lanes, so unordered-aware codes (UNLE, UNEQ, ORDERED, ...)
;; are synthesized by emitting the *inverse* ordered comparison and
;; then swapping the two BSL data operands (swap_bsl_operands).
;; Comparisons against +0.0 use the dedicated zero-operand FCM forms
;; (use_zero_form).
;; NOTE(review): many interior lines are elided (case labels, the
;; use_zero_form/swap decision sites); code left byte-identical.
2343 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2344 [(set (match_operand:VDQF_COND 0 "register_operand")
2346 (match_operator 3 "comparison_operator"
2347 [(match_operand:VDQF 4 "register_operand")
2348 (match_operand:VDQF 5 "nonmemory_operand")])
2349 (match_operand:VDQF_COND 1 "nonmemory_operand")
2350 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2354 int use_zero_form = 0;
2355 int swap_bsl_operands = 0;
2356 rtx op1 = operands[1];
2357 rtx op2 = operands[2];
2358 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2359 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2361 rtx (*base_comparison) (rtx, rtx, rtx);
2362 rtx (*complimentary_comparison) (rtx, rtx, rtx);
2364 switch (GET_CODE (operands[3]))
2371 if (operands[5] == CONST0_RTX (<MODE>mode))
2378 if (!REG_P (operands[5]))
2379 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
;; Pick the generator for the comparison and for its operand-swapped
;; complement (used when the inverse form must be emitted).
2382 switch (GET_CODE (operands[3]))
2392 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2393 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2401 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2402 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2407 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2408 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2414 switch (GET_CODE (operands[3]))
2421 /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2422 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2428 Note that there also exist direct comparison against 0 forms,
2429 so catch those as a special case.  */
2433 switch (GET_CODE (operands[3]))
2436 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2439 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2442 /* Do nothing, other zero form cases already have the correct
;; Ordered comparisons: emit directly (or the swapped complement).
2449 emit_insn (base_comparison (mask, operands[4], operands[5]));
2451 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2458 /* FCM returns false for lanes which are unordered, so if we use
2459 the inverse of the comparison we actually want to emit, then
2460 swap the operands to BSL, we will end up with the correct result.
2461 Note that a NE NaN and NaN NE b are true for all a, b.
2463 Our transformations are:
2468 a NE b -> !(a EQ b)  */
2471 emit_insn (base_comparison (mask, operands[4], operands[5]));
2473 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2475 swap_bsl_operands = 1;
2478 /* We check (a > b || b > a).  combining these comparisons give us
2479 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2480 will then give us (a == b || a UNORDERED b) as intended.  */
2482 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2483 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2484 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2485 swap_bsl_operands = 1;
2488 /* Operands are ORDERED iff (a > b || b >= a).
2489 Swapping the operands to BSL will give the UNORDERED case.  */
2490 swap_bsl_operands = 1;
2493 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2494 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2495 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2501 if (swap_bsl_operands)
2507 /* If we have (a = (b CMP c) ? -1 : 0);
2508 Then we can simply move the generated mask.  */
2510 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2511 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2512 emit_move_insn (operands[0], mask);
;; General case: select via BSL on the materialized mask.
2516 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2518 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2519 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-named vcond/vcondu expanders.  All three simply forward to
;; the aarch64_vcond_internal expanders above, which do the real work.
;; "vcond<mode><mode>": same mode for condition and values (int or FP).
2526 (define_expand "vcond<mode><mode>"
2527 [(set (match_operand:VALLDI 0 "register_operand")
2528 (if_then_else:VALLDI
2529 (match_operator 3 "comparison_operator"
2530 [(match_operand:VALLDI 4 "register_operand")
2531 (match_operand:VALLDI 5 "nonmemory_operand")])
2532 (match_operand:VALLDI 1 "nonmemory_operand")
2533 (match_operand:VALLDI 2 "nonmemory_operand")))]
2536 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2537 operands[2], operands[3],
2538 operands[4], operands[5]));
;; "vcond<v_cmp_result><mode>": FP comparison selecting between
;; integer (same-width <V_cmp_result>) values.
2542 (define_expand "vcond<v_cmp_result><mode>"
2543 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2544 (if_then_else:<V_cmp_result>
2545 (match_operator 3 "comparison_operator"
2546 [(match_operand:VDQF 4 "register_operand")
2547 (match_operand:VDQF 5 "nonmemory_operand")])
2548 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2549 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2552 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2553 operands[0], operands[1],
2554 operands[2], operands[3],
2555 operands[4], operands[5]));
;; "vcondu<mode><mode>": unsigned integer comparison; the internal
;; expander distinguishes signedness from the rtx comparison code.
2559 (define_expand "vcondu<mode><mode>"
2560 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2561 (if_then_else:VSDQ_I_DI
2562 (match_operator 3 "comparison_operator"
2563 [(match_operand:VSDQ_I_DI 4 "register_operand")
2564 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2565 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2566 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2569 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2570 operands[2], operands[3],
2571 operands[4], operands[5]));
2575 ;; Patterns for AArch64 SIMD Intrinsics.
2577 ;; Lane extraction with sign extension to general purpose register.
;; Lane extract + sign extension to a general-purpose register (SMOV).
;; The lane index is flipped for big-endian via ENDIAN_LANE_N, since
;; RTL uses GCC vector-extension lane numbering throughout.
2578 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2579 [(set (match_operand:GPI 0 "register_operand" "=r")
2582 (match_operand:VDQQH 1 "register_operand" "w")
2583 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2586 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2587 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2589 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extract + zero extension to a 32-bit GP register (UMOV).
2592 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2593 [(set (match_operand:SI 0 "register_operand" "=r")
2596 (match_operand:VDQQH 1 "register_operand" "w")
2597 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2600 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2601 return "umov\\t%w0, %1.<Vetype>[%2]";
2603 [(set_attr "type" "neon_to_gp<q>")]
2606 ;; Lane extraction of a value, neither sign nor zero extension
2607 ;; is guaranteed so upper bits should be considered undefined.
2608 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Raw lane extraction (no extension guarantee on upper bits; see the
;; comment above).  Three alternatives by destination:
;;   GP register -> umov, FP/SIMD register -> dup, memory -> st1.
2609 (define_insn "aarch64_get_lane<mode>"
2610 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2612 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2613 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2616 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2617 switch (which_alternative)
2620 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2622 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2624 return "st1\\t{%1.<Vetype>}[%2], %0";
2629 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2632 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concatenation of a 64-bit value with zero to form a 128-bit vector.
;; Writing a D register implicitly zeroes the high half, so this needs
;; no explicit zeroing insn.  Little-endian form: zero is the high part.
2635 (define_insn "*aarch64_combinez<mode>"
2636 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2638 (match_operand:VD_BHSI 1 "general_operand" "w,r,m")
2639 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2640 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2645 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2646 (set_attr "simd" "yes,*,yes")
2647 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of the above: vec_concat operand order is reversed
;; because lane ordering flips, but the generated code is the same.
2650 (define_insn "*aarch64_combinez_be<mode>"
2651 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2653 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2654 (match_operand:VD_BHSI 1 "general_operand" "w,r,m")))]
2655 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2660 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2661 (set_attr "simd" "yes,*,yes")
2662 (set_attr "fp" "*,yes,*")]
;; vcombine: join two 64-bit vectors into one 128-bit vector.
;; The expander swaps the operands on big-endian before emitting the
;; internal insn (RTL vec_concat lane order differs from memory order).
2665 (define_expand "aarch64_combine<mode>"
2666 [(match_operand:<VDBL> 0 "register_operand")
2667 (match_operand:VDC 1 "register_operand")
2668 (match_operand:VDC 2 "register_operand")]
2672 if (BYTES_BIG_ENDIAN)
2682 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until after reload (earlyclobber dest), then split
;; into the two half-moves by aarch64_split_simd_combine; the halves
;; are swapped back on big-endian to undo the expander's swap.
2687 (define_insn_and_split "aarch64_combine_internal<mode>"
2688 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2689 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2690 (match_operand:VDC 2 "register_operand" "w")))]
2693 "&& reload_completed"
2696 if (BYTES_BIG_ENDIAN)
2697 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2699 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2702 [(set_attr "type" "multiple")]
;; Helper used by the splitter: build the double-width register from
;; its low (operand 1) and high (operand 2) halves with two quad moves.
2705 (define_expand "aarch64_simd_combine<mode>"
2706 [(match_operand:<VDBL> 0 "register_operand")
2707 (match_operand:VDC 1 "register_operand")
2708 (match_operand:VDC 2 "register_operand")]
2711 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2712 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2715 [(set_attr "type" "multiple")]
2718 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two quad vectors:
;; [US]{ADD,SUB}L2.  Operand 3 is the parallel selecting the high lanes.
2720 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2721 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2722 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2723 (match_operand:VQW 1 "register_operand" "w")
2724 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2725 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2726 (match_operand:VQW 2 "register_operand" "w")
2729 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2730 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves: [US]{ADD,SUB}L (non-"2" form).
2733 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2734 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2735 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2736 (match_operand:VQW 1 "register_operand" "w")
2737 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2738 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2739 (match_operand:VQW 2 "register_operand" "w")
2742 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2743 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Builder expanders for the "2" (high-half) widening long forms.
;; Each constructs the hi-half lane-selection parallel and forwards to
;; the matching *_hi_internal insn above.
2747 (define_expand "aarch64_saddl2<mode>"
2748 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2749 (match_operand:VQW 1 "register_operand" "w")
2750 (match_operand:VQW 2 "register_operand" "w")]
2753 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2754 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2759 (define_expand "aarch64_uaddl2<mode>"
2760 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2761 (match_operand:VQW 1 "register_operand" "w")
2762 (match_operand:VQW 2 "register_operand" "w")]
2765 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2766 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2771 (define_expand "aarch64_ssubl2<mode>"
2772 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2773 (match_operand:VQW 1 "register_operand" "w")
2774 (match_operand:VQW 2 "register_operand" "w")]
2777 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2778 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2783 (define_expand "aarch64_usubl2<mode>"
2784 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2785 (match_operand:VQW 1 "register_operand" "w")
2786 (match_operand:VQW 2 "register_operand" "w")]
2789 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2790 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of two 64-bit vectors: [US]{ADD,SUB}L on D regs
;; (both inputs fully extended to the double-width result mode).
2795 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2796 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2797 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2798 (match_operand:VD_BHSI 1 "register_operand" "w"))
2800 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2802 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2803 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2806 ;; <su><addsub>w<q>.
;; widen_ssum/widen_usum standard names: widening sum for vectorized
;; reductions.  Quad-vector (VQW) variants do it in two steps —
;; [us]addw on the low half, then [us]addw2 on the high half; the
;; 64-bit (VD_BHSI) variants map directly to a single [us]addw.
2808 (define_expand "widen_ssum<mode>3"
2809 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2810 (plus:<VDBLW> (sign_extend:<VDBLW>
2811 (match_operand:VQW 1 "register_operand" ""))
2812 (match_operand:<VDBLW> 2 "register_operand" "")))]
2815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2816 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2818 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2820 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
2825 (define_expand "widen_ssum<mode>3"
2826 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2827 (plus:<VWIDE> (sign_extend:<VWIDE>
2828 (match_operand:VD_BHSI 1 "register_operand" ""))
2829 (match_operand:<VWIDE> 2 "register_operand" "")))]
2832 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
2836 (define_expand "widen_usum<mode>3"
2837 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2838 (plus:<VDBLW> (zero_extend:<VDBLW>
2839 (match_operand:VQW 1 "register_operand" ""))
2840 (match_operand:<VDBLW> 2 "register_operand" "")))]
2843 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2844 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2846 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2848 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2853 (define_expand "widen_usum<mode>3"
2854 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2855 (plus:<VWIDE> (zero_extend:<VWIDE>
2856 (match_operand:VD_BHSI 1 "register_operand" ""))
2857 (match_operand:<VWIDE> 2 "register_operand" "")))]
2860 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening add/sub with one wide operand: [US]{ADD,SUB}W — narrow
;; operand 2 is extended and combined into wide operand 1.
2864 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2865 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2866 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2868 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2870 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2871 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same, taking the LOW half of a quad vector as the narrow operand.
2874 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2876 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2879 (match_operand:VQW 2 "register_operand" "w")
2880 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2882 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2883 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half form: [US]{ADD,SUB}W2.
2886 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2887 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2888 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2891 (match_operand:VQW 2 "register_operand" "w")
2892 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2894 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2895 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builder expanders for the "2" (high-half) widening-with-wide forms:
;; construct the hi-half lane parallel and forward to *w2_internal.
2898 (define_expand "aarch64_saddw2<mode>"
2899 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2900 (match_operand:<VWIDE> 1 "register_operand" "w")
2901 (match_operand:VQW 2 "register_operand" "w")]
2904 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2905 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2910 (define_expand "aarch64_uaddw2<mode>"
2911 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2912 (match_operand:<VWIDE> 1 "register_operand" "w")
2913 (match_operand:VQW 2 "register_operand" "w")]
2916 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2917 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2923 (define_expand "aarch64_ssubw2<mode>"
2924 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2925 (match_operand:<VWIDE> 1 "register_operand" "w")
2926 (match_operand:VQW 2 "register_operand" "w")]
2929 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2930 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2935 (define_expand "aarch64_usubw2<mode>"
2936 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2937 (match_operand:<VWIDE> 1 "register_operand" "w")
2938 (match_operand:VQW 2 "register_operand" "w")]
2941 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2942 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2947 ;; <su><r>h<addsub>.
;; Halving add/sub, optionally rounding: [US][R]H{ADD,SUB}
;; (<sur> selects s/u/sr/ur via the unspec).
2949 (define_insn "aarch64_<sur>h<addsub><mode>"
2950 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2951 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2952 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2955 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2956 [(set_attr "type" "neon_<addsub>_halve<q>")]
2959 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: [R]{ADD,SUB}HN writes the narrowed
;; high halves of the element-wise result into a 64-bit destination.
2961 (define_insn "aarch64_<sur><addsub>hn<mode>"
2962 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2963 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2964 (match_operand:VQN 2 "register_operand" "w")]
2967 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2968 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" form: narrows into the HIGH half of the destination while
;; keeping operand 1 (tied to dest, constraint "0") as the low half.
2971 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2972 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2973 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2974 (match_operand:VQN 2 "register_operand" "w")
2975 (match_operand:VQN 3 "register_operand" "w")]
2978 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2979 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (byte vectors only): PMUL.
2984 (define_insn "aarch64_pmul<mode>"
2985 [(set (match_operand:VB 0 "register_operand" "=w")
2986 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2987 (match_operand:VB 2 "register_operand" "w")]
2990 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2991 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: FP multiply-extended (0 * inf returns 2.0 instead of NaN),
;; vector and scalar forms via VALLF.
2996 (define_insn "aarch64_fmulx<mode>"
2997 [(set (match_operand:VALLF 0 "register_operand" "=w")
2998 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
2999 (match_operand:VALLF 2 "register_operand" "w")]
3002 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3003 [(set_attr "type" "neon_fp_mul_<Vetype>")]
3006 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a duplicated lane of a vector of the swapped width
;; (covers vmulxq_lane_f32 / vmulx_laneq_f32; see comment above).
3008 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3009 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3011 [(match_operand:VDQSF 1 "register_operand" "w")
3012 (vec_duplicate:VDQSF
3014 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3015 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3019 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3020 INTVAL (operands[3])));
3021 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3023 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3026 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width lane variant of FMULX by element.
3028 (define_insn "*aarch64_mulx_elt<mode>"
3029 [(set (match_operand:VDQF 0 "register_operand" "=w")
3031 [(match_operand:VDQF 1 "register_operand" "w")
3034 (match_operand:VDQF 2 "register_operand" "w")
3035 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3039 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3040 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3042 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; V2DF by a broadcast DF scalar: always lane 0, so no endian flip.
3047 (define_insn "*aarch64_mulx_elt_to_64v2df"
3048 [(set (match_operand:V2DF 0 "register_operand" "=w")
3050 [(match_operand:V2DF 1 "register_operand" "w")
3052 (match_operand:DF 2 "register_operand" "w"))]
3056 return "fmulx\t%0.2d, %1.2d, %2.d[0]";
3058 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
3061 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3062 ;; vmulxd_lane_f64 == vmulx_lane_f64
3063 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX with one operand taken from a vector lane
;; (vmulxs_lane_f32 etc.; see the comment above).
3065 (define_insn "*aarch64_vgetfmulx<mode>"
3066 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3068 [(match_operand:<VEL> 1 "register_operand" "w")
3070 (match_operand:VDQF_DF 2 "register_operand" "w")
3071 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3075 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3076 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3078 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [US]Q{ADD,SUB} (BINQOPS iterates the four).
3082 (define_insn "aarch64_<su_optab><optab><mode>"
3083 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3084 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3085 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3087 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3088 [(set_attr "type" "neon_<optab><q>")]
;; SUQADD/USQADD: saturating accumulate of opposite-signedness value;
;; two-operand insns, so operand 1 is tied to the destination ("0").
3093 (define_insn "aarch64_<sur>qadd<mode>"
3094 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3095 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3096 (match_operand:VSDQ_I 2 "register_operand" "w")]
3099 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3100 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed-to-unsigned saturating extract narrow.
3105 (define_insn "aarch64_sqmovun<mode>"
3106 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3107 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3110 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3111 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; SQXTN / UQXTN: saturating extract narrow, same signedness.
3116 (define_insn "aarch64_<sur>qmovn<mode>"
3117 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3118 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3121 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3122 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (s<optab>: e.g. SQABS/SQNEG per the iterator
;; supplying <optab>; the unspec line is elided in this chunk).
3127 (define_insn "aarch64_s<optab><mode>"
3128 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3130 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3132 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3133 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH / SQRDMULH: saturating doubling multiply returning high
;; half, optionally rounding (<r>).  Base form plus by-lane variants.
3138 (define_insn "aarch64_sq<r>dmulh<mode>"
3139 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3141 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3142 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3145 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3146 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector by lane of a 64-bit vector (<VCOND>); the lane index is
;; endian-corrected before printing, as everywhere in this file.
3151 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3152 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3154 [(match_operand:VDQHS 1 "register_operand" "w")
3156 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3157 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3161 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3162 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3163 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Vector by lane of a 128-bit vector (<VCONQ>).
3166 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3167 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3169 [(match_operand:VDQHS 1 "register_operand" "w")
3171 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3172 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3176 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3177 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3178 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by lane of a 64-bit vector.
3181 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3182 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3184 [(match_operand:SD_HSI 1 "register_operand" "w")
3186 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3187 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3191 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3192 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3193 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar by lane of a 128-bit vector.
3196 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3197 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3199 [(match_operand:SD_HSI 1 "register_operand" "w")
3201 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3202 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3206 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3207 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3208 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH / SQRDMLSH (ARMv8.1 RDMA): saturating rounding doubling
;; multiply accumulate/subtract; operand 1 is the accumulator, tied
;; to the destination ("0").  Base form plus by-lane variants.
3213 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3214 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3216 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3217 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3218 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3221 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3222 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3225 ;; sqrdml[as]h_lane.
;; Vector by lane of a 64-bit vector (<VCOND>).
3227 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3228 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3230 [(match_operand:VDQHS 1 "register_operand" "0")
3231 (match_operand:VDQHS 2 "register_operand" "w")
3233 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3234 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3238 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3240 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3242 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) by lane of a 64-bit vector.
3245 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3246 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3248 [(match_operand:SD_HSI 1 "register_operand" "0")
3249 (match_operand:SD_HSI 2 "register_operand" "w")
3251 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3252 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3256 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3258 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3260 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3263 ;; sqrdml[as]h_laneq.
;; Vector by lane of a 128-bit vector (<VCONQ>).
3265 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3266 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3268 [(match_operand:VDQHS 1 "register_operand" "0")
3269 (match_operand:VDQHS 2 "register_operand" "w")
3271 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3272 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3276 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3278 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3280 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by lane of a 128-bit vector.
3283 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3284 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3286 [(match_operand:SD_HSI 1 "register_operand" "0")
3287 (match_operand:SD_HSI 2 "register_operand" "w")
3289 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3290 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3294 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3296 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3298 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL / SQDMLSL: signed saturating doubling multiply
;; accumulate/subtract long; operand 1 is the wide accumulator, tied
;; to the destination ("0").
3303 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3304 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3306 (match_operand:<VWIDE> 1 "register_operand" "0")
3309 (sign_extend:<VWIDE>
3310 (match_operand:VSD_HSI 2 "register_operand" "w"))
3311 (sign_extend:<VWIDE>
3312 (match_operand:VSD_HSI 3 "register_operand" "w"))
3315 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3316 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane forms of SQDMLAL/SQDMLSL.  Vector (VD_HSI) variants
;; duplicate the selected lane across the vector; scalar (SD_HSI)
;; variants use the lane directly.  _lane takes the lane from a
;; 64-bit vector (<VCOND>), _laneq from a 128-bit one (<VCONQ>).
;; Lane indices are endian-corrected before printing.
3321 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3322 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3324 (match_operand:<VWIDE> 1 "register_operand" "0")
3327 (sign_extend:<VWIDE>
3328 (match_operand:VD_HSI 2 "register_operand" "w"))
3329 (sign_extend:<VWIDE>
3330 (vec_duplicate:VD_HSI
3332 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3333 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3338 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3340 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3342 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector by lane of a 128-bit vector.
3345 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3348 (match_operand:<VWIDE> 1 "register_operand" "0")
3351 (sign_extend:<VWIDE>
3352 (match_operand:VD_HSI 2 "register_operand" "w"))
3353 (sign_extend:<VWIDE>
3354 (vec_duplicate:VD_HSI
3356 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3357 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3362 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3364 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3366 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by lane of a 64-bit vector (no vec_duplicate needed).
3369 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3372 (match_operand:<VWIDE> 1 "register_operand" "0")
3375 (sign_extend:<VWIDE>
3376 (match_operand:SD_HSI 2 "register_operand" "w"))
3377 (sign_extend:<VWIDE>
3379 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3380 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3385 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3387 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3389 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by lane of a 128-bit vector.
3392 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3393 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3395 (match_operand:<VWIDE> 1 "register_operand" "0")
3398 (sign_extend:<VWIDE>
3399 (match_operand:SD_HSI 2 "register_operand" "w"))
3400 (sign_extend:<VWIDE>
3402 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3403 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3408 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3410 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3412 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3417 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3418 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3420 (match_operand:<VWIDE> 1 "register_operand" "0")
3423 (sign_extend:<VWIDE>
3424 (match_operand:VD_HSI 2 "register_operand" "w"))
3425 (sign_extend:<VWIDE>
3426 (vec_duplicate:VD_HSI
3427 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3430 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3431 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3436 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3437 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3439 (match_operand:<VWIDE> 1 "register_operand" "0")
3442 (sign_extend:<VWIDE>
3444 (match_operand:VQ_HSI 2 "register_operand" "w")
3445 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3446 (sign_extend:<VWIDE>
3448 (match_operand:VQ_HSI 3 "register_operand" "w")
3452 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3453 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for SQDMLAL2: builds the vec_par_cnst selecting the HIGH
;; half of the 128-bit inputs and delegates to the _internal insn.
;; NOTE(review): interior lines are missing from this extraction
;; (numbering skips 3460 -> 3463); the condition string and closing
;; braces are absent -- verify against upstream aarch64-simd.md.
3456 (define_expand "aarch64_sqdmlal2<mode>"
3457 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3458 (match_operand:<VWIDE> 1 "register_operand" "w")
3459 (match_operand:VQ_HSI 2 "register_operand" "w")
3460 (match_operand:VQ_HSI 3 "register_operand" "w")]
3463 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3464 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3465 operands[2], operands[3], p));
;; Expander for SQDMLSL2: mirror of the sqdmlal2 expander above but
;; delegating to the subtracting _internal pattern.
;; NOTE(review): interior lines are missing from this extraction
;; (numbering skips 3473 -> 3476) -- verify against upstream
;; aarch64-simd.md before editing.
3469 (define_expand "aarch64_sqdmlsl2<mode>"
3470 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3471 (match_operand:<VWIDE> 1 "register_operand" "w")
3472 (match_operand:VQ_HSI 2 "register_operand" "w")
3473 (match_operand:VQ_HSI 3 "register_operand" "w")]
3476 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3477 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3478 operands[2], operands[3], p));
3484 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3485 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3487 (match_operand:<VWIDE> 1 "register_operand" "0")
3490 (sign_extend:<VWIDE>
3492 (match_operand:VQ_HSI 2 "register_operand" "w")
3493 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3494 (sign_extend:<VWIDE>
3495 (vec_duplicate:<VHALF>
3497 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3498 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3503 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3505 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3507 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3510 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3511 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3513 (match_operand:<VWIDE> 1 "register_operand" "0")
3516 (sign_extend:<VWIDE>
3518 (match_operand:VQ_HSI 2 "register_operand" "w")
3519 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3520 (sign_extend:<VWIDE>
3521 (vec_duplicate:<VHALF>
3523 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3524 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3529 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3531 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3533 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3536 (define_expand "aarch64_sqdmlal2_lane<mode>"
3537 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3538 (match_operand:<VWIDE> 1 "register_operand" "w")
3539 (match_operand:VQ_HSI 2 "register_operand" "w")
3540 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3541 (match_operand:SI 4 "immediate_operand" "i")]
3544 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3545 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3546 operands[2], operands[3],
3551 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3552 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3553 (match_operand:<VWIDE> 1 "register_operand" "w")
3554 (match_operand:VQ_HSI 2 "register_operand" "w")
3555 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3556 (match_operand:SI 4 "immediate_operand" "i")]
3559 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3560 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3561 operands[2], operands[3],
3566 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3567 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3568 (match_operand:<VWIDE> 1 "register_operand" "w")
3569 (match_operand:VQ_HSI 2 "register_operand" "w")
3570 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3571 (match_operand:SI 4 "immediate_operand" "i")]
3574 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3575 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3576 operands[2], operands[3],
3581 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3582 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3583 (match_operand:<VWIDE> 1 "register_operand" "w")
3584 (match_operand:VQ_HSI 2 "register_operand" "w")
3585 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3586 (match_operand:SI 4 "immediate_operand" "i")]
3589 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3590 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3591 operands[2], operands[3],
3596 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3599 (match_operand:<VWIDE> 1 "register_operand" "0")
3602 (sign_extend:<VWIDE>
3604 (match_operand:VQ_HSI 2 "register_operand" "w")
3605 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3606 (sign_extend:<VWIDE>
3607 (vec_duplicate:<VHALF>
3608 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3611 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3612 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3615 (define_expand "aarch64_sqdmlal2_n<mode>"
3616 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3617 (match_operand:<VWIDE> 1 "register_operand" "w")
3618 (match_operand:VQ_HSI 2 "register_operand" "w")
3619 (match_operand:<VEL> 3 "register_operand" "w")]
3622 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3623 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3624 operands[2], operands[3],
3629 (define_expand "aarch64_sqdmlsl2_n<mode>"
3630 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3631 (match_operand:<VWIDE> 1 "register_operand" "w")
3632 (match_operand:VQ_HSI 2 "register_operand" "w")
3633 (match_operand:<VEL> 3 "register_operand" "w")]
3636 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3637 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3638 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long, both operands
;; sign-extended to the widened mode before the multiply.
;; NOTE(review): interior lines are missing from this extraction
;; (numbering skips 3646 -> 3649); the ss_ashift/mult wrapper,
;; condition and closing parens are absent -- verify upstream.
3645 (define_insn "aarch64_sqdmull<mode>"
3646 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3649 (sign_extend:<VWIDE>
3650 (match_operand:VSD_HSI 1 "register_operand" "w"))
3651 (sign_extend:<VWIDE>
3652 (match_operand:VSD_HSI 2 "register_operand" "w")))
3655 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3656 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3661 (define_insn "aarch64_sqdmull_lane<mode>"
3662 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3665 (sign_extend:<VWIDE>
3666 (match_operand:VD_HSI 1 "register_operand" "w"))
3667 (sign_extend:<VWIDE>
3668 (vec_duplicate:VD_HSI
3670 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3671 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3676 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3677 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3679 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3682 (define_insn "aarch64_sqdmull_laneq<mode>"
3683 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3686 (sign_extend:<VWIDE>
3687 (match_operand:VD_HSI 1 "register_operand" "w"))
3688 (sign_extend:<VWIDE>
3689 (vec_duplicate:VD_HSI
3691 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3692 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3697 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3698 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3700 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3703 (define_insn "aarch64_sqdmull_lane<mode>"
3704 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3707 (sign_extend:<VWIDE>
3708 (match_operand:SD_HSI 1 "register_operand" "w"))
3709 (sign_extend:<VWIDE>
3711 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3712 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3717 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3718 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3720 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3723 (define_insn "aarch64_sqdmull_laneq<mode>"
3724 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3727 (sign_extend:<VWIDE>
3728 (match_operand:SD_HSI 1 "register_operand" "w"))
3729 (sign_extend:<VWIDE>
3731 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3732 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3737 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3738 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3740 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL (by scalar): operand 2 is a scalar duplicated across the
;; vector; the template references lane [0] of the scalar register.
;; NOTE(review): interior lines are missing from this extraction
;; (numbering skips 3746 -> 3749 and 3753 -> 3757) -- verify against
;; upstream aarch64-simd.md before editing.
3745 (define_insn "aarch64_sqdmull_n<mode>"
3746 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3749 (sign_extend:<VWIDE>
3750 (match_operand:VD_HSI 1 "register_operand" "w"))
3751 (sign_extend:<VWIDE>
3752 (vec_duplicate:VD_HSI
3753 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3757 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3758 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3765 (define_insn "aarch64_sqdmull2<mode>_internal"
3766 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3769 (sign_extend:<VWIDE>
3771 (match_operand:VQ_HSI 1 "register_operand" "w")
3772 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3773 (sign_extend:<VWIDE>
3775 (match_operand:VQ_HSI 2 "register_operand" "w")
3780 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3781 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for SQDMULL2: selects the high half of the 128-bit inputs
;; via a vec_par_cnst and delegates to the _internal insn above.
;; NOTE(review): the trailing arguments / closing parens of the
;; emit_insn call are missing from this extraction (numbering jumps
;; 3791 -> 3798) -- verify against upstream aarch64-simd.md.
3784 (define_expand "aarch64_sqdmull2<mode>"
3785 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3786 (match_operand:VQ_HSI 1 "register_operand" "w")
3787 (match_operand:VQ_HSI 2 "register_operand" "w")]
3790 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3791 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3798 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3799 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3802 (sign_extend:<VWIDE>
3804 (match_operand:VQ_HSI 1 "register_operand" "w")
3805 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3806 (sign_extend:<VWIDE>
3807 (vec_duplicate:<VHALF>
3809 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3810 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3815 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3816 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3818 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3821 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3822 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3825 (sign_extend:<VWIDE>
3827 (match_operand:VQ_HSI 1 "register_operand" "w")
3828 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3829 (sign_extend:<VWIDE>
3830 (vec_duplicate:<VHALF>
3832 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3833 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3838 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3839 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3841 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3844 (define_expand "aarch64_sqdmull2_lane<mode>"
3845 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3846 (match_operand:VQ_HSI 1 "register_operand" "w")
3847 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3848 (match_operand:SI 3 "immediate_operand" "i")]
3851 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3852 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3853 operands[2], operands[3],
3858 (define_expand "aarch64_sqdmull2_laneq<mode>"
3859 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3860 (match_operand:VQ_HSI 1 "register_operand" "w")
3861 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3862 (match_operand:SI 3 "immediate_operand" "i")]
3865 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3866 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3867 operands[2], operands[3],
3874 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3878 (sign_extend:<VWIDE>
3880 (match_operand:VQ_HSI 1 "register_operand" "w")
3881 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3882 (sign_extend:<VWIDE>
3883 (vec_duplicate:<VHALF>
3884 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3888 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3889 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3892 (define_expand "aarch64_sqdmull2_n<mode>"
3893 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3894 (match_operand:VQ_HSI 1 "register_operand" "w")
3895 (match_operand:<VEL> 2 "register_operand" "w")]
3898 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3899 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Register-controlled vector shift: emits "<sur>shl" (SSHL / USHL /
;; SRSHL / URSHL via the <sur> iterator), shift amounts in operand 2.
;; NOTE(review): the unspec wrapper opening and condition string are
;; missing from this extraction (numbering skips 3907 -> 3909,
;; 3910 -> 3913) -- verify against upstream aarch64-simd.md.
3906 (define_insn "aarch64_<sur>shl<mode>"
3907 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3909 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3910 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3913 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3914 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (optionally rounding) register-controlled shift:
;; emits "<sur>q<r>shl" (SQSHL/UQSHL/SQRSHL/UQRSHL).
;; NOTE(review): interior lines missing in this extraction
;; (numbering skips 3921 -> 3923, 3924 -> 3927) -- verify upstream.
3920 (define_insn "aarch64_<sur>q<r>shl<mode>"
3921 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3923 [(match_operand:VSDQ_I 1 "register_operand" "w")
3924 (match_operand:VSDQ_I 2 "register_operand" "w")]
3927 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3928 [(set_attr "type" "neon_sat_shift_reg<q>")]
3933 (define_insn "aarch64_<sur>shll_n<mode>"
3934 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3935 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3937 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3941 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3942 if (INTVAL (operands[2]) == bit_width)
3944 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3947 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3949 [(set_attr "type" "neon_shift_imm_long")]
3954 (define_insn "aarch64_<sur>shll2_n<mode>"
3955 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3956 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3957 (match_operand:SI 2 "immediate_operand" "i")]
3961 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3962 if (INTVAL (operands[2]) == bit_width)
3964 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3967 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3969 [(set_attr "type" "neon_shift_imm_long")]
;; Immediate shift right: emits "<sur>shr" with an immediate in
;; operand 2, validated by the shift_imm_offset predicate.
;; NOTE(review): the line declaring operand 2 and the pattern
;; condition are missing from this extraction (numbering skips
;; 3976 -> 3978, 3978 -> 3981) -- verify against upstream.
3974 (define_insn "aarch64_<sur>shr_n<mode>"
3975 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3976 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3978 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3981 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3982 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate: emits "<sur>sra"; operand 1 is tied to
;; the destination ("0" constraint) as the accumulator, operand 2 is
;; shifted by immediate operand 3.
;; NOTE(review): the operand-3 declaration line and condition are
;; missing from this extraction (numbering skips 3990 -> 3992,
;; 3992 -> 3995) -- verify against upstream aarch64-simd.md.
3987 (define_insn "aarch64_<sur>sra_n<mode>"
3988 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3989 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3990 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3992 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3995 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3996 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-and-insert: emits "s<lr>i" (SLI / SRI); operand 1 (tied to
;; dest) supplies the bits preserved outside the shifted field.
;; NOTE(review): the operand-3 declaration and condition lines are
;; missing from this extraction (numbering skips 4004 -> 4006,
;; 4006 -> 4009) -- verify against upstream aarch64-simd.md.
4001 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4002 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4003 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4004 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4006 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4009 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4010 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate: emits "<sur>qshl<u>"
;; (SQSHL / UQSHL / SQSHLU by constant).
;; NOTE(review): the operand-2 declaration and condition lines are
;; missing from this extraction (numbering skips 4017 -> 4019,
;; 4019 -> 4022) -- verify against upstream aarch64-simd.md.
4015 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4016 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4017 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4019 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4022 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4023 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift right narrow: emits "<sur>q<r>shr<u>n",
;; result in the narrowed mode <VNARROWQ>.
;; NOTE(review): the operand-2 declaration and condition lines are
;; missing from this extraction (numbering skips 4031 -> 4033,
;; 4033 -> 4036) -- verify against upstream aarch64-simd.md.
4029 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4030 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4031 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4033 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4036 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4037 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4041 ;; cm(eq|ge|gt|lt|le)
4042 ;; Note, we have constraints for Dz and Z as different expanders
4043 ;; have different ideas of what should be passed to this pattern.
4045 (define_insn "aarch64_cm<optab><mode>"
4046 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4048 (COMPARISONS:<V_cmp_result>
4049 (match_operand:VDQ_I 1 "register_operand" "w,w")
4050 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4054 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4055 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4056 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4059 (define_insn_and_split "aarch64_cm<optab>di"
4060 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4063 (match_operand:DI 1 "register_operand" "w,w,r")
4064 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4066 (clobber (reg:CC CC_REGNUM))]
4070 [(set (match_operand:DI 0 "register_operand")
4073 (match_operand:DI 1 "register_operand")
4074 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4077 /* If we are in the general purpose register file,
4078 we split to a sequence of comparison and store. */
4079 if (GP_REGNUM_P (REGNO (operands[0]))
4080 && GP_REGNUM_P (REGNO (operands[1])))
4082 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4083 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4084 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4085 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4088 /* Otherwise, we expand to a similar pattern which does not
4089 clobber CC_REGNUM. */
4091 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4094 (define_insn "*aarch64_cm<optab>di"
4095 [(set (match_operand:DI 0 "register_operand" "=w,w")
4098 (match_operand:DI 1 "register_operand" "w,w")
4099 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4101 "TARGET_SIMD && reload_completed"
4103 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4104 cm<optab>\t%d0, %d1, #0"
4105 [(set_attr "type" "neon_compare, neon_compare_zero")]
4110 (define_insn "aarch64_cm<optab><mode>"
4111 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4113 (UCOMPARISONS:<V_cmp_result>
4114 (match_operand:VDQ_I 1 "register_operand" "w")
4115 (match_operand:VDQ_I 2 "register_operand" "w")
4118 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4119 [(set_attr "type" "neon_compare<q>")]
4122 (define_insn_and_split "aarch64_cm<optab>di"
4123 [(set (match_operand:DI 0 "register_operand" "=w,r")
4126 (match_operand:DI 1 "register_operand" "w,r")
4127 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4129 (clobber (reg:CC CC_REGNUM))]
4133 [(set (match_operand:DI 0 "register_operand")
4136 (match_operand:DI 1 "register_operand")
4137 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4140 /* If we are in the general purpose register file,
4141 we split to a sequence of comparison and store. */
4142 if (GP_REGNUM_P (REGNO (operands[0]))
4143 && GP_REGNUM_P (REGNO (operands[1])))
4145 machine_mode mode = CCmode;
4146 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4147 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4148 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4151 /* Otherwise, we expand to a similar pattern which does not
4152 clobber CC_REGNUM. */
4154 [(set_attr "type" "neon_compare,multiple")]
4157 (define_insn "*aarch64_cm<optab>di"
4158 [(set (match_operand:DI 0 "register_operand" "=w")
4161 (match_operand:DI 1 "register_operand" "w")
4162 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4164 "TARGET_SIMD && reload_completed"
4165 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4166 [(set_attr "type" "neon_compare")]
4171 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4172 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4173 ;; not (neg (eq (and x y) 0))
4174 ;; which is rewritten by simplify_rtx as
4175 ;; plus (eq (and x y) 0) -1.
4177 (define_insn "aarch64_cmtst<mode>"
4178 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4179 (plus:<V_cmp_result>
4182 (match_operand:VDQ_I 1 "register_operand" "w")
4183 (match_operand:VDQ_I 2 "register_operand" "w"))
4184 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4185 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4188 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4189 [(set_attr "type" "neon_tst<q>")]
4192 (define_insn_and_split "aarch64_cmtstdi"
4193 [(set (match_operand:DI 0 "register_operand" "=w,r")
4197 (match_operand:DI 1 "register_operand" "w,r")
4198 (match_operand:DI 2 "register_operand" "w,r"))
4200 (clobber (reg:CC CC_REGNUM))]
4204 [(set (match_operand:DI 0 "register_operand")
4208 (match_operand:DI 1 "register_operand")
4209 (match_operand:DI 2 "register_operand"))
4212 /* If we are in the general purpose register file,
4213 we split to a sequence of comparison and store. */
4214 if (GP_REGNUM_P (REGNO (operands[0]))
4215 && GP_REGNUM_P (REGNO (operands[1])))
4217 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4218 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4219 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4220 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4221 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4224 /* Otherwise, we expand to a similar pattern which does not
4225 clobber CC_REGNUM. */
4227 [(set_attr "type" "neon_tst,multiple")]
4230 (define_insn "*aarch64_cmtstdi"
4231 [(set (match_operand:DI 0 "register_operand" "=w")
4235 (match_operand:DI 1 "register_operand" "w")
4236 (match_operand:DI 2 "register_operand" "w"))
4239 "cmtst\t%d0, %d1, %d2"
4240 [(set_attr "type" "neon_tst")]
4243 ;; fcm(eq|ge|gt|le|lt)
4245 (define_insn "aarch64_cm<optab><mode>"
4246 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4248 (COMPARISONS:<V_cmp_result>
4249 (match_operand:VALLF 1 "register_operand" "w,w")
4250 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4254 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4255 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4256 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
4260 ;; Note we can also handle what would be fac(le|lt) by
4261 ;; generating fac(ge|gt).
4263 (define_insn "*aarch64_fac<optab><mode>"
4264 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4266 (FAC_COMPARISONS:<V_cmp_result>
4267 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
4268 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
4271 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4272 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; Pairwise add on 64-bit integer vectors: emits "addp".
;; NOTE(review): the unspec opening and condition lines are missing
;; from this extraction (numbering skips 4278 -> 4280, 4281 -> 4284)
;; -- verify against upstream aarch64-simd.md.
4277 (define_insn "aarch64_addp<mode>"
4278 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4280 [(match_operand:VD_BHSI 1 "register_operand" "w")
4281 (match_operand:VD_BHSI 2 "register_operand" "w")]
4284 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4285 [(set_attr "type" "neon_reduc_add<q>")]
4288 (define_insn "aarch64_addpdi"
4289 [(set (match_operand:DI 0 "register_operand" "=w")
4291 [(match_operand:V2DI 1 "register_operand" "w")]
4295 [(set_attr "type" "neon_reduc_add")]
;; Vector floating-point square root (standard-named sqrt<mode>2
;; pattern): emits "fsqrt" element-wise on VDQF vectors.
;; NOTE(review): the condition line (4303) is missing from this
;; extraction -- verify against upstream aarch64-simd.md.
4300 (define_insn "sqrt<mode>2"
4301 [(set (match_operand:VDQF 0 "register_operand" "=w")
4302 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
4304 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4305 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
4308 ;; Patterns for vector struct loads and stores.
;; Two-register structure load: emits "ld2" into an OImode (2x128-bit)
;; register pair; the inner VQ unspec only carries the element mode.
;; NOTE(review): the unspec tag and condition lines are missing from
;; this extraction (numbering skips 4313 -> 4316) -- verify upstream.
4310 (define_insn "aarch64_simd_ld2<mode>"
4311 [(set (match_operand:OI 0 "register_operand" "=w")
4312 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4313 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4316 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4317 [(set_attr "type" "neon_load2_2reg<q>")]
4320 (define_insn "aarch64_simd_ld2r<mode>"
4321 [(set (match_operand:OI 0 "register_operand" "=w")
4322 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4323 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4326 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4327 [(set_attr "type" "neon_load2_all_lanes<q>")]
4330 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4331 [(set (match_operand:OI 0 "register_operand" "=w")
4332 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4333 (match_operand:OI 2 "register_operand" "0")
4334 (match_operand:SI 3 "immediate_operand" "i")
4335 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4339 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4340 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4342 [(set_attr "type" "neon_load2_one_lane")]
;; Standard-named expander for 2-way interleaved loads.  On big-endian
;; the lanes come back in reversed register order, so the result is
;; loaded into a temporary and permuted with aarch64_rev_reglistoi;
;; otherwise the ld2 insn is emitted directly.
;; NOTE(review): braces/DONE and the else-branch structure are missing
;; from this extraction (numbering skips 4348 -> 4352, 4357 -> 4360)
;; -- verify against upstream aarch64-simd.md.
4345 (define_expand "vec_load_lanesoi<mode>"
4346 [(set (match_operand:OI 0 "register_operand" "=w")
4347 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4348 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4352 if (BYTES_BIG_ENDIAN)
4354 rtx tmp = gen_reg_rtx (OImode);
4355 rtx mask = aarch64_reverse_mask (<MODE>mode);
4356 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4357 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4360 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; Two-register structure store: emits "st2" from an OImode register
;; pair to memory; counterpart of aarch64_simd_ld2.
;; NOTE(review): the unspec tag and condition lines are missing from
;; this extraction (numbering skips 4367 -> 4370) -- verify upstream.
4364 (define_insn "aarch64_simd_st2<mode>"
4365 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4366 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4367 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4370 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4371 [(set_attr "type" "neon_store2_2reg<q>")]
4374 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4375 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4376 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4377 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4378 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4379 (match_operand:SI 2 "immediate_operand" "i")]
4383 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4384 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4386 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-named expander for 2-way interleaved stores.  On
;; big-endian the register list is permuted into a temporary with
;; aarch64_rev_reglistoi before the st2; otherwise st2 is emitted
;; directly.  Mirror of vec_load_lanesoi above.
;; NOTE(review): braces/DONE and the else-branch structure are missing
;; from this extraction (numbering skips 4392 -> 4396, 4401 -> 4404)
;; -- verify against upstream aarch64-simd.md.
4389 (define_expand "vec_store_lanesoi<mode>"
4390 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4391 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4392 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4396 if (BYTES_BIG_ENDIAN)
4398 rtx tmp = gen_reg_rtx (OImode);
4399 rtx mask = aarch64_reverse_mask (<MODE>mode);
4400 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4401 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4404 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4408 (define_insn "aarch64_simd_ld3<mode>"
4409 [(set (match_operand:CI 0 "register_operand" "=w")
4410 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4411 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4414 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4415 [(set_attr "type" "neon_load3_3reg<q>")]
4418 (define_insn "aarch64_simd_ld3r<mode>"
4419 [(set (match_operand:CI 0 "register_operand" "=w")
4420 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4421 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4424 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4425 [(set_attr "type" "neon_load3_all_lanes<q>")]
4428 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4429 [(set (match_operand:CI 0 "register_operand" "=w")
4430 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4431 (match_operand:CI 2 "register_operand" "0")
4432 (match_operand:SI 3 "immediate_operand" "i")
4433 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4437 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4438 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4440 [(set_attr "type" "neon_load3_one_lane")]
4443 (define_expand "vec_load_lanesci<mode>"
4444 [(set (match_operand:CI 0 "register_operand" "=w")
4445 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4446 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4450 if (BYTES_BIG_ENDIAN)
4452 rtx tmp = gen_reg_rtx (CImode);
4453 rtx mask = aarch64_reverse_mask (<MODE>mode);
4454 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4455 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4458 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4462 (define_insn "aarch64_simd_st3<mode>"
4463 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4464 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4465 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4468 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4469 [(set_attr "type" "neon_store3_3reg<q>")]
4472 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4473 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4474 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4475 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4476 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4477 (match_operand:SI 2 "immediate_operand" "i")]
4481 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4482 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4484 [(set_attr "type" "neon_store3_one_lane<q>")]
4487 (define_expand "vec_store_lanesci<mode>"
4488 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4489 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4490 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4494 if (BYTES_BIG_ENDIAN)
4496 rtx tmp = gen_reg_rtx (CImode);
4497 rtx mask = aarch64_reverse_mask (<MODE>mode);
4498 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4499 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4502 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4506 (define_insn "aarch64_simd_ld4<mode>"
4507 [(set (match_operand:XI 0 "register_operand" "=w")
4508 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4509 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4512 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4513 [(set_attr "type" "neon_load4_4reg<q>")]
4516 (define_insn "aarch64_simd_ld4r<mode>"
4517 [(set (match_operand:XI 0 "register_operand" "=w")
4518 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4519 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4522 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4523 [(set_attr "type" "neon_load4_all_lanes<q>")]
4526 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4527 [(set (match_operand:XI 0 "register_operand" "=w")
4528 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4529 (match_operand:XI 2 "register_operand" "0")
4530 (match_operand:SI 3 "immediate_operand" "i")
4531 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4535 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4536 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4538 [(set_attr "type" "neon_load4_one_lane")]
4541 (define_expand "vec_load_lanesxi<mode>"
4542 [(set (match_operand:XI 0 "register_operand" "=w")
4543 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4544 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4548 if (BYTES_BIG_ENDIAN)
4550 rtx tmp = gen_reg_rtx (XImode);
4551 rtx mask = aarch64_reverse_mask (<MODE>mode);
4552 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4553 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4556 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4560 (define_insn "aarch64_simd_st4<mode>"
4561 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4562 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4563 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4566 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4567 [(set_attr "type" "neon_store4_4reg<q>")]
4570 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4571 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4572 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4573 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4574 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4575 (match_operand:SI 2 "immediate_operand" "i")]
4579 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4580 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4582 [(set_attr "type" "neon_store4_one_lane<q>")]
4585 (define_expand "vec_store_lanesxi<mode>"
4586 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4587 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4588 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4592 if (BYTES_BIG_ENDIAN)
4594 rtx tmp = gen_reg_rtx (XImode);
4595 rtx mask = aarch64_reverse_mask (<MODE>mode);
4596 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4597 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4600 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4604 (define_insn_and_split "aarch64_rev_reglist<mode>"
4605 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4607 [(match_operand:VSTRUCT 1 "register_operand" "w")
4608 (match_operand:V16QI 2 "register_operand" "w")]
4609 UNSPEC_REV_REGLIST))]
4612 "&& reload_completed"
4616 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4617 for (i = 0; i < nregs; i++)
4619 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4620 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4621 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4625 [(set_attr "type" "neon_tbl1_q")
4626 (set_attr "length" "<insn_count>")]
4629 ;; Reload patterns for AdvSIMD register list operands.
4631 (define_expand "mov<mode>"
4632 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4633 (match_operand:VSTRUCT 1 "general_operand" ""))]
4636 if (can_create_pseudo_p ())
4638 if (GET_CODE (operands[0]) != REG)
4639 operands[1] = force_reg (<MODE>mode, operands[1]);
4643 (define_insn "*aarch64_mov<mode>"
4644 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4645 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4646 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4647 && (register_operand (operands[0], <MODE>mode)
4648 || register_operand (operands[1], <MODE>mode))"
4651 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4652 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4653 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4654 neon_load<nregs>_<nregs>reg_q")
4655 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4658 (define_insn "aarch64_be_ld1<mode>"
4659 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4660 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4661 "aarch64_simd_struct_operand" "Utv")]
4664 "ld1\\t{%0<Vmtype>}, %1"
4665 [(set_attr "type" "neon_load1_1reg<q>")]
4668 (define_insn "aarch64_be_st1<mode>"
4669 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4670 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4673 "st1\\t{%1<Vmtype>}, %0"
4674 [(set_attr "type" "neon_store1_1reg<q>")]
4677 (define_insn "*aarch64_be_movoi"
4678 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4679 (match_operand:OI 1 "general_operand" " w,w,m"))]
4680 "TARGET_SIMD && BYTES_BIG_ENDIAN
4681 && (register_operand (operands[0], OImode)
4682 || register_operand (operands[1], OImode))"
4687 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4688 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4691 (define_insn "*aarch64_be_movci"
4692 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4693 (match_operand:CI 1 "general_operand" " w,w,o"))]
4694 "TARGET_SIMD && BYTES_BIG_ENDIAN
4695 && (register_operand (operands[0], CImode)
4696 || register_operand (operands[1], CImode))"
4698 [(set_attr "type" "multiple")
4699 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4702 (define_insn "*aarch64_be_movxi"
4703 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4704 (match_operand:XI 1 "general_operand" " w,w,o"))]
4705 "TARGET_SIMD && BYTES_BIG_ENDIAN
4706 && (register_operand (operands[0], XImode)
4707 || register_operand (operands[1], XImode))"
4709 [(set_attr "type" "multiple")
4710 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4714 [(set (match_operand:OI 0 "register_operand")
4715 (match_operand:OI 1 "register_operand"))]
4716 "TARGET_SIMD && reload_completed"
4719 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4724 [(set (match_operand:CI 0 "nonimmediate_operand")
4725 (match_operand:CI 1 "general_operand"))]
4726 "TARGET_SIMD && reload_completed"
4729 if (register_operand (operands[0], CImode)
4730 && register_operand (operands[1], CImode))
4732 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4735 else if (BYTES_BIG_ENDIAN)
4737 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4738 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4739 emit_move_insn (gen_lowpart (V16QImode,
4740 simplify_gen_subreg (TImode, operands[0],
4742 gen_lowpart (V16QImode,
4743 simplify_gen_subreg (TImode, operands[1],
4752 [(set (match_operand:XI 0 "nonimmediate_operand")
4753 (match_operand:XI 1 "general_operand"))]
4754 "TARGET_SIMD && reload_completed"
4757 if (register_operand (operands[0], XImode)
4758 && register_operand (operands[1], XImode))
4760 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4763 else if (BYTES_BIG_ENDIAN)
4765 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4766 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4767 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4768 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4775 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4776 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4777 (match_operand:DI 1 "register_operand" "w")
4778 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4781 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4782 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4785 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4790 (define_insn "aarch64_ld2<mode>_dreg"
4791 [(set (match_operand:OI 0 "register_operand" "=w")
4796 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4798 (vec_duplicate:VD (const_int 0)))
4800 (unspec:VD [(match_dup 1)]
4802 (vec_duplicate:VD (const_int 0)))) 0))]
4804 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4805 [(set_attr "type" "neon_load2_2reg<q>")]
4808 (define_insn "aarch64_ld2<mode>_dreg"
4809 [(set (match_operand:OI 0 "register_operand" "=w")
4814 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4818 (unspec:DX [(match_dup 1)]
4820 (const_int 0))) 0))]
4822 "ld1\\t{%S0.1d - %T0.1d}, %1"
4823 [(set_attr "type" "neon_load1_2reg<q>")]
4826 (define_insn "aarch64_ld3<mode>_dreg"
4827 [(set (match_operand:CI 0 "register_operand" "=w")
4833 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4835 (vec_duplicate:VD (const_int 0)))
4837 (unspec:VD [(match_dup 1)]
4839 (vec_duplicate:VD (const_int 0))))
4841 (unspec:VD [(match_dup 1)]
4843 (vec_duplicate:VD (const_int 0)))) 0))]
4845 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4846 [(set_attr "type" "neon_load3_3reg<q>")]
4849 (define_insn "aarch64_ld3<mode>_dreg"
4850 [(set (match_operand:CI 0 "register_operand" "=w")
4856 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4860 (unspec:DX [(match_dup 1)]
4864 (unspec:DX [(match_dup 1)]
4866 (const_int 0))) 0))]
4868 "ld1\\t{%S0.1d - %U0.1d}, %1"
4869 [(set_attr "type" "neon_load1_3reg<q>")]
4872 (define_insn "aarch64_ld4<mode>_dreg"
4873 [(set (match_operand:XI 0 "register_operand" "=w")
4879 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4881 (vec_duplicate:VD (const_int 0)))
4883 (unspec:VD [(match_dup 1)]
4885 (vec_duplicate:VD (const_int 0))))
4888 (unspec:VD [(match_dup 1)]
4890 (vec_duplicate:VD (const_int 0)))
4892 (unspec:VD [(match_dup 1)]
4894 (vec_duplicate:VD (const_int 0))))) 0))]
4896 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4897 [(set_attr "type" "neon_load4_4reg<q>")]
4900 (define_insn "aarch64_ld4<mode>_dreg"
4901 [(set (match_operand:XI 0 "register_operand" "=w")
4907 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4911 (unspec:DX [(match_dup 1)]
4916 (unspec:DX [(match_dup 1)]
4920 (unspec:DX [(match_dup 1)]
4922 (const_int 0)))) 0))]
4924 "ld1\\t{%S0.1d - %V0.1d}, %1"
4925 [(set_attr "type" "neon_load1_4reg<q>")]
4928 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4929 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4930 (match_operand:DI 1 "register_operand" "r")
4931 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4934 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4935 set_mem_size (mem, <VSTRUCT:nregs> * 8);
4937 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4941 (define_expand "aarch64_ld1<VALL_F16:mode>"
4942 [(match_operand:VALL_F16 0 "register_operand")
4943 (match_operand:DI 1 "register_operand")]
4946 machine_mode mode = <VALL_F16:MODE>mode;
4947 rtx mem = gen_rtx_MEM (mode, operands[1]);
4949 if (BYTES_BIG_ENDIAN)
4950 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
4952 emit_move_insn (operands[0], mem);
4956 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4957 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4958 (match_operand:DI 1 "register_operand" "r")
4959 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4962 machine_mode mode = <VSTRUCT:MODE>mode;
4963 rtx mem = gen_rtx_MEM (mode, operands[1]);
4965 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
4969 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
4970 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4971 (match_operand:DI 1 "register_operand" "w")
4972 (match_operand:VSTRUCT 2 "register_operand" "0")
4973 (match_operand:SI 3 "immediate_operand" "i")
4974 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4977 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4978 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4981 aarch64_simd_lane_bounds (operands[3], 0,
4982 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
4984 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
4985 operands[0], mem, operands[2], operands[3]));
4989 ;; Expanders for builtins to extract vector registers from large
4990 ;; opaque integer modes.
4994 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4995 [(match_operand:VDC 0 "register_operand" "=w")
4996 (match_operand:VSTRUCT 1 "register_operand" "w")
4997 (match_operand:SI 2 "immediate_operand" "i")]
5000 int part = INTVAL (operands[2]);
5001 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5002 int offset = part * 16;
5004 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5005 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5011 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5012 [(match_operand:VQ 0 "register_operand" "=w")
5013 (match_operand:VSTRUCT 1 "register_operand" "w")
5014 (match_operand:SI 2 "immediate_operand" "i")]
5017 int part = INTVAL (operands[2]);
5018 int offset = part * 16;
5020 emit_move_insn (operands[0],
5021 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5025 ;; Permuted-store expanders for neon intrinsics.
5027 ;; Permute instructions
5031 (define_expand "vec_perm_const<mode>"
5032 [(match_operand:VALL_F16 0 "register_operand")
5033 (match_operand:VALL_F16 1 "register_operand")
5034 (match_operand:VALL_F16 2 "register_operand")
5035 (match_operand:<V_cmp_result> 3)]
5038 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5039 operands[2], operands[3]))
5045 (define_expand "vec_perm<mode>"
5046 [(match_operand:VB 0 "register_operand")
5047 (match_operand:VB 1 "register_operand")
5048 (match_operand:VB 2 "register_operand")
5049 (match_operand:VB 3 "register_operand")]
5052 aarch64_expand_vec_perm (operands[0], operands[1],
5053 operands[2], operands[3]);
5057 (define_insn "aarch64_tbl1<mode>"
5058 [(set (match_operand:VB 0 "register_operand" "=w")
5059 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5060 (match_operand:VB 2 "register_operand" "w")]
5063 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5064 [(set_attr "type" "neon_tbl1<q>")]
5067 ;; Two source registers.
5069 (define_insn "aarch64_tbl2v16qi"
5070 [(set (match_operand:V16QI 0 "register_operand" "=w")
5071 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5072 (match_operand:V16QI 2 "register_operand" "w")]
5075 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5076 [(set_attr "type" "neon_tbl2_q")]
5079 (define_insn "aarch64_tbl3<mode>"
5080 [(set (match_operand:VB 0 "register_operand" "=w")
5081 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5082 (match_operand:VB 2 "register_operand" "w")]
5085 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5086 [(set_attr "type" "neon_tbl3")]
5089 (define_insn "aarch64_tbx4<mode>"
5090 [(set (match_operand:VB 0 "register_operand" "=w")
5091 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5092 (match_operand:OI 2 "register_operand" "w")
5093 (match_operand:VB 3 "register_operand" "w")]
5096 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5097 [(set_attr "type" "neon_tbl4")]
5100 ;; Three source registers.
5102 (define_insn "aarch64_qtbl3<mode>"
5103 [(set (match_operand:VB 0 "register_operand" "=w")
5104 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5105 (match_operand:VB 2 "register_operand" "w")]
5108 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5109 [(set_attr "type" "neon_tbl3")]
5112 (define_insn "aarch64_qtbx3<mode>"
5113 [(set (match_operand:VB 0 "register_operand" "=w")
5114 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5115 (match_operand:CI 2 "register_operand" "w")
5116 (match_operand:VB 3 "register_operand" "w")]
5119 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5120 [(set_attr "type" "neon_tbl3")]
5123 ;; Four source registers.
5125 (define_insn "aarch64_qtbl4<mode>"
5126 [(set (match_operand:VB 0 "register_operand" "=w")
5127 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5128 (match_operand:VB 2 "register_operand" "w")]
5131 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5132 [(set_attr "type" "neon_tbl4")]
5135 (define_insn "aarch64_qtbx4<mode>"
5136 [(set (match_operand:VB 0 "register_operand" "=w")
5137 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5138 (match_operand:XI 2 "register_operand" "w")
5139 (match_operand:VB 3 "register_operand" "w")]
5142 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5143 [(set_attr "type" "neon_tbl4")]
5146 (define_insn_and_split "aarch64_combinev16qi"
5147 [(set (match_operand:OI 0 "register_operand" "=w")
5148 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5149 (match_operand:V16QI 2 "register_operand" "w")]
5153 "&& reload_completed"
5156 aarch64_split_combinev16qi (operands);
5159 [(set_attr "type" "multiple")]
5162 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5163 [(set (match_operand:VALL 0 "register_operand" "=w")
5164 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5165 (match_operand:VALL 2 "register_operand" "w")]
5168 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5169 [(set_attr "type" "neon_permute<q>")]
5172 ;; Note immediate (third) operand is lane index not byte index.
5173 (define_insn "aarch64_ext<mode>"
5174 [(set (match_operand:VALL 0 "register_operand" "=w")
5175 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5176 (match_operand:VALL 2 "register_operand" "w")
5177 (match_operand:SI 3 "immediate_operand" "i")]
5181 operands[3] = GEN_INT (INTVAL (operands[3])
5182 * GET_MODE_UNIT_SIZE (<MODE>mode));
5183 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5185 [(set_attr "type" "neon_ext<q>")]
5188 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5189 [(set (match_operand:VALL 0 "register_operand" "=w")
5190 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
5193 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5194 [(set_attr "type" "neon_rev<q>")]
5197 (define_insn "aarch64_st2<mode>_dreg"
5198 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5199 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5200 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5203 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5204 [(set_attr "type" "neon_store2_2reg")]
5207 (define_insn "aarch64_st2<mode>_dreg"
5208 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5209 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5210 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213 "st1\\t{%S1.1d - %T1.1d}, %0"
5214 [(set_attr "type" "neon_store1_2reg")]
5217 (define_insn "aarch64_st3<mode>_dreg"
5218 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5219 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5220 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5223 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5224 [(set_attr "type" "neon_store3_3reg")]
5227 (define_insn "aarch64_st3<mode>_dreg"
5228 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5229 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5230 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5233 "st1\\t{%S1.1d - %U1.1d}, %0"
5234 [(set_attr "type" "neon_store1_3reg")]
5237 (define_insn "aarch64_st4<mode>_dreg"
5238 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5239 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5240 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5243 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5244 [(set_attr "type" "neon_store4_4reg")]
5247 (define_insn "aarch64_st4<mode>_dreg"
5248 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5249 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5250 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5253 "st1\\t{%S1.1d - %V1.1d}, %0"
5254 [(set_attr "type" "neon_store1_4reg")]
5257 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5258 [(match_operand:DI 0 "register_operand" "r")
5259 (match_operand:VSTRUCT 1 "register_operand" "w")
5260 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5264 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5266 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5270 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5271 [(match_operand:DI 0 "register_operand" "r")
5272 (match_operand:VSTRUCT 1 "register_operand" "w")
5273 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5276 machine_mode mode = <VSTRUCT:MODE>mode;
5277 rtx mem = gen_rtx_MEM (mode, operands[0]);
5279 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5283 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5284 [(match_operand:DI 0 "register_operand" "r")
5285 (match_operand:VSTRUCT 1 "register_operand" "w")
5286 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5287 (match_operand:SI 2 "immediate_operand")]
5290 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5291 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5294 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5295 mem, operands[1], operands[2]));
5299 (define_expand "aarch64_st1<VALL_F16:mode>"
5300 [(match_operand:DI 0 "register_operand")
5301 (match_operand:VALL_F16 1 "register_operand")]
5304 machine_mode mode = <VALL_F16:MODE>mode;
5305 rtx mem = gen_rtx_MEM (mode, operands[0]);
5307 if (BYTES_BIG_ENDIAN)
5308 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5310 emit_move_insn (mem, operands[1]);
5314 ;; Expander for builtins to insert vector registers into large
5315 ;; opaque integer modes.
5317 ;; Q-register list. We don't need a D-reg inserter as we zero
5318 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5320 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5321 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5322 (match_operand:VSTRUCT 1 "register_operand" "0")
5323 (match_operand:VQ 2 "register_operand" "w")
5324 (match_operand:SI 3 "immediate_operand" "i")]
5327 int part = INTVAL (operands[3]);
5328 int offset = part * 16;
5330 emit_move_insn (operands[0], operands[1]);
5331 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5336 ;; Standard pattern name vec_init<mode>.
5338 (define_expand "vec_init<mode>"
5339 [(match_operand:VALL_F16 0 "register_operand" "")
5340 (match_operand 1 "" "")]
5343 aarch64_expand_vector_init (operands[0], operands[1]);
5347 (define_insn "*aarch64_simd_ld1r<mode>"
5348 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5349 (vec_duplicate:VALL_F16
5350 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5352 "ld1r\\t{%0.<Vtype>}, %1"
5353 [(set_attr "type" "neon_load1_all_lanes")]
5356 (define_insn "aarch64_frecpe<mode>"
5357 [(set (match_operand:VDQF 0 "register_operand" "=w")
5358 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5361 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5362 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
5365 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5366 [(set (match_operand:GPF 0 "register_operand" "=w")
5367 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5370 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5371 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
5374 (define_insn "aarch64_frecps<mode>"
5375 [(set (match_operand:VALLF 0 "register_operand" "=w")
5376 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
5377 (match_operand:VALLF 2 "register_operand" "w")]
5380 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5381 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
5384 (define_insn "aarch64_urecpe<mode>"
5385 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5386 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5389 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5390 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5392 ;; Standard pattern name vec_extract<mode>.
5394 (define_expand "vec_extract<mode>"
5395 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5396 (match_operand:VALL_F16 1 "register_operand" "")
5397 (match_operand:SI 2 "immediate_operand" "")]
5401 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5407 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5408 [(set (match_operand:V16QI 0 "register_operand" "=w")
5409 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5410 (match_operand:V16QI 2 "register_operand" "w")]
5412 "TARGET_SIMD && TARGET_CRYPTO"
5413 "aes<aes_op>\\t%0.16b, %2.16b"
5414 [(set_attr "type" "crypto_aese")]
5417 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5418 [(set (match_operand:V16QI 0 "register_operand" "=w")
5419 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
5421 "TARGET_SIMD && TARGET_CRYPTO"
5422 "aes<aesmc_op>\\t%0.16b, %1.16b"
5423 [(set_attr "type" "crypto_aesmc")]
5428 (define_insn "aarch64_crypto_sha1hsi"
5429 [(set (match_operand:SI 0 "register_operand" "=w")
5430 (unspec:SI [(match_operand:SI 1
5431 "register_operand" "w")]
5433 "TARGET_SIMD && TARGET_CRYPTO"
5435 [(set_attr "type" "crypto_sha1_fast")]
5438 (define_insn "aarch64_crypto_sha1su1v4si"
5439 [(set (match_operand:V4SI 0 "register_operand" "=w")
5440 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5441 (match_operand:V4SI 2 "register_operand" "w")]
5443 "TARGET_SIMD && TARGET_CRYPTO"
5444 "sha1su1\\t%0.4s, %2.4s"
5445 [(set_attr "type" "crypto_sha1_fast")]
5448 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5449 [(set (match_operand:V4SI 0 "register_operand" "=w")
5450 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5451 (match_operand:SI 2 "register_operand" "w")
5452 (match_operand:V4SI 3 "register_operand" "w")]
5454 "TARGET_SIMD && TARGET_CRYPTO"
5455 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5456 [(set_attr "type" "crypto_sha1_slow")]
5459 (define_insn "aarch64_crypto_sha1su0v4si"
5460 [(set (match_operand:V4SI 0 "register_operand" "=w")
5461 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5462 (match_operand:V4SI 2 "register_operand" "w")
5463 (match_operand:V4SI 3 "register_operand" "w")]
5465 "TARGET_SIMD && TARGET_CRYPTO"
5466 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5467 [(set_attr "type" "crypto_sha1_xor")]
5472 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5473 [(set (match_operand:V4SI 0 "register_operand" "=w")
5474 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5475 (match_operand:V4SI 2 "register_operand" "w")
5476 (match_operand:V4SI 3 "register_operand" "w")]
5478 "TARGET_SIMD && TARGET_CRYPTO"
5479 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5480 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update, part 1 (SHA256SU0): operand 1 is the accumulator
;; (tied to the output, constraint "0"), operand 2 the message block.
;; Fix: condition spacing normalized to "TARGET_SIMD && TARGET_CRYPTO" to
;; match every sibling crypto pattern in this file (functionally identical C).
5483 (define_insn "aarch64_crypto_sha256su0v4si"
5484 [(set (match_operand:V4SI 0 "register_operand" "=w")
5485 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5486 (match_operand:V4SI 2 "register_operand" "w")]
5488 "TARGET_SIMD && TARGET_CRYPTO"
5489 "sha256su0\\t%0.4s, %2.4s"
5490 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update, part 2 (SHA256SU1): operand 1 is the accumulator
;; (tied to the output, constraint "0"), operands 2 and 3 the message blocks.
;; Fix: condition spacing normalized to "TARGET_SIMD && TARGET_CRYPTO" to
;; match every sibling crypto pattern in this file (functionally identical C).
5493 (define_insn "aarch64_crypto_sha256su1v4si"
5494 [(set (match_operand:V4SI 0 "register_operand" "=w")
5495 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5496 (match_operand:V4SI 2 "register_operand" "w")
5497 (match_operand:V4SI 3 "register_operand" "w")]
5499 "TARGET_SIMD && TARGET_CRYPTO"
5500 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5501 [(set_attr "type" "crypto_sha256_slow")]
5506 (define_insn "aarch64_crypto_pmulldi"
5507 [(set (match_operand:TI 0 "register_operand" "=w")
5508 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5509 (match_operand:DI 2 "register_operand" "w")]
5511 "TARGET_SIMD && TARGET_CRYPTO"
5512 "pmull\\t%0.1q, %1.1d, %2.1d"
5513 [(set_attr "type" "neon_mul_d_long")]
5516 (define_insn "aarch64_crypto_pmullv2di"
5517 [(set (match_operand:TI 0 "register_operand" "=w")
5518 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5519 (match_operand:V2DI 2 "register_operand" "w")]
5521 "TARGET_SIMD && TARGET_CRYPTO"
5522 "pmull2\\t%0.1q, %1.2d, %2.2d"
5523 [(set_attr "type" "neon_mul_d_long")]