1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for all vector modes (VALL_F16).
;; If the destination is memory, force the source into a register,
;; since a mem-to-mem move is not a valid machine insn.
;; NOTE(review): this excerpt elides some original lines (the insn
;; condition and closing delimiters); the leading numbers on each line
;; are extraction artifacts, not part of the pattern.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander (VALL).  The embedded C comment
;; below explains why operand 1 may need to be forced into a register.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector (VDQ_I).
;; Alternative 0: source in a general register -> DUP from GP reg.
;; Alternative 1: source in a SIMD register    -> DUP from lane 0.
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant (VDQF_F16): only the SIMD-register source
;; form exists; always a DUP from element 0.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of a vector to all lanes of the result.
;; ENDIAN_LANE_N remaps the lane index so that the architectural lane
;; number is correct on big-endian targets.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76 [(set_attr "type" "neon_dup<q>")]
;; Same as above, but the source vector has a different number of
;; lanes (<VSWAP_WIDTH>), so the lane index is remapped in the source
;; vector's mode rather than the destination's.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move (VD modes).  One of the two operands must be a
;; register.  Alternatives: 0 load, 1 store, 2 reg-reg via ORR,
;; 3 SIMD->GP via UMOV, 4 GP->SIMD via INS, 5 GP->GP MOV,
;; 6 immediate via aarch64_output_simd_mov_immediate.
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
104 switch (which_alternative)
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "ins\t%0.d[0], %1";
111 case 5: return "mov\t%0, %1";
113 return aarch64_output_simd_mov_immediate (operands[1],
115 default: gcc_unreachable ();
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 mov_reg, neon_move<q>")]
;; 128-bit vector move (VQ modes).  Mirrors the VD pattern above; the
;; GP-register alternatives take multiple insns (see the "length"
;; attribute: 8 bytes, i.e. two instructions, for alternatives 3-5).
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
132 switch (which_alternative)
135 return "ldr\\t%q0, %1";
137 return "str\\t%q1, %0";
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
153 (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of adjacent 64-bit vectors (LDP).  The condition checks
;; that the second address equals the first plus the mode size, i.e.
;; the two memory operands are consecutive.
156 (define_insn "load_pair<mode>"
157 [(set (match_operand:VD 0 "register_operand" "=w")
158 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159 (set (match_operand:VD 2 "register_operand" "=w")
160 (match_operand:VD 3 "memory_operand" "m"))]
162 && rtx_equal_p (XEXP (operands[3], 0),
163 plus_constant (Pmode,
164 XEXP (operands[1], 0),
165 GET_MODE_SIZE (<MODE>mode)))"
167 [(set_attr "type" "neon_ldp")]
;; Store a pair of adjacent 64-bit vectors (STP); same adjacency
;; requirement as load_pair above.
170 (define_insn "store_pair<mode>"
171 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 (match_operand:VD 1 "register_operand" "w"))
173 (set (match_operand:VD 2 "memory_operand" "=m")
174 (match_operand:VD 3 "register_operand" "w"))]
176 && rtx_equal_p (XEXP (operands[2], 0),
177 plus_constant (Pmode,
178 XEXP (operands[0], 0),
179 GET_MODE_SIZE (<MODE>mode)))"
181 [(set_attr "type" "neon_stp")]
185 [(set (match_operand:VQ 0 "register_operand" "")
186 (match_operand:VQ 1 "register_operand" ""))]
187 "TARGET_SIMD && reload_completed
188 && GP_REGNUM_P (REGNO (operands[0]))
189 && GP_REGNUM_P (REGNO (operands[1]))"
192 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
197 [(set (match_operand:VQ 0 "register_operand" "")
198 (match_operand:VQ 1 "register_operand" ""))]
199 "TARGET_SIMD && reload_completed
200 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit vector move into two half-width (VHALF) moves.
;; GP source: write the low then the high half of the destination.
;; SIMD source: extract low/high halves via vec_select with the
;; appropriate parallel (endian-aware, via vect_par_cnst_half).
208 (define_expand "aarch64_split_simd_mov<mode>"
209 [(set (match_operand:VQ 0)
210 (match_operand:VQ 1))]
213 rtx dst = operands[0];
214 rtx src = operands[1];
216 if (GP_REGNUM_P (REGNO (src)))
218 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 (gen_move_lo_quad_<mode> (dst, src_low_part));
224 (gen_move_hi_quad_<mode> (dst, src_high_part));
229 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a 128-bit vector into a general register.
;; Only valid after reload (register allocation is already fixed).
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 (match_operand:VQ 1 "register_operand" "w")
247 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248 "TARGET_SIMD && reload_completed"
250 [(set_attr "type" "neon_to_gp<q>")
251 (set_attr "length" "4")
;; Same, for the high half of the vector.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 (match_operand:VQ 1 "register_operand" "w")
258 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259 "TARGET_SIMD && reload_completed"
261 [(set_attr "type" "neon_to_gp<q>")
262 (set_attr "length" "4")
;; OR-NOT: operand 1 is the negated input in the RTL, so the assembly
;; output swaps %1/%2 to match ORN's (first, NOT second) semantics.
265 (define_insn "orn<mode>3"
266 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 (match_operand:VDQ_I 2 "register_operand" "w")))]
270 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271 [(set_attr "type" "neon_logic<q>")]
;; AND-NOT (bit clear): same operand swap as ORN above.
274 (define_insn "bic<mode>3"
275 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 (match_operand:VDQ_I 2 "register_operand" "w")))]
279 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
283 (define_insn "add<mode>3"
284 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 (match_operand:VDQ_I 2 "register_operand" "w")))]
288 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
292 (define_insn "sub<mode>3"
293 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 (match_operand:VDQ_I 2 "register_operand" "w")))]
297 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (no 64-bit-element form: VDQ_BHSI only).
301 (define_insn "mul<mode>3"
302 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
310 (define_insn "bswap<mode>2"
311 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (byte-vector modes only).
318 (define_insn "aarch64_rbit<mode>"
319 [(set (match_operand:VB 0 "register_operand" "=w")
320 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: synthesized as bswap, then RBIT on the value
;; viewed as a byte vector (subreg to the QI-element mode), then CLZ —
;; the element is fully bit-reversed, so clz of that equals ctz.
327 (define_expand "ctz<mode>2"
328 [(set (match_operand:VS 0 "register_operand")
329 (ctz:VS (match_operand:VS 1 "register_operand")))]
332 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply a vector by one broadcast lane of another vector:
;; (F)MUL Vd.T, Vn.T, Vm.Ts[lane].  Lane index is endian-remapped.
341 (define_insn "*aarch64_mul3_elt<mode>"
342 [(set (match_operand:VMUL 0 "register_operand" "=w")
346 (match_operand:VMUL 1 "register_operand" "<h_con>")
347 (parallel [(match_operand:SI 2 "immediate_operand")])))
348 (match_operand:VMUL 3 "register_operand" "w")))]
351 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; As above, but the lane comes from a vector with a different lane
;; count (<VSWAP_WIDTH>), so remap the index in that mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359 (mult:VMUL_CHANGE_NLANES
360 (vec_duplicate:VMUL_CHANGE_NLANES
362 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 (parallel [(match_operand:SI 2 "immediate_operand")])))
364 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 INTVAL (operands[2])));
369 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; V2DF multiplied by a broadcast scalar DF: FMUL by element 0.
374 (define_insn "*aarch64_mul3_elt_to_128df"
375 [(set (match_operand:V2DF 0 "register_operand" "=w")
378 (match_operand:DF 2 "register_operand" "w"))
379 (match_operand:V2DF 1 "register_operand" "w")))]
381 "fmul\\t%0.2d, %1.2d, %2.d[0]"
382 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Reciprocal square-root estimate (FRSQRTE), used as the seed of the
;; Newton-Raphson software rsqrt sequence below.
385 (define_insn "aarch64_rsqrte_<mode>2"
386 [(set (match_operand:VALLF 0 "register_operand" "=w")
387 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
390 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391 [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
;; Reciprocal square-root step (FRSQRTS), one NR refinement iteration.
393 (define_insn "aarch64_rsqrts_<mode>3"
394 [(set (match_operand:VALLF 0 "register_operand" "=w")
395 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
396 (match_operand:VALLF 2 "register_operand" "w")]
399 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400 [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
;; Expander for the software reciprocal square root; the target helper
;; emits the estimate + step sequence.
402 (define_expand "rsqrt<mode>2"
403 [(set (match_operand:VALLF 0 "register_operand" "=w")
404 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408 aarch64_emit_swrsqrt (operands[0], operands[1]);
;; Scalar DF multiplied by a selected lane of a V2DF vector; lane
;; index is endian-remapped in V2DFmode.
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413 [(set (match_operand:DF 0 "register_operand" "=w")
416 (match_operand:V2DF 1 "register_operand" "w")
417 (parallel [(match_operand:SI 2 "immediate_operand")]))
418 (match_operand:DF 3 "register_operand" "w")))]
421 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
427 (define_insn "neg<mode>2"
428 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431 "neg\t%0.<Vtype>, %1.<Vtype>"
432 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (standard abs RTL).
435 (define_insn "abs<mode>2"
436 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439 "abs\t%0.<Vtype>, %1.<Vtype>"
440 [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; ... which is why it is expressed as an unspec rather than abs RTL.
446 (define_insn "aarch64_abs<mode>"
447 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: abs (op1 - op2) maps to SABD.
456 (define_insn "abd<mode>_3"
457 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 (abs:VDQ_BHSI (minus:VDQ_BHSI
459 (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463 [(set_attr "type" "neon_abd<q>")]
;; Signed absolute difference and accumulate: operand 3 ("0") is
;; accumulated in place, matching SABA's read-modify-write destination.
466 (define_insn "aba<mode>_3"
467 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference, vector form.
477 (define_insn "fabd<mode>_3"
478 [(set (match_operand:VDQF 0 "register_operand" "=w")
479 (abs:VDQF (minus:VDQF
480 (match_operand:VDQF 1 "register_operand" "w")
481 (match_operand:VDQF 2 "register_operand" "w"))))]
483 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
484 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Floating-point absolute difference, scalar (GPF = SF/DF) form.
487 (define_insn "*fabd_scalar<mode>3"
488 [(set (match_operand:GPF 0 "register_operand" "=w")
490 (match_operand:GPF 1 "register_operand" "w")
491 (match_operand:GPF 2 "register_operand" "w"))))]
493 "fabd\t%<s>0, %<s>1, %<s>2"
494 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Bitwise vector logic, all operating on the byte view (<Vbtype>).
;; AND.
497 (define_insn "and<mode>3"
498 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 (match_operand:VDQ_I 2 "register_operand" "w")))]
502 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503 [(set_attr "type" "neon_logic<q>")]
;; Inclusive OR.
506 (define_insn "ior<mode>3"
507 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 (match_operand:VDQ_I 2 "register_operand" "w")))]
511 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512 [(set_attr "type" "neon_logic<q>")]
;; Exclusive OR.
515 (define_insn "xor<mode>3"
516 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
518 (match_operand:VDQ_I 2 "register_operand" "w")))]
520 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
521 [(set_attr "type" "neon_logic<q>")]
;; One's complement.
524 (define_insn "one_cmpl<mode>2"
525 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
526 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
528 "not\t%0.<Vbtype>, %1.<Vbtype>"
529 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane.  Operand 2 arrives as a one-hot
;; mask; exact_log2 recovers the lane, which is endian-remapped and
;; re-encoded so %p2 prints the lane number.  Alternatives: GP reg
;; (INS from Wn/Xn), SIMD reg (INS from element 0), memory (LD1 lane).
532 (define_insn "aarch64_simd_vec_set<mode>"
533 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
535 (vec_duplicate:VDQ_BHSI
536 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
537 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
538 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
541 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
542 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
543 switch (which_alternative)
546 return "ins\\t%0.<Vetype>[%p2], %w1";
548 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
550 return "ld1\\t{%0.<Vetype>}[%p2], %1";
555 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Logical shift right by immediate (USHR); operand 2 is a vector of
;; identical shift counts matched by aarch64_simd_rshift_imm.
558 (define_insn "aarch64_simd_lshr<mode>"
559 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
560 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
561 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
563 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
564 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by immediate (SSHR).
567 (define_insn "aarch64_simd_ashr<mode>"
568 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
569 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
570 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
572 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
573 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by immediate (SHL); left-shift immediates use the
;; separate Dl constraint/predicate (range 0..size-1).
576 (define_insn "aarch64_simd_imm_shl<mode>"
577 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
578 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
579 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
581 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
582 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a per-lane register amount (SSHL); expressed as plain
;; ashift RTL since left shift has no signedness.
585 (define_insn "aarch64_simd_reg_sshl<mode>"
586 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
587 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
588 (match_operand:VDQ_I 2 "register_operand" "w")))]
590 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
591 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL/SSHL with register shift counts; kept as unspecs because the
;; instructions shift right when the count is negative, which plain
;; shift RTL cannot express.
594 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
595 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
596 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
597 (match_operand:VDQ_I 2 "register_operand" "w")]
598 UNSPEC_ASHIFT_UNSIGNED))]
600 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
601 [(set_attr "type" "neon_shift_reg<q>")]
604 (define_insn "aarch64_simd_reg_shl<mode>_signed"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
607 (match_operand:VDQ_I 2 "register_operand" "w")]
608 UNSPEC_ASHIFT_SIGNED))]
610 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
611 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left expander with a scalar (SImode) count.  In-range
;; constant counts use the immediate SHL pattern; otherwise the count
;; is forced into a register, broadcast with DUP, and SSHL is used.
614 (define_expand "ashl<mode>3"
615 [(match_operand:VDQ_I 0 "register_operand" "")
616 (match_operand:VDQ_I 1 "register_operand" "")
617 (match_operand:SI 2 "general_operand" "")]
620 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
623 if (CONST_INT_P (operands[2]))
625 shift_amount = INTVAL (operands[2]);
;; Valid left-shift immediates are 0 .. bit_width-1.
626 if (shift_amount >= 0 && shift_amount < bit_width)
628 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
630 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
;; Out-of-range constant: fall through to the register path.
637 operands[2] = force_reg (SImode, operands[2]);
640 else if (MEM_P (operands[2]))
642 operands[2] = force_reg (SImode, operands[2]);
645 if (REG_P (operands[2]))
647 rtx tmp = gen_reg_rtx (<MODE>mode);
;; Broadcast the scalar count into every lane for SSHL.
648 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
649 convert_to_mode (<VEL>mode,
652 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical shift-right expander.  Right-shift immediates are valid in
;; the range 1 .. bit_width (note: inclusive upper bound, unlike left
;; shifts).  Variable counts are negated and broadcast, because USHL
;; shifts right for negative counts.
661 (define_expand "lshr<mode>3"
662 [(match_operand:VDQ_I 0 "register_operand" "")
663 (match_operand:VDQ_I 1 "register_operand" "")
664 (match_operand:SI 2 "general_operand" "")]
667 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
670 if (CONST_INT_P (operands[2]))
672 shift_amount = INTVAL (operands[2]);
673 if (shift_amount > 0 && shift_amount <= bit_width)
675 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
677 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
683 operands[2] = force_reg (SImode, operands[2]);
685 else if (MEM_P (operands[2]))
687 operands[2] = force_reg (SImode, operands[2]);
690 if (REG_P (operands[2]))
692 rtx tmp = gen_reg_rtx (SImode);
693 rtx tmp1 = gen_reg_rtx (<MODE>mode);
;; Negate the count: USHL with a negative count shifts right.
694 emit_insn (gen_negsi2 (tmp, operands[2]));
695 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
696 convert_to_mode (<VEL>mode,
698 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic shift-right expander; identical structure to lshr above
;; but uses SSHR for immediates and signed SSHL (negated count) for
;; register counts.
708 (define_expand "ashr<mode>3"
709 [(match_operand:VDQ_I 0 "register_operand" "")
710 (match_operand:VDQ_I 1 "register_operand" "")
711 (match_operand:SI 2 "general_operand" "")]
714 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
717 if (CONST_INT_P (operands[2]))
719 shift_amount = INTVAL (operands[2]);
720 if (shift_amount > 0 && shift_amount <= bit_width)
722 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
724 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
730 operands[2] = force_reg (SImode, operands[2]);
732 else if (MEM_P (operands[2]))
734 operands[2] = force_reg (SImode, operands[2]);
737 if (REG_P (operands[2]))
739 rtx tmp = gen_reg_rtx (SImode);
740 rtx tmp1 = gen_reg_rtx (<MODE>mode);
;; Negate the count so SSHL performs a right shift.
741 emit_insn (gen_negsi2 (tmp, operands[2]))
742 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
743 convert_to_mode (<VEL>mode,
745 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: maps directly onto SSHL.
755 (define_expand "vashl<mode>3"
756 [(match_operand:VDQ_I 0 "register_operand" "")
757 (match_operand:VDQ_I 1 "register_operand" "")
758 (match_operand:VDQ_I 2 "register_operand" "")]
761 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
766 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
767 ;; Negating individual lanes most certainly offsets the
768 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the counts, then
;; use the signed SSHL unspec (negative count == right shift).
769 (define_expand "vashr<mode>3"
770 [(match_operand:VDQ_BHSI 0 "register_operand" "")
771 (match_operand:VDQ_BHSI 1 "register_operand" "")
772 (match_operand:VDQ_BHSI 2 "register_operand" "")]
775 rtx neg = gen_reg_rtx (<MODE>mode);
776 emit (gen_neg<mode>2 (neg, operands[2]));
777 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic shift right allowing a count of 64: asr by 64
;; yields all sign bits, identical to asr by 63, so clamp and reuse
;; the standard ashrdi3 pattern (which rejects 64).
783 (define_expand "aarch64_ashr_simddi"
784 [(match_operand:DI 0 "register_operand" "=w")
785 (match_operand:DI 1 "register_operand" "w")
786 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
789 /* An arithmetic shift right by 64 fills the result with copies of the sign
790 bit, just like asr by 63 - however the standard pattern does not handle
792 if (INTVAL (operands[2]) == 64)
793 operands[2] = GEN_INT (63);
794 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right via negated counts and USHL.
799 (define_expand "vlshr<mode>3"
800 [(match_operand:VDQ_BHSI 0 "register_operand" "")
801 (match_operand:VDQ_BHSI 1 "register_operand" "")
802 (match_operand:VDQ_BHSI 2 "register_operand" "")]
805 rtx neg = gen_reg_rtx (<MODE>mode);
806 emit (gen_neg<mode>2 (neg, operands[2]));
807 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical shift right allowing a count of 64: the result of a
;; logical shift by the full width is zero, so emit a zero move.
812 (define_expand "aarch64_lshr_simddi"
813 [(match_operand:DI 0 "register_operand" "=w")
814 (match_operand:DI 1 "register_operand" "w")
815 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
818 if (INTVAL (operands[2]) == 64)
819 emit_move_insn (operands[0], const0_rtx);
821 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; Standard vec_set expander: encode the lane as a one-hot mask (the
;; insn pattern decodes it with exact_log2).
826 (define_expand "vec_set<mode>"
827 [(match_operand:VDQ_BHSI 0 "register_operand")
828 (match_operand:<VEL> 1 "register_operand")
829 (match_operand:SI 2 "immediate_operand")]
832 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
833 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
834 GEN_INT (elem), operands[0]));
839 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift toward element 0; direction of the scalar shift
;; depends on byte order (SHL on big-endian, USHR on little-endian).
840 (define_insn "vec_shr_<mode>"
841 [(set (match_operand:VD 0 "register_operand" "=w")
842 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
843 (match_operand:SI 2 "immediate_operand" "i")]
847 if (BYTES_BIG_ENDIAN)
848 return "shl %d0, %d1, %2";
850 return "ushr %d0, %d1, %2";
852 [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: from a general register (INS Xn) or from another
;; SIMD register's element 0.  Operand 2 is a one-hot lane mask, as in
;; the VDQ_BHSI vec_set pattern.
855 (define_insn "aarch64_simd_vec_setv2di"
856 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
859 (match_operand:DI 1 "register_operand" "r,w"))
860 (match_operand:V2DI 3 "register_operand" "0,0")
861 (match_operand:SI 2 "immediate_operand" "i,i")))]
864 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
865 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
866 switch (which_alternative)
869 return "ins\\t%0.d[%p2], %1";
871 return "ins\\t%0.d[%p2], %1.d[0]";
876 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI: encode the lane as a one-hot mask.
879 (define_expand "vec_setv2di"
880 [(match_operand:V2DI 0 "register_operand")
881 (match_operand:DI 1 "register_operand")
882 (match_operand:SI 2 "immediate_operand")]
885 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
886 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
887 GEN_INT (elem), operands[0]));
;; Floating-point lane insert (VDQF_F16): always INS from element 0 of
;; a SIMD register; lane arrives as a one-hot mask and is remapped for
;; endianness.
892 (define_insn "aarch64_simd_vec_set<mode>"
893 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
895 (vec_duplicate:VDQF_F16
896 (match_operand:<VEL> 1 "register_operand" "w"))
897 (match_operand:VDQF_F16 3 "register_operand" "0")
898 (match_operand:SI 2 "immediate_operand" "i")))]
901 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
903 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
904 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
906 [(set_attr "type" "neon_ins<q>")]
;; Matching vec_set expander for the FP modes.
909 (define_expand "vec_set<mode>"
910 [(match_operand:VDQF_F16 0 "register_operand" "+w")
911 (match_operand:<VEL> 1 "register_operand" "w")
912 (match_operand:SI 2 "immediate_operand" "")]
915 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
916 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
917 GEN_INT (elem), operands[0]))
;; Multiply-accumulate: operand 1 ("0") is the accumulator (MLA
;; destination is read-modify-write).
923 (define_insn "aarch64_mla<mode>"
924 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
925 (plus:VDQ_BHSI (mult:VDQ_BHSI
926 (match_operand:VDQ_BHSI 2 "register_operand" "w")
927 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
928 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
930 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
931 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by one broadcast lane; lane index endian-remapped.
934 (define_insn "*aarch64_mla_elt<mode>"
935 [(set (match_operand:VDQHS 0 "register_operand" "=w")
940 (match_operand:VDQHS 1 "register_operand" "<h_con>")
941 (parallel [(match_operand:SI 2 "immediate_operand")])))
942 (match_operand:VDQHS 3 "register_operand" "w"))
943 (match_operand:VDQHS 4 "register_operand" "0")))]
946 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
947 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
949 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by a lane of a different-width vector (<VSWAP_WIDTH>).
952 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
953 [(set (match_operand:VDQHS 0 "register_operand" "=w")
958 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
959 (parallel [(match_operand:SI 2 "immediate_operand")])))
960 (match_operand:VDQHS 3 "register_operand" "w"))
961 (match_operand:VDQHS 4 "register_operand" "0")))]
964 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
965 INTVAL (operands[2])));
966 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
968 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: operand 1 ("0") is the accumulator.
971 (define_insn "aarch64_mls<mode>"
972 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
973 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
974 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
975 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
977 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
978 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by one broadcast lane of the same-width vector.
981 (define_insn "*aarch64_mls_elt<mode>"
982 [(set (match_operand:VDQHS 0 "register_operand" "=w")
984 (match_operand:VDQHS 4 "register_operand" "0")
988 (match_operand:VDQHS 1 "register_operand" "<h_con>")
989 (parallel [(match_operand:SI 2 "immediate_operand")])))
990 (match_operand:VDQHS 3 "register_operand" "w"))))]
993 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
994 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
996 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by a lane of a different-width vector (<VSWAP_WIDTH>).
999 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1000 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1002 (match_operand:VDQHS 4 "register_operand" "0")
1004 (vec_duplicate:VDQHS
1006 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1007 (parallel [(match_operand:SI 2 "immediate_operand")])))
1008 (match_operand:VDQHS 3 "register_operand" "w"))))]
1011 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1012 INTVAL (operands[2])));
1013 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1015 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1018 ;; Max/Min operations.
;; Standard signed/unsigned vector max/min (SMAX/SMIN/UMAX/UMIN).
1019 (define_insn "<su><maxmin><mode>3"
1020 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1021 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1022 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1024 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1025 [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no max/min instruction: synthesize via a compare and the
;; internal vcond (compare + bit-select) pattern.
1028 (define_expand "<su><maxmin>v2di3"
1029 [(set (match_operand:V2DI 0 "register_operand" "")
1030 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1031 (match_operand:V2DI 2 "register_operand" "")))]
1034 enum rtx_code cmp_operator;
1055 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1056 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1057 operands[2], cmp_fmt, operands[1], operands[2]));
1061 ;; Pairwise Integer Max/Min operations.
1062 (define_insn "aarch64_<maxmin_uns>p<mode>"
1063 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1064 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1065 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1068 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1069 [(set_attr "type" "neon_minmax<q>")]
1072 ;; Pairwise FP Max/Min operations.
1073 (define_insn "aarch64_<maxmin_uns>p<mode>"
1074 [(set (match_operand:VDQF 0 "register_operand" "=w")
1075 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1076 (match_operand:VDQF 2 "register_operand" "w")]
1079 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1080 [(set_attr "type" "neon_minmax<q>")]
1083 ;; vec_concat gives a new vector with the low elements from operand 1, and
1084 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1085 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1086 ;; What that means, is that the RTL descriptions of the below patterns
1087 ;; need to change depending on endianness.
1089 ;; Move to the low architectural bits of the register.
1090 ;; On little-endian this is { operand, zeroes }
1091 ;; On big-endian this is { zeroes, operand }
;; Little-endian form for VQ modes with more than two elements.
1093 (define_insn "move_lo_quad_internal_<mode>"
1094 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1096 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1097 (vec_duplicate:<VHALF> (const_int 0))))]
1098 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1103 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1104 (set_attr "simd" "yes,*,yes")
1105 (set_attr "fp" "*,yes,*")
1106 (set_attr "length" "4")]
;; Little-endian form for the two-element VQ modes (V2DI/V2DF).
1109 (define_insn "move_lo_quad_internal_<mode>"
1110 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1112 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1114 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1119 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1120 (set_attr "simd" "yes,*,yes")
1121 (set_attr "fp" "*,yes,*")
1122 (set_attr "length" "4")]
;; Big-endian forms: the zero half comes first in the vec_concat,
;; per the endianness note above the little-endian patterns.
1125 (define_insn "move_lo_quad_internal_be_<mode>"
1126 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1128 (vec_duplicate:<VHALF> (const_int 0))
1129 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1130 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1135 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1136 (set_attr "simd" "yes,*,yes")
1137 (set_attr "fp" "*,yes,*")
1138 (set_attr "length" "4")]
1141 (define_insn "move_lo_quad_internal_be_<mode>"
1142 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1145 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1146 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1151 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1152 (set_attr "simd" "yes,*,yes")
1153 (set_attr "fp" "*,yes,*")
1154 (set_attr "length" "4")]
;; Dispatcher expander: picks the _be_ or plain internal pattern at
;; expand time according to BYTES_BIG_ENDIAN.
1157 (define_expand "move_lo_quad_<mode>"
1158 [(match_operand:VQ 0 "register_operand")
1159 (match_operand:VQ 1 "register_operand")]
1162 if (BYTES_BIG_ENDIAN)
1163 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1165 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1170 ;; Move operand1 to the high architectural bits of the register, keeping
1171 ;; the low architectural bits of operand2.
1172 ;; For little-endian this is { operand2, operand1 }
1173 ;; For big-endian this is { operand1, operand2 }
;; Insert operand 1 into the high architectural half of operand 0 while
;; preserving the low half (selected via the vect_par_cnst_lo_half PARALLEL
;; in operand 2).  Emits INS on the "w" alternative.
1175 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1176 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1180 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1181 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1182 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1184 ins\\t%0.d[1], %1.d[0]
1186 [(set_attr "type" "neon_ins")]
;; Big-endian variant: vec_concat arms swapped so the RTL still describes
;; "operand 1 goes to the high architectural lanes".
1189 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1190 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1192 (match_operand:<VHALF> 1 "register_operand" "w,r")
1195 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1196 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1198 ins\\t%0.d[1], %1.d[0]
1200 [(set_attr "type" "neon_ins")]
;; Endian-dispatching expander; builds the lo-half selector PARALLEL itself.
1203 (define_expand "move_hi_quad_<mode>"
1204 [(match_operand:VQ 0 "register_operand" "")
1205 (match_operand:<VHALF> 1 "register_operand" "")]
1208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1209 if (BYTES_BIG_ENDIAN)
1210 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1213 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1218 ;; Narrowing operations.
;; Truncate each element of a 128-bit vector to half width (XTN).
1221 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1222 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1223 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1225 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1226 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit vectors into one 128-bit temporary (via the move_lo/hi_quad
;; expanders), then narrow it with the XTN insn above.  The lo/hi operand
;; swap under BYTES_BIG_ENDIAN keeps the lane numbering the vectorizer expects.
1229 (define_expand "vec_pack_trunc_<mode>"
1230 [(match_operand:<VNARROWD> 0 "register_operand" "")
1231 (match_operand:VDN 1 "register_operand" "")
1232 (match_operand:VDN 2 "register_operand" "")]
1235 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1236 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1237 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1239 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1240 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1241 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; 128-bit source variant: XTN into the low half, XTN2 into the high half;
;; operand order of the two steps flips on big-endian.  Earlyclobber (=&w)
;; because operand 0 is written before both inputs are fully read.
1247 (define_insn "vec_pack_trunc_<mode>"
1248 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1249 (vec_concat:<VNARROWQ2>
1250 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1251 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1254 if (BYTES_BIG_ENDIAN)
1255 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1257 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1259 [(set_attr "type" "multiple")
1260 (set_attr "length" "8")]
1263 ;; Widening operations.
;; Sign/zero-extend (ANY_EXTEND) the low half of a 128-bit vector to full
;; width, implemented as a shift-left-long by zero (SSHLL/USHLL #0).
1265 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1267 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1268 (match_operand:VQW 1 "register_operand" "w")
1269 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1272 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1273 [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half (SSHLL2/USHLL2 #0).
1276 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1278 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1279 (match_operand:VQW 1 "register_operand" "w")
1280 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1283 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1284 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo half-selector PARALLEL and emit
;; the matching insn above.
1287 (define_expand "vec_unpack<su>_hi_<mode>"
1288 [(match_operand:<VWIDE> 0 "register_operand" "")
1289 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1292 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1293 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1299 (define_expand "vec_unpack<su>_lo_<mode>"
1300 [(match_operand:<VWIDE> 0 "register_operand" "")
1301 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1304 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1305 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1311 ;; Widening arithmetic.
;; Widening multiply-accumulate: acc += extend(lo-half(a)) * extend(lo-half(b)).
;; Combiner-only pattern (leading '*' means no gen function is created).
1313 (define_insn "*aarch64_<su>mlal_lo<mode>"
1314 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1317 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1318 (match_operand:VQW 2 "register_operand" "w")
1319 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1320 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1321 (match_operand:VQW 4 "register_operand" "w")
1323 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1325 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1326 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant (SMLAL2/UMLAL2).
1329 (define_insn "*aarch64_<su>mlal_hi<mode>"
1330 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1333 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1334 (match_operand:VQW 2 "register_operand" "w")
1335 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1336 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1337 (match_operand:VQW 4 "register_operand" "w")
1339 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1341 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1342 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract: acc -= extend(lo-half(a)) * extend(lo-half(b)).
1345 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1348 (match_operand:<VWIDE> 1 "register_operand" "0")
1350 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1351 (match_operand:VQW 2 "register_operand" "w")
1352 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1353 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1354 (match_operand:VQW 4 "register_operand" "w")
1357 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1358 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant (SMLSL2/UMLSL2).
1361 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1364 (match_operand:<VWIDE> 1 "register_operand" "0")
1366 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1367 (match_operand:VQW 2 "register_operand" "w")
1368 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1369 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370 (match_operand:VQW 4 "register_operand" "w")
1373 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1374 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-source forms: whole D-register operands, no half selection needed.
1377 (define_insn "*aarch64_<su>mlal<mode>"
1378 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:VD_BHSI 1 "register_operand" "w"))
1384 (match_operand:VD_BHSI 2 "register_operand" "w")))
1385 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1387 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1388 [(set_attr "type" "neon_mla_<Vetype>_long")]
1391 (define_insn "*aarch64_<su>mlsl<mode>"
1392 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1394 (match_operand:<VWIDE> 1 "register_operand" "0")
1397 (match_operand:VD_BHSI 2 "register_operand" "w"))
1399 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1401 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1402 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
1405 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1406 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1407 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1408 (match_operand:VQW 1 "register_operand" "w")
1409 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1410 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1411 (match_operand:VQW 2 "register_operand" "w")
1414 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1415 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: supplies the lo-half selector to the insn above.
1418 (define_expand "vec_widen_<su>mult_lo_<mode>"
1419 [(match_operand:<VWIDE> 0 "register_operand" "")
1420 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1421 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1424 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1425 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves (SMULL2/UMULL2).
1432 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1433 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1434 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1435 (match_operand:VQW 1 "register_operand" "w")
1436 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1437 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1438 (match_operand:VQW 2 "register_operand" "w")
1441 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1442 [(set_attr "type" "neon_mul_<Vetype>_long")]
1445 (define_expand "vec_widen_<su>mult_hi_<mode>"
1446 [(match_operand:<VWIDE> 0 "register_operand" "")
1447 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1448 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1451 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1452 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1460 ;; FP vector operations.
1461 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1462 ;; double-precision (64-bit) floating-point data types and arithmetic as
1463 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1464 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1466 ;; Floating-point operations can raise an exception. Vectorizing such
1467 ;; operations is safe because of reasons explained below.
1469 ;; ARMv8 permits an extension to enable trapped floating-point
1470 ;; exception handling, however this is an optional feature. In the
1471 ;; event of a floating-point exception being raised by vectorised
1473 ;; 1. If trapped floating-point exceptions are available, then a trap
1474 ;; will be taken when any lane raises an enabled exception. A trap
1475 ;; handler may determine which lane raised the exception.
1476 ;; 2. Alternatively a sticky exception flag is set in the
1477 ;; floating-point status register (FPSR). Software may explicitly
1478 ;; test the exception flags, in which case the tests will either
1479 ;; prevent vectorisation, allowing precise identification of the
1480 ;; failing operation, or if tested outside of vectorisable regions
1481 ;; then the specific operation and lane are not of interest.
1483 ;; FP arithmetic operations.
;; Elementwise FP add (FADD) on 64- and 128-bit float vectors.
1485 (define_insn "add<mode>3"
1486 [(set (match_operand:VDQF 0 "register_operand" "=w")
1487 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1488 (match_operand:VDQF 2 "register_operand" "w")))]
1490 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1491 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Elementwise FP subtract (FSUB).
1494 (define_insn "sub<mode>3"
1495 [(set (match_operand:VDQF 0 "register_operand" "=w")
1496 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1497 (match_operand:VDQF 2 "register_operand" "w")))]
1499 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1500 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Elementwise FP multiply (FMUL).
1503 (define_insn "mul<mode>3"
1504 [(set (match_operand:VDQF 0 "register_operand" "=w")
1505 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1506 (match_operand:VDQF 2 "register_operand" "w")))]
1508 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1509 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; Elementwise FP divide (FDIV).
1512 (define_insn "div<mode>3"
1513 [(set (match_operand:VDQF 0 "register_operand" "=w")
1514 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1515 (match_operand:VDQF 2 "register_operand" "w")))]
1517 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1518 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
;; Elementwise FP negate (FNEG) and absolute value (FABS).
1521 (define_insn "neg<mode>2"
1522 [(set (match_operand:VDQF 0 "register_operand" "=w")
1523 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1525 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1526 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1529 (define_insn "abs<mode>2"
1530 [(set (match_operand:VDQF 0 "register_operand" "=w")
1531 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1533 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1534 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3 (FMLA; op3 tied to op0 via "0").
1537 (define_insn "fma<mode>4"
1538 [(set (match_operand:VDQF 0 "register_operand" "=w")
1539 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1540 (match_operand:VDQF 2 "register_operand" "w")
1541 (match_operand:VDQF 3 "register_operand" "0")))]
1543 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1544 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLA with one multiplicand broadcast from a lane; ENDIAN_LANE_N converts
;; the GCC lane number to the architectural lane number before printing.
1547 (define_insn "*aarch64_fma4_elt<mode>"
1548 [(set (match_operand:VDQF 0 "register_operand" "=w")
1552 (match_operand:VDQF 1 "register_operand" "<h_con>")
1553 (parallel [(match_operand:SI 2 "immediate_operand")])))
1554 (match_operand:VDQF 3 "register_operand" "w")
1555 (match_operand:VDQF 4 "register_operand" "0")))]
1558 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1559 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1561 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, but the lane source vector has the opposite width (V2SF<->V4SF).
1564 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1565 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1567 (vec_duplicate:VDQSF
1569 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1570 (parallel [(match_operand:SI 2 "immediate_operand")])))
1571 (match_operand:VDQSF 3 "register_operand" "w")
1572 (match_operand:VDQSF 4 "register_operand" "0")))]
1575 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1576 INTVAL (operands[2])));
1577 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1579 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLA with a scalar DF broadcast — always lane 0.
1582 (define_insn "*aarch64_fma4_elt_to_128df"
1583 [(set (match_operand:V2DF 0 "register_operand" "=w")
1586 (match_operand:DF 1 "register_operand" "w"))
1587 (match_operand:V2DF 2 "register_operand" "w")
1588 (match_operand:V2DF 3 "register_operand" "0")))]
1590 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1591 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF result computed from a selected V2DF lane.
1594 (define_insn "*aarch64_fma4_elt_to_64v2df"
1595 [(set (match_operand:DF 0 "register_operand" "=w")
1598 (match_operand:V2DF 1 "register_operand" "w")
1599 (parallel [(match_operand:SI 2 "immediate_operand")]))
1600 (match_operand:DF 3 "register_operand" "w")
1601 (match_operand:DF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1605 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1607 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: op0 = -op1 * op2 + op3 (FMLS; acc tied via "0").
1610 (define_insn "fnma<mode>4"
1611 [(set (match_operand:VDQF 0 "register_operand" "=w")
1613 (match_operand:VDQF 1 "register_operand" "w")
1615 (match_operand:VDQF 2 "register_operand" "w"))
1616 (match_operand:VDQF 3 "register_operand" "0")))]
1618 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1619 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; FMLS with a lane-broadcast multiplicand; lane renumbered for endianness.
1622 (define_insn "*aarch64_fnma4_elt<mode>"
1623 [(set (match_operand:VDQF 0 "register_operand" "=w")
1626 (match_operand:VDQF 3 "register_operand" "w"))
1629 (match_operand:VDQF 1 "register_operand" "<h_con>")
1630 (parallel [(match_operand:SI 2 "immediate_operand")])))
1631 (match_operand:VDQF 4 "register_operand" "0")))]
1634 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1635 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1637 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, lane source of the opposite vector width (V2SF<->V4SF).
1640 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1641 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1644 (match_operand:VDQSF 3 "register_operand" "w"))
1645 (vec_duplicate:VDQSF
1647 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1648 (parallel [(match_operand:SI 2 "immediate_operand")])))
1649 (match_operand:VDQSF 4 "register_operand" "0")))]
1652 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1653 INTVAL (operands[2])));
1654 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1656 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLS with a scalar DF broadcast — always lane 0.
1659 (define_insn "*aarch64_fnma4_elt_to_128df"
1660 [(set (match_operand:V2DF 0 "register_operand" "=w")
1663 (match_operand:V2DF 2 "register_operand" "w"))
1665 (match_operand:DF 1 "register_operand" "w"))
1666 (match_operand:V2DF 3 "register_operand" "0")))]
1668 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1669 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMLS from a selected V2DF lane.
1672 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1673 [(set (match_operand:DF 0 "register_operand" "=w")
1676 (match_operand:V2DF 1 "register_operand" "w")
1677 (parallel [(match_operand:SI 2 "immediate_operand")]))
1679 (match_operand:DF 3 "register_operand" "w"))
1680 (match_operand:DF 4 "register_operand" "0")))]
1683 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1684 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1686 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1689 ;; Vector versions of the floating-point frint patterns.
1690 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1691 (define_insn "<frint_pattern><mode>2"
1692 [(set (match_operand:VDQF 0 "register_operand" "=w")
1693 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1696 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1697 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1700 ;; Vector versions of the fcvt standard patterns.
1701 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to integer (FCVT<rounding><S|U>).
1702 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1703 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1704 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1705 [(match_operand:VDQF 1 "register_operand" "w")]
1708 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1709 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Combine a multiply by a power of two with the conversion: emit FCVTZ
;; with an #fbits immediate.  The IN_RANGE check keeps fbits legal for the
;; element size.
1712 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1713 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1714 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1716 (match_operand:VDQF 1 "register_operand" "w")
1717 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1720 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1721 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1723 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1725 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1726 output_asm_insn (buf, operands);
1729 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns and ftrunc onto the unspec
;; insns above.
1732 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1733 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1734 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1735 [(match_operand:VDQF 1 "register_operand")]
1740 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1741 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1742 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1743 [(match_operand:VDQF 1 "register_operand")]
1748 (define_expand "ftrunc<VDQF:mode>2"
1749 [(set (match_operand:VDQF 0 "register_operand")
1750 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Integer-to-float conversion (SCVTF/UCVTF).
1755 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1756 [(set (match_operand:VDQF 0 "register_operand" "=w")
1758 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1760 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1764 ;; Conversions between vectors of floats and doubles.
1765 ;; Contains a mix of patterns to match standard pattern names
1766 ;; and those for intrinsics.
1768 ;; Float widening operations.
;; Float-widening of the low half: FCVTL (half/single -> single/double).
1770 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1771 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1772 (float_extend:<VWIDE> (vec_select:<VHALF>
1773 (match_operand:VQ_HSF 1 "register_operand" "w")
1774 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1777 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1778 [(set_attr "type" "neon_fp_cvt_widen_s")]
1781 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1782 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1783 ;; the meaning of HI and LO changes depending on the target endianness.
1784 ;; While elsewhere we map the higher numbered elements of a vector to
1785 ;; the lower architectural lanes of the vector, for these patterns we want
1786 ;; to always treat "hi" as referring to the higher architectural lanes.
1787 ;; Consequently, while the patterns below look inconsistent with our
1788 ;; other big-endian patterns their behaviour is as required.
1790 (define_expand "vec_unpacks_lo_<mode>"
1791 [(match_operand:<VWIDE> 0 "register_operand" "")
1792 (match_operand:VQ_HSF 1 "register_operand" "")]
1795 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1796 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Float-widening of the high half: FCVTL2.
1802 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1803 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1804 (float_extend:<VWIDE> (vec_select:<VHALF>
1805 (match_operand:VQ_HSF 1 "register_operand" "w")
1806 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1809 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1810 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander for float-widening the HIGH half of a vector.
;; Builds a hi-half selector PARALLEL and emits the FCVTL2 insn.
;; Fix: this previously emitted gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; whose operand 2 predicate is vect_par_cnst_lo_half — passing the hi-half
;; PARALLEL to it cannot match, and vec_unpacks_hi would widen the wrong
;; half.  The hi generator must be used, mirroring the integer
;; vec_unpack<su>_hi_<mode> expander above.
1813 (define_expand "vec_unpacks_hi_<mode>"
1814 [(match_operand:<VWIDE> 0 "register_operand" "")
1815 (match_operand:VQ_HSF 1 "register_operand" "")]
1818 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1819 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole 64-bit float vector (FCVTL on the D register).
1824 (define_insn "aarch64_float_extend_lo_<Vwide>"
1825 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1826 (float_extend:<VWIDE>
1827 (match_operand:VDF 1 "register_operand" "w")))]
1829 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1830 [(set_attr "type" "neon_fp_cvt_widen_s")]
1833 ;; Float narrowing operations.
;; Narrow a 128-bit float vector into a 64-bit result (FCVTN).
1835 (define_insn "aarch64_float_truncate_lo_<mode>"
1836 [(set (match_operand:VDF 0 "register_operand" "=w")
1838 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1840 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1841 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping the existing low half (FCVTN2);
;; operand 1 is tied to operand 0.  Little-endian vec_concat ordering.
1844 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1845 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1847 (match_operand:VDF 1 "register_operand" "0")
1849 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1850 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1851 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1852 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: vec_concat arms swapped.
1855 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1856 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1859 (match_operand:<VWIDE> 2 "register_operand" "w"))
1860 (match_operand:VDF 1 "register_operand" "0")))]
1861 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1862 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1863 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endian-dispatching expander for the two FCVTN2 insns above.
1866 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1867 [(match_operand:<VDBL> 0 "register_operand" "=w")
1868 (match_operand:VDF 1 "register_operand" "0")
1869 (match_operand:<VWIDE> 2 "register_operand" "w")]
1872 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1873 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1874 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1875 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN for the low half, FCVTN2 for
;; the high half; lo/hi operands swap under BYTES_BIG_ENDIAN.
1880 (define_expand "vec_pack_trunc_v2df"
1881 [(set (match_operand:V4SF 0 "register_operand")
1883 (float_truncate:V2SF
1884 (match_operand:V2DF 1 "register_operand"))
1885 (float_truncate:V2SF
1886 (match_operand:V2DF 2 "register_operand"))
1890 rtx tmp = gen_reg_rtx (V2SFmode);
1891 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1892 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1894 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1895 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1896 tmp, operands[hi]));
;; Pack two scalar DFs into a V2SF: assemble a V2DF via move_lo/hi_quad,
;; then narrow it with FCVTN.
1901 (define_expand "vec_pack_trunc_df"
1902 [(set (match_operand:V2SF 0 "register_operand")
1905 (match_operand:DF 1 "register_operand"))
1907 (match_operand:DF 2 "register_operand"))
1911 rtx tmp = gen_reg_rtx (V2SFmode);
1912 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1913 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1915 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1916 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1917 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
;; Intrinsic pattern: op0 = op1 - op2 * op3 (FMLS, non-fused RTL form).
;; NOTE(review): the "type" attr says _scalar<q> although this is a
;; vector-by-vector FMLS — looks like a scheduling-attr inaccuracy; confirm
;; before changing, as it only affects scheduling models.
1922 (define_insn "aarch64_vmls<mode>"
1923 [(set (match_operand:VDQF 0 "register_operand" "=w")
1924 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1925 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1926 (match_operand:VDQF 3 "register_operand" "w"))))]
1928 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1929 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1933 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1935 ;; a = (b < c) ? b : c;
1936 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1937 ;; either explicitly or indirectly via -ffast-math.
1939 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1940 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1941 ;; operand will be returned when both operands are zero (i.e. they may not
1942 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1943 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names: map to FMAXNM/FMINNM (NaN-propagation-free
;; forms), valid because these names are only used under fast-math rules.
1946 (define_insn "<su><maxmin><mode>3"
1947 [(set (match_operand:VDQF 0 "register_operand" "=w")
1948 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1949 (match_operand:VDQF 2 "register_operand" "w")))]
1951 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1952 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Intrinsic max/min kept as an unspec so exact IEEE NaN/zero semantics of
;; the underlying instruction are preserved.
1955 (define_insn "<maxmin_uns><mode>3"
1956 [(set (match_operand:VDQF 0 "register_operand" "=w")
1957 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1958 (match_operand:VDQF 2 "register_operand" "w")]
1961 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1962 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1965 ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
1966 (define_insn "<fmaxmin><mode>3"
1967 [(set (match_operand:VDQF 0 "register_operand" "=w")
1968 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1969 (match_operand:VDQF 2 "register_operand" "w")]
1972 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1973 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1976 ;; 'across lanes' add.
;; Across-lanes integer add reduction: reduce into a scratch vector, then
;; extract architectural lane 0 (ENDIAN_LANE_N maps it for big-endian).
1978 (define_expand "reduc_plus_scal_<mode>"
1979 [(match_operand:<VEL> 0 "register_operand" "=w")
1980 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1984 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1985 rtx scratch = gen_reg_rtx (<MODE>mode);
1986 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1987 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Same shape for the two-element float vectors.
1992 (define_expand "reduc_plus_scal_<mode>"
1993 [(match_operand:<VEL> 0 "register_operand" "=w")
1994 (match_operand:V2F 1 "register_operand" "w")]
1997 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1998 rtx scratch = gen_reg_rtx (<MODE>mode);
1999 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2000 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; ADDV/ADDP reduction bodies used by the expanders above.
2005 (define_insn "aarch64_reduc_plus_internal<mode>"
2006 [(set (match_operand:VDQV 0 "register_operand" "=w")
2007 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2010 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2011 [(set_attr "type" "neon_reduc_add<q>")]
2014 (define_insn "aarch64_reduc_plus_internalv2si"
2015 [(set (match_operand:V2SI 0 "register_operand" "=w")
2016 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2019 "addp\\t%0.2s, %1.2s, %1.2s"
2020 [(set_attr "type" "neon_reduc_add")]
2023 (define_insn "aarch64_reduc_plus_internal<mode>"
2024 [(set (match_operand:V2F 0 "register_operand" "=w")
2025 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
2028 "faddp\\t%<Vetype>0, %1.<Vtype>"
2029 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF needs two pairwise FADDP steps, done by the expander below.
2032 (define_insn "aarch64_addpv4sf"
2033 [(set (match_operand:V4SF 0 "register_operand" "=w")
2034 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
2037 "faddp\\t%0.4s, %1.4s, %1.4s"
2038 [(set_attr "type" "neon_fp_reduc_add_s_q")]
2041 (define_expand "reduc_plus_scal_v4sf"
2042 [(set (match_operand:SF 0 "register_operand")
2043 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2047 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2048 rtx scratch = gen_reg_rtx (V4SFmode);
2049 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
2050 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
2051 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Per-element bit-counting ops: CLS (sign bits), CLZ, CNT (popcount).
2055 (define_insn "clrsb<mode>2"
2056 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2057 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2059 "cls\\t%0.<Vtype>, %1.<Vtype>"
2060 [(set_attr "type" "neon_cls<q>")]
2063 (define_insn "clz<mode>2"
2064 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2065 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2067 "clz\\t%0.<Vtype>, %1.<Vtype>"
2068 [(set_attr "type" "neon_cls<q>")]
2071 (define_insn "popcount<mode>2"
2072 [(set (match_operand:VB 0 "register_operand" "=w")
2073 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2075 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2076 [(set_attr "type" "neon_cnt<q>")]
2079 ;; 'across lanes' max and min ops.
2081 ;; Template for outputting a scalar, so we can create __builtins which can be
2082 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; Across-lanes FP max/min reduction: reduce into a scratch vector, then
;; extract architectural lane 0.
2083 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2084 [(match_operand:<VEL> 0 "register_operand")
2085 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
2089 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2090 rtx scratch = gen_reg_rtx (<MODE>mode);
2091 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2093 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2098 ;; Likewise for integer cases, signed and unsigned.
2099 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2100 [(match_operand:<VEL> 0 "register_operand")
2101 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2105 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2106 rtx scratch = gen_reg_rtx (<MODE>mode);
2107 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2109 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Reduction bodies: SMAXV/SMINV/UMAXV/UMINV, SMAXP/... for V2SI, and
;; FMAXV/FMINV (plus NM forms) for floats.
2114 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2115 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2116 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2119 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2120 [(set_attr "type" "neon_reduc_minmax<q>")]
2123 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2124 [(set (match_operand:V2SI 0 "register_operand" "=w")
2125 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2128 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2129 [(set_attr "type" "neon_reduc_minmax")]
2132 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2133 [(set (match_operand:VDQF 0 "register_operand" "=w")
2134 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
2137 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2138 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
2141 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2143 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2146 ;; Thus our BSL is of the form:
2147 ;; op0 = bsl (mask, op2, op3)
2148 ;; We can use any of:
2151 ;; bsl mask, op1, op2
2152 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2153 ;; bit op0, op2, mask
2154 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2155 ;; bif op0, op1, mask
;; Bitwise select: three register-allocation alternatives emit BSL, BIT or
;; BIF depending on which input shares a register with the destination.
2157 (define_insn "aarch64_simd_bsl<mode>_internal"
2158 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2162 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2163 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2164 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2165 (match_dup:<V_cmp_result> 3)
2169 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2170 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2171 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2172 [(set_attr "type" "neon_bsl<q>")]
;; Public expander: for float modes, reinterpret all operands in the integer
;; comparison-result mode (the internal pattern is integer-only), do the
;; select in a temporary, and move the result back in the original mode.
2175 (define_expand "aarch64_simd_bsl<mode>"
2176 [(match_operand:VALLDIF 0 "register_operand")
2177 (match_operand:<V_cmp_result> 1 "register_operand")
2178 (match_operand:VALLDIF 2 "register_operand")
2179 (match_operand:VALLDIF 3 "register_operand")]
2182 /* We can't alias operands together if they have different modes.  */
2183 rtx tmp = operands[0];
2184 if (FLOAT_MODE_P (<MODE>mode))
2186 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2187 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2188 tmp = gen_reg_rtx (<V_cmp_result>mode);
2190 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2191 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2195 if (tmp != operands[0])
2196 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Integer vector conditional select: op0 = (op4 <cmp> op5) ? op1 : op2.
;; The comparison is materialised as a -1/0 mask via the CM<cc> patterns,
;; inverting the comparison code where that allows a direct CMEQ (for NE) or
;; lets the mask be used as the result without a BSL; otherwise the mask
;; drives aarch64_simd_bsl.  Comparisons against zero keep the immediate-zero
;; instruction forms.
;; NOTE(review): several lines of this body are elided in this chunk.
2201 (define_expand "aarch64_vcond_internal<mode><mode>"
2202 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2203 (if_then_else:VSDQ_I_DI
2204 (match_operator 3 "comparison_operator"
2205 [(match_operand:VSDQ_I_DI 4 "register_operand")
2206 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2207 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2208 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2211 rtx op1 = operands[1];
2212 rtx op2 = operands[2];
2213 rtx mask = gen_reg_rtx (<MODE>mode);
2214 enum rtx_code code = GET_CODE (operands[3]);
2216 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2217 and desirable for other comparisons if it results in FOO ? -1 : 0
2218 (this allows direct use of the comparison result without a bsl).  */
2221 && op1 == CONST0_RTX (<V_cmp_result>mode)
2222 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2228 case LE: code = GT; break;
2229 case LT: code = GE; break;
2230 case GE: code = LT; break;
2231 case GT: code = LE; break;
2233 case NE: code = EQ; break;
2234 case LTU: code = GEU; break;
2235 case LEU: code = GTU; break;
2236 case GTU: code = LEU; break;
2237 case GEU: code = LTU; break;
2238 default: gcc_unreachable ();
2242 /* Make sure we can handle the last operand.  */
2246 /* Normalized to EQ above.  */
2254 /* These instructions have a form taking an immediate zero.  */
2255 if (operands[5] == CONST0_RTX (<MODE>mode))
2257 /* Fall through, as may need to load into register.  */
2259 if (!REG_P (operands[5]))
2260 operands[5] = force_reg (<MODE>mode, operands[5]);
2267 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2271 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2275 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2279 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2283 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2287 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2291 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2295 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2298 /* NE has been normalized to EQ above.  */
2300 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2307 /* If we have (a = (b CMP c) ? -1 : 0);
2308 Then we can simply move the generated mask.  */
2310 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2311 && op2 == CONST0_RTX (<V_cmp_result>mode))
2312 emit_move_insn (operands[0], mask);
2316 op1 = force_reg (<MODE>mode, op1);
2318 op2 = force_reg (<MODE>mode, op2);
2319 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point vector conditional select.  FCM only provides GE/GT/EQ
;; (plus direct compare-against-zero forms), so other codes are implemented
;; via a base/complimentary comparison pair with the BSL operands swapped
;; where needed; UNEQ/(UN)ORDERED are built from two FCMGT/FCMGE results
;; ORed together.  FCM returns false for unordered lanes, which is why the
;; inverse-comparison + swapped-BSL trick gives the right NaN behaviour.
;; NOTE(review): several lines of this body are elided in this chunk.
2326 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2327 [(set (match_operand:VDQF_COND 0 "register_operand")
2329 (match_operator 3 "comparison_operator"
2330 [(match_operand:VDQF 4 "register_operand")
2331 (match_operand:VDQF 5 "nonmemory_operand")])
2332 (match_operand:VDQF_COND 1 "nonmemory_operand")
2333 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2337 int use_zero_form = 0;
2338 int swap_bsl_operands = 0;
2339 rtx op1 = operands[1];
2340 rtx op2 = operands[2];
2341 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2342 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2344 rtx (*base_comparison) (rtx, rtx, rtx);
2345 rtx (*complimentary_comparison) (rtx, rtx, rtx);
2347 switch (GET_CODE (operands[3]))
2354 if (operands[5] == CONST0_RTX (<MODE>mode))
2361 if (!REG_P (operands[5]))
2362 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2365 switch (GET_CODE (operands[3]))
2375 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2376 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2384 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2385 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2390 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2391 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2397 switch (GET_CODE (operands[3]))
2404 /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2405 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2411 Note that there also exist direct comparison against 0 forms,
2412 so catch those as a special case.  */
2416 switch (GET_CODE (operands[3]))
2419 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2422 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2425 /* Do nothing, other zero form cases already have the correct
2432 emit_insn (base_comparison (mask, operands[4], operands[5]));
2434 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2441 /* FCM returns false for lanes which are unordered, so if we use
2442 the inverse of the comparison we actually want to emit, then
2443 swap the operands to BSL, we will end up with the correct result.
2444 Note that a NE NaN and NaN NE b are true for all a, b.
2446 Our transformations are:
2451 a NE b -> !(a EQ b)  */
2454 emit_insn (base_comparison (mask, operands[4], operands[5]));
2456 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2458 swap_bsl_operands = 1;
2461 /* We check (a > b ||  b > a).  combining these comparisons give us
2462 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2463 will then give us (a == b ||  a UNORDERED b) as intended.  */
2465 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2466 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2467 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2468 swap_bsl_operands = 1;
2471 /* Operands are ORDERED iff (a > b || b >= a).
2472 Swapping the operands to BSL will give the UNORDERED case.  */
2473 swap_bsl_operands = 1;
2476 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2477 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2478 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2484 if (swap_bsl_operands)
2490 /* If we have (a = (b CMP c) ? -1 : 0);
2491 Then we can simply move the generated mask.  */
2493 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2494 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2495 emit_move_insn (operands[0], mask);
2499 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2501 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2502 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond pattern for VALLDI: defer to aarch64_vcond_internal.
2509 (define_expand "vcond<mode><mode>"
2510 [(set (match_operand:VALLDI 0 "register_operand")
2511 (if_then_else:VALLDI
2512 (match_operator 3 "comparison_operator"
2513 [(match_operand:VALLDI 4 "register_operand")
2514 (match_operand:VALLDI 5 "nonmemory_operand")])
2515 (match_operand:VALLDI 1 "nonmemory_operand")
2516 (match_operand:VALLDI 2 "nonmemory_operand")))]
2519 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2520 operands[2], operands[3],
2521 operands[4], operands[5]));
;; vcond where a float comparison selects between values of the corresponding
;; integer <V_cmp_result> mode.
2525 (define_expand "vcond<v_cmp_result><mode>"
2526 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2527 (if_then_else:<V_cmp_result>
2528 (match_operator 3 "comparison_operator"
2529 [(match_operand:VDQF 4 "register_operand")
2530 (match_operand:VDQF 5 "nonmemory_operand")])
2531 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2532 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2535 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2536 operands[0], operands[1],
2537 operands[2], operands[3],
2538 operands[4], operands[5]));
;; Unsigned vcondu: the shared internal expander also handles the unsigned
;; comparison codes (GTU/GEU/...).
2542 (define_expand "vcondu<mode><mode>"
2543 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2544 (if_then_else:VSDQ_I_DI
2545 (match_operator 3 "comparison_operator"
2546 [(match_operand:VSDQ_I_DI 4 "register_operand")
2547 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2548 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2549 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2552 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2553 operands[2], operands[3],
2554 operands[4], operands[5]));
2558 ;; Patterns for AArch64 SIMD Intrinsics.
2560 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: read one lane sign-extended into a W/X register; the lane index is
;; flipped for big-endian via ENDIAN_LANE_N before printing.
2561 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2562 [(set (match_operand:GPI 0 "register_operand" "=r")
2565 (match_operand:VDQQH 1 "register_operand" "w")
2566 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2569 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2570 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2572 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: read one lane zero-extended into a W register.
2575 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2576 [(set (match_operand:SI 0 "register_operand" "=r")
2579 (match_operand:VDQQH 1 "register_operand" "w")
2580 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2583 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2584 return "umov\\t%w0, %1.<Vetype>[%2]";
2586 [(set_attr "type" "neon_to_gp<q>")]
2589 ;; Lane extraction of a value, neither sign nor zero extension
2590 ;; is guaranteed so upper bits should be considered undefined.
2591 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP to a SIMD scalar register,
;; or ST1 of a single lane directly to memory.
2592 (define_insn "aarch64_get_lane<mode>"
2593 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2595 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2596 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2599 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2600 switch (which_alternative)
2603 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2605 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2607 return "st1\\t{%1.<Vetype>}[%2], %0";
2612 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2615 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2616 ;; dest vector.
;; Concatenate a 64-bit value (reg, GP reg, or memory) with zero, little-endian
;; layout: the zero half is the high part.
2618 (define_insn "*aarch64_combinez<mode>"
2619 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2621 (match_operand:VD_BHSI 1 "general_operand" "w,r,m")
2622 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2623 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2628 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2629 (set_attr "simd" "yes,*,yes")
2630 (set_attr "fp" "*,yes,*")]
;; Big-endian version: the vec_concat operand order is swapped; the generated
;; code is otherwise identical.
2633 (define_insn "*aarch64_combinez_be<mode>"
2634 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2636 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2637 (match_operand:VD_BHSI 1 "general_operand" "w,r,m")))]
2638 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2643 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2644 (set_attr "simd" "yes,*,yes")
2645 (set_attr "fp" "*,yes,*")]
;; Expander for combining two 64-bit halves into a 128-bit vector; adjusts the
;; operand order for big-endian before emitting the internal pattern.
2648 (define_expand "aarch64_combine<mode>"
2649 [(match_operand:<VDBL> 0 "register_operand")
2650 (match_operand:VDC 1 "register_operand")
2651 (match_operand:VDC 2 "register_operand")]
2655 if (BYTES_BIG_ENDIAN)
2665 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as a single insn until after reload, then split into the two
;; half-moves by aarch64_split_simd_combine (operand order depends on
;; endianness).  The earlyclobber "=&w" keeps the dest distinct from inputs.
2670 (define_insn_and_split "aarch64_combine_internal<mode>"
2671 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2672 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2673 (match_operand:VDC 2 "register_operand" "w")))]
2676 "&& reload_completed"
2679 if (BYTES_BIG_ENDIAN)
2680 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2682 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2685 [(set_attr "type" "multiple")]
;; Intrinsic-facing combine: builds the result as move-lo-quad followed by
;; move-hi-quad of the two 64-bit inputs.
2688 (define_expand "aarch64_simd_combine<mode>"
2689 [(match_operand:<VDBL> 0 "register_operand")
2690 (match_operand:VDC 1 "register_operand")
2691 (match_operand:VDC 2 "register_operand")]
2694 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2695 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2698 [(set_attr "type" "multiple")]
2701 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two 128-bit vectors:
;; [SU]ADDL2/[SU]SUBL2.
2703 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2704 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2705 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2706 (match_operand:VQW 1 "register_operand" "w")
2707 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2708 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2709 (match_operand:VQW 2 "register_operand" "w")
2712 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2713 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the LOW halves: [SU]ADDL/[SU]SUBL.
2716 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2717 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2718 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2719 (match_operand:VQW 1 "register_operand" "w")
2720 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2721 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2722 (match_operand:VQW 2 "register_operand" "w")
2725 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2726 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Expander: build the hi-half lane selector and emit the _hi_internal insn.
2730 (define_expand "aarch64_saddl2<mode>"
2731 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2732 (match_operand:VQW 1 "register_operand" "w")
2733 (match_operand:VQW 2 "register_operand" "w")]
2736 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2737 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned counterpart of saddl2.
2742 (define_expand "aarch64_uaddl2<mode>"
2743 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2744 (match_operand:VQW 1 "register_operand" "w")
2745 (match_operand:VQW 2 "register_operand" "w")]
2748 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2749 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Signed widening subtract, high halves.
2754 (define_expand "aarch64_ssubl2<mode>"
2755 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2756 (match_operand:VQW 1 "register_operand" "w")
2757 (match_operand:VQW 2 "register_operand" "w")]
2760 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2761 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned widening subtract, high halves.
2766 (define_expand "aarch64_usubl2<mode>"
2767 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2768 (match_operand:VQW 1 "register_operand" "w")
2769 (match_operand:VQW 2 "register_operand" "w")]
2772 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2773 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of full 64-bit vectors.
2778 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2779 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2780 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2781 (match_operand:VD_BHSI 1 "register_operand" "w"))
2783 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2785 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2786 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2789 ;; <su><addsub>w<q>.
;; widen_ssum for 128-bit inputs: SADDW the low half into the accumulator,
;; then SADDW2 the high half.
2791 (define_expand "widen_ssum<mode>3"
2792 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2793 (plus:<VDBLW> (sign_extend:<VDBLW>
2794 (match_operand:VQW 1 "register_operand" ""))
2795 (match_operand:<VDBLW> 2 "register_operand" "")))]
2798 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2799 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2801 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2803 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit input: a single SADDW suffices.
2808 (define_expand "widen_ssum<mode>3"
2809 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2810 (plus:<VWIDE> (sign_extend:<VWIDE>
2811 (match_operand:VD_BHSI 1 "register_operand" ""))
2812 (match_operand:<VWIDE> 2 "register_operand" "")))]
2815 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned widen_usum, 128-bit inputs: UADDW then UADDW2.
2819 (define_expand "widen_usum<mode>3"
2820 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2821 (plus:<VDBLW> (zero_extend:<VDBLW>
2822 (match_operand:VQW 1 "register_operand" ""))
2823 (match_operand:<VDBLW> 2 "register_operand" "")))]
2826 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2827 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2829 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2831 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned widen_usum, 64-bit input: a single UADDW.
2836 (define_expand "widen_usum<mode>3"
2837 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2838 (plus:<VWIDE> (zero_extend:<VWIDE>
2839 (match_operand:VD_BHSI 1 "register_operand" ""))
2840 (match_operand:<VWIDE> 2 "register_operand" "")))]
2843 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; [SU]ADDW/[SU]SUBW: wide operand 1 combined with the extended narrow
;; operand 2.
2847 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2848 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2849 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2851 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2853 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2854 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Low-half form: operand 3 is the low-half lane selector.
2857 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2858 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2859 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2862 (match_operand:VQW 2 "register_operand" "w")
2863 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2865 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2866 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half form: [SU]ADDW2/[SU]SUBW2.
2869 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2870 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2871 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2874 (match_operand:VQW 2 "register_operand" "w")
2875 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2877 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2878 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Expander: build the hi-half selector and emit the w2 internal insn.
2881 (define_expand "aarch64_saddw2<mode>"
2882 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2883 (match_operand:<VWIDE> 1 "register_operand" "w")
2884 (match_operand:VQW 2 "register_operand" "w")]
2887 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2888 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Unsigned counterpart of saddw2.
2893 (define_expand "aarch64_uaddw2<mode>"
2894 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2895 (match_operand:<VWIDE> 1 "register_operand" "w")
2896 (match_operand:VQW 2 "register_operand" "w")]
2899 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2900 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Signed widening subtract, high half.
2906 (define_expand "aarch64_ssubw2<mode>"
2907 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2908 (match_operand:<VWIDE> 1 "register_operand" "w")
2909 (match_operand:VQW 2 "register_operand" "w")]
2912 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2913 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Unsigned widening subtract, high half.
2918 (define_expand "aarch64_usubw2<mode>"
2919 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2920 (match_operand:<VWIDE> 1 "register_operand" "w")
2921 (match_operand:VQW 2 "register_operand" "w")]
2924 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2925 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2930 ;; <su><r>h<addsub>.
;; Halving (and rounding-halving) add/sub: [SU]H{ADD,SUB}, [SU]RHADD.
2932 (define_insn "aarch64_<sur>h<addsub><mode>"
2933 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2934 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2935 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2938 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2939 [(set_attr "type" "neon_<addsub>_halve<q>")]
2942 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: {R,}{ADD,SUB}HN.
2944 (define_insn "aarch64_<sur><addsub>hn<mode>"
2945 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2946 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2947 (match_operand:VQN 2 "register_operand" "w")]
2950 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2951 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; ...HN2 variant: the narrowed result is written to the high half of the
;; output, whose low half comes from operand 1 (tied via constraint "0").
2954 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2955 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2956 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2957 (match_operand:VQN 2 "register_operand" "w")
2958 (match_operand:VQN 3 "register_operand" "w")]
2961 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2962 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply PMUL on byte vectors.
2967 (define_insn "aarch64_pmul<mode>"
2968 [(set (match_operand:VB 0 "register_operand" "=w")
2969 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2970 (match_operand:VB 2 "register_operand" "w")]
2973 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2974 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX on all vector and scalar FP modes.
2979 (define_insn "aarch64_fmulx<mode>"
2980 [(set (match_operand:VALLF 0 "register_operand" "=w")
2981 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
2982 (match_operand:VALLF 2 "register_operand" "w")]
2985 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2986 [(set_attr "type" "neon_fp_mul_<Vetype>")]
2989 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane duplicated from the other-width (<VSWAP_WIDTH>) vector;
;; lane index is endian-adjusted via ENDIAN_LANE_N before printing.
2991 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
2992 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2994 [(match_operand:VDQSF 1 "register_operand" "w")
2995 (vec_duplicate:VDQSF
2997 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
2998 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3002 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3003 INTVAL (operands[3])));
3004 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3006 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3009 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-width vector.
3011 (define_insn "*aarch64_mulx_elt<mode>"
3012 [(set (match_operand:VDQF 0 "register_operand" "=w")
3014 [(match_operand:VDQF 1 "register_operand" "w")
3017 (match_operand:VDQF 2 "register_operand" "w")
3018 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3022 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3023 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3025 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; V2DF FMULX by a DF scalar broadcast to both lanes (printed as lane 0).
3030 (define_insn "*aarch64_mulx_elt_to_64v2df"
3031 [(set (match_operand:V2DF 0 "register_operand" "=w")
3033 [(match_operand:V2DF 1 "register_operand" "w")
3035 (match_operand:DF 2 "register_operand" "w"))]
3039 return "fmulx\t%0.2d, %1.2d, %2.d[0]";
3041 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
3044 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3045 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3046 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar FP value by one lane of a vector.
3048 (define_insn "*aarch64_vgetfmulx<mode>"
3049 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3051 [(match_operand:<VEL> 1 "register_operand" "w")
3053 (match_operand:VDQF_DF 2 "register_operand" "w")
3054 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3058 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3059 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3061 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [SU]QADD/[SU]QSUB.
3065 (define_insn "aarch64_<su_optab><optab><mode>"
3066 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3067 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3068 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3070 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3071 [(set_attr "type" "neon_<optab><q>")]
3074 ;; suqadd and usqadd
;; SUQADD/USQADD: accumulating forms — operand 1 is the accumulator, tied to
;; the output (constraint "0"); only %0 and %2 appear in the template.
3076 (define_insn "aarch64_<sur>qadd<mode>"
3077 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3078 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3079 (match_operand:VSDQ_I 2 "register_operand" "w")]
3082 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3083 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed saturating extract-narrow to unsigned.
3088 (define_insn "aarch64_sqmovun<mode>"
3089 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3090 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3093 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3094 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3097 ;; sqmovn and uqmovn
;; [SU]QXTN: saturating extract-narrow.
3099 (define_insn "aarch64_<sur>qmovn<mode>"
3100 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3101 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3104 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3105 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (SQABS/SQNEG via s<optab>).
3110 (define_insn "aarch64_s<optab><mode>"
3111 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3113 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3115 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3116 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH/SQRDMULH, vector and scalar register forms.
3121 (define_insn "aarch64_sq<r>dmulh<mode>"
3122 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3124 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3125 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3128 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3129 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; SQ(R)DMULH by one lane of a <VCOND>-mode vector; the lane index is
;; endian-adjusted via ENDIAN_LANE_N before printing.
3134 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3135 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3137 [(match_operand:VDQHS 1 "register_operand" "w")
3139 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3140 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3144 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3145 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3146 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; laneq variant: the lane comes from a <VCONQ> (128-bit) vector.
3149 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3150 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3152 [(match_operand:VDQHS 1 "register_operand" "w")
3154 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3155 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3159 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3160 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) SQ(R)DMULH by a lane of a <VCOND> vector.
3164 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3165 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3167 [(match_operand:SD_HSI 1 "register_operand" "w")
3169 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3170 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3174 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3175 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3176 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (HI/SI) SQ(R)DMULH by a lane of a <VCONQ> (128-bit) vector.
3179 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3180 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3182 [(match_operand:SD_HSI 1 "register_operand" "w")
3184 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3185 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3189 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3190 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3191 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDML[AS]H: rounding doubling multiply accumulate/subtract.  Operand 1 is
;; the accumulator, tied to the output (constraint "0").
3196 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3197 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3199 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3200 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3201 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3204 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3205 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3208 ;; sqrdml[as]h_lane.
;; Vector lane form: multiplier lane taken from a <VCOND> vector,
;; index endian-adjusted via ENDIAN_LANE_N.
3210 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3211 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3213 [(match_operand:VDQHS 1 "register_operand" "0")
3214 (match_operand:VDQHS 2 "register_operand" "w")
3216 (match_operand:<VCOND> 3 "register_operand" "w")
3217 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3221 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3223 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3225 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) lane form.
3228 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3229 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3231 [(match_operand:SD_HSI 1 "register_operand" "0")
3232 (match_operand:SD_HSI 2 "register_operand" "w")
3234 (match_operand:<VCOND> 3 "register_operand" "w")
3235 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3239 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3241 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3243 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3246 ;; sqrdml[as]h_laneq.
;; laneq form: multiplier lane taken from a <VCONQ> (128-bit) vector.
3248 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3249 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3251 [(match_operand:VDQHS 1 "register_operand" "0")
3252 (match_operand:VDQHS 2 "register_operand" "w")
3254 (match_operand:<VCONQ> 3 "register_operand" "w")
3255 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3259 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3261 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3263 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (HI/SI) laneq form.
3266 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3267 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3269 [(match_operand:SD_HSI 1 "register_operand" "0")
3270 (match_operand:SD_HSI 2 "register_operand" "w")
3272 (match_operand:<VCONQ> 3 "register_operand" "w")
3273 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3277 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3279 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3281 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3286 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3287 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (match_operand:<VWIDE> 1 "register_operand" "0")
3292 (sign_extend:<VWIDE>
3293 (match_operand:VSD_HSI 2 "register_operand" "w"))
3294 (sign_extend:<VWIDE>
3295 (match_operand:VSD_HSI 3 "register_operand" "w")))
3298 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3299 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; SQDMLAL/SQDMLSL by-element forms: one multiplicand is a single lane
;; (vec_select + vec_duplicate) of operand 3.  _lane indexes a 64-bit
;; register (<VCOND>), _laneq a 128-bit register (<VCONQ>); the VD_HSI
;; patterns take vector multiplicands, the SD_HSI ones scalar.
;; ENDIAN_LANE_N remaps the GCC lane number to the architectural one
;; for big-endian before printing the [%4] index.
;; NOTE(review): interior lines (e.g. the `return` keyword before the
;; template strings) are elided in this excerpt — line numbers skip.
3304 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3305 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (match_operand:<VWIDE> 1 "register_operand" "0")
3310 (sign_extend:<VWIDE>
3311 (match_operand:VD_HSI 2 "register_operand" "w"))
3312 (sign_extend:<VWIDE>
3313 (vec_duplicate:VD_HSI
3315 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3316 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3321 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3323 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3325 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above, but the lane is taken from a 128-bit (quad) register.
3328 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3331 (match_operand:<VWIDE> 1 "register_operand" "0")
3334 (sign_extend:<VWIDE>
3335 (match_operand:VD_HSI 2 "register_operand" "w"))
3336 (sign_extend:<VWIDE>
3337 (vec_duplicate:VD_HSI
3339 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3340 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3345 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3347 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3349 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) variant — no vec_duplicate since the product is scalar.
3352 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3353 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3355 (match_operand:<VWIDE> 1 "register_operand" "0")
3358 (sign_extend:<VWIDE>
3359 (match_operand:SD_HSI 2 "register_operand" "w"))
3360 (sign_extend:<VWIDE>
3362 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3363 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3368 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3370 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3372 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar variant with the lane taken from a quad register.
3375 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3376 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3378 (match_operand:<VWIDE> 1 "register_operand" "0")
3381 (sign_extend:<VWIDE>
3382 (match_operand:SD_HSI 2 "register_operand" "w"))
3383 (sign_extend:<VWIDE>
3385 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3386 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3391 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3393 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3395 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL/SQDMLSL "_n" form: multiply every element of operand 2 by the
;; scalar in operand 3 (broadcast via vec_duplicate, printed as lane [0]).
;; NOTE(review): line numbers skip after 3410 — closing RTL/condition
;; lines of this pattern are elided in this excerpt.
3400 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3401 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3403 (match_operand:<VWIDE> 1 "register_operand" "0")
3406 (sign_extend:<VWIDE>
3407 (match_operand:VD_HSI 2 "register_operand" "w"))
3408 (sign_extend:<VWIDE>
3409 (vec_duplicate:VD_HSI
3410 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3413 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3414 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2/SQDMLSL2: operate on the high half of 128-bit inputs, selected
;; by the vect_par_cnst_hi_half parallel (operand 4).  The *_internal insn
;; carries the explicit hi-half selector; the expanders below synthesize it.
;; NOTE(review): interior lines are elided here (line numbers skip).
3419 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3420 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3422 (match_operand:<VWIDE> 1 "register_operand" "0")
3425 (sign_extend:<VWIDE>
3427 (match_operand:VQ_HSI 2 "register_operand" "w")
3428 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3429 (sign_extend:<VWIDE>
3431 (match_operand:VQ_HSI 3 "register_operand" "w")
3435 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3436 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits _internal.
3439 (define_expand "aarch64_sqdmlal2<mode>"
3440 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3441 (match_operand:<VWIDE> 1 "register_operand" "w")
3442 (match_operand:VQ_HSI 2 "register_operand" "w")
3443 (match_operand:VQ_HSI 3 "register_operand" "w")]
3446 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3447 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3448 operands[2], operands[3], p));
;; Subtracting counterpart of the expander above.
3452 (define_expand "aarch64_sqdmlsl2<mode>"
3453 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3454 (match_operand:<VWIDE> 1 "register_operand" "w")
3455 (match_operand:VQ_HSI 2 "register_operand" "w")
3456 (match_operand:VQ_HSI 3 "register_operand" "w")]
3459 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3460 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3461 operands[2], operands[3], p));
;; SQDMLAL2/SQDMLSL2 by-element forms: hi half of operand 2 multiplied by
;; one lane of operand 3 (duplicated to <VHALF>).  _lane = 64-bit source
;; register (<VCOND>), _laneq = 128-bit (<VCONQ>).  ENDIAN_LANE_N fixes
;; the lane number for big-endian.  The expanders below supply the hi-half
;; selector and forward to the *_internal insns.
;; NOTE(review): interior lines are elided in this excerpt.
3467 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3468 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3470 (match_operand:<VWIDE> 1 "register_operand" "0")
3473 (sign_extend:<VWIDE>
3475 (match_operand:VQ_HSI 2 "register_operand" "w")
3476 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3477 (sign_extend:<VWIDE>
3478 (vec_duplicate:<VHALF>
3480 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3481 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3486 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3488 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3490 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Quad-register lane source variant.
3493 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3494 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3496 (match_operand:<VWIDE> 1 "register_operand" "0")
3499 (sign_extend:<VWIDE>
3501 (match_operand:VQ_HSI 2 "register_operand" "w")
3502 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3503 (sign_extend:<VWIDE>
3504 (vec_duplicate:<VHALF>
3506 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3507 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3512 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3514 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3516 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders: construct the hi-half parallel and emit the matching insn.
3519 (define_expand "aarch64_sqdmlal2_lane<mode>"
3520 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3521 (match_operand:<VWIDE> 1 "register_operand" "w")
3522 (match_operand:VQ_HSI 2 "register_operand" "w")
3523 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3524 (match_operand:SI 4 "immediate_operand" "i")]
3527 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3528 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3529 operands[2], operands[3],
3534 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3535 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3536 (match_operand:<VWIDE> 1 "register_operand" "w")
3537 (match_operand:VQ_HSI 2 "register_operand" "w")
3538 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3539 (match_operand:SI 4 "immediate_operand" "i")]
3542 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3543 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3544 operands[2], operands[3],
3549 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3550 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3551 (match_operand:<VWIDE> 1 "register_operand" "w")
3552 (match_operand:VQ_HSI 2 "register_operand" "w")
3553 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3554 (match_operand:SI 4 "immediate_operand" "i")]
3557 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3558 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3559 operands[2], operands[3],
3564 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3565 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3566 (match_operand:<VWIDE> 1 "register_operand" "w")
3567 (match_operand:VQ_HSI 2 "register_operand" "w")
3568 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3569 (match_operand:SI 4 "immediate_operand" "i")]
3572 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3573 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3574 operands[2], operands[3],
;; SQDMLAL2/SQDMLSL2 "_n" form: hi half of operand 2 multiplied by the
;; broadcast scalar in operand 3 (assembled as lane [0]).  Expanders below
;; build the hi-half selector and forward to the _internal insn.
;; NOTE(review): interior lines are elided in this excerpt.
3579 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3580 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3582 (match_operand:<VWIDE> 1 "register_operand" "0")
3585 (sign_extend:<VWIDE>
3587 (match_operand:VQ_HSI 2 "register_operand" "w")
3588 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3589 (sign_extend:<VWIDE>
3590 (vec_duplicate:<VHALF>
3591 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3594 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3595 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3598 (define_expand "aarch64_sqdmlal2_n<mode>"
3599 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3600 (match_operand:<VWIDE> 1 "register_operand" "w")
3601 (match_operand:VQ_HSI 2 "register_operand" "w")
3602 (match_operand:<VEL> 3 "register_operand" "w")]
3605 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3606 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3607 operands[2], operands[3],
3612 (define_expand "aarch64_sqdmlsl2_n<mode>"
3613 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3614 (match_operand:<VWIDE> 1 "register_operand" "w")
3615 (match_operand:VQ_HSI 2 "register_operand" "w")
3616 (match_operand:<VEL> 3 "register_operand" "w")]
3619 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3620 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3621 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long (no accumulation).
;; Base form, by-element _lane/_laneq forms (vector VD_HSI and scalar
;; SD_HSI modes), and the broadcast-scalar _n form.  Lane indices are
;; remapped with ENDIAN_LANE_N for big-endian before printing.
;; NOTE(review): interior lines are elided in this excerpt (line numbers skip).
3628 (define_insn "aarch64_sqdmull<mode>"
3629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3632 (sign_extend:<VWIDE>
3633 (match_operand:VSD_HSI 1 "register_operand" "w"))
3634 (sign_extend:<VWIDE>
3635 (match_operand:VSD_HSI 2 "register_operand" "w")))
3638 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3639 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Vector by-element, lane from a 64-bit register.
3644 (define_insn "aarch64_sqdmull_lane<mode>"
3645 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3648 (sign_extend:<VWIDE>
3649 (match_operand:VD_HSI 1 "register_operand" "w"))
3650 (sign_extend:<VWIDE>
3651 (vec_duplicate:VD_HSI
3653 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3654 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3659 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3660 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3662 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Vector by-element, lane from a 128-bit register.
3665 (define_insn "aarch64_sqdmull_laneq<mode>"
3666 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3669 (sign_extend:<VWIDE>
3670 (match_operand:VD_HSI 1 "register_operand" "w"))
3671 (sign_extend:<VWIDE>
3672 (vec_duplicate:VD_HSI
3674 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3675 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3680 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3681 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3683 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar by-element variants (SD_HSI) — no vec_duplicate needed.
3686 (define_insn "aarch64_sqdmull_lane<mode>"
3687 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3690 (sign_extend:<VWIDE>
3691 (match_operand:SD_HSI 1 "register_operand" "w"))
3692 (sign_extend:<VWIDE>
3694 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3695 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3700 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3701 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3703 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3706 (define_insn "aarch64_sqdmull_laneq<mode>"
3707 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3710 (sign_extend:<VWIDE>
3711 (match_operand:SD_HSI 1 "register_operand" "w"))
3712 (sign_extend:<VWIDE>
3714 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3715 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3720 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3721 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3723 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Broadcast-scalar form: every element of operand 1 times operand 2.
3728 (define_insn "aarch64_sqdmull_n<mode>"
3729 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3732 (sign_extend:<VWIDE>
3733 (match_operand:VD_HSI 1 "register_operand" "w"))
3734 (sign_extend:<VWIDE>
3735 (vec_duplicate:VD_HSI
3736 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3740 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3741 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: like SQDMULL but operating on the high half of 128-bit
;; inputs (hi half selected by the vect_par_cnst_hi_half operand).
;; Internal insns carry the explicit selector; expanders build it with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true).
;; NOTE(review): interior lines are elided in this excerpt (line numbers skip).
3748 (define_insn "aarch64_sqdmull2<mode>_internal"
3749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3752 (sign_extend:<VWIDE>
3754 (match_operand:VQ_HSI 1 "register_operand" "w")
3755 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3756 (sign_extend:<VWIDE>
3758 (match_operand:VQ_HSI 2 "register_operand" "w")
3763 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3764 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3767 (define_expand "aarch64_sqdmull2<mode>"
3768 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3769 (match_operand:VQ_HSI 1 "register_operand" "w")
3770 (match_operand:VQ_HSI 2 "register_operand" "w")]
3773 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3774 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; By-element forms: lane from 64-bit (_lane) or 128-bit (_laneq) register,
;; with big-endian lane remapping via ENDIAN_LANE_N.
3781 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3782 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3785 (sign_extend:<VWIDE>
3787 (match_operand:VQ_HSI 1 "register_operand" "w")
3788 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3789 (sign_extend:<VWIDE>
3790 (vec_duplicate:<VHALF>
3792 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3793 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3798 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3799 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3801 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3804 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3805 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3808 (sign_extend:<VWIDE>
3810 (match_operand:VQ_HSI 1 "register_operand" "w")
3811 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3812 (sign_extend:<VWIDE>
3813 (vec_duplicate:<VHALF>
3815 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3816 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3821 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3822 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3824 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the by-element forms.
3827 (define_expand "aarch64_sqdmull2_lane<mode>"
3828 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3829 (match_operand:VQ_HSI 1 "register_operand" "w")
3830 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3831 (match_operand:SI 3 "immediate_operand" "i")]
3834 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3835 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3836 operands[2], operands[3],
3841 (define_expand "aarch64_sqdmull2_laneq<mode>"
3842 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3843 (match_operand:VQ_HSI 1 "register_operand" "w")
3844 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3845 (match_operand:SI 3 "immediate_operand" "i")]
3848 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3849 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3850 operands[2], operands[3],
;; Broadcast-scalar form on the hi half.
3857 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3858 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3861 (sign_extend:<VWIDE>
3863 (match_operand:VQ_HSI 1 "register_operand" "w")
3864 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3865 (sign_extend:<VWIDE>
3866 (vec_duplicate:<VHALF>
3867 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3871 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3872 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3875 (define_expand "aarch64_sqdmull2_n<mode>"
3876 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3877 (match_operand:VQ_HSI 1 "register_operand" "w")
3878 (match_operand:<VEL> 2 "register_operand" "w")]
3881 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3882 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Register-controlled shifts and widening shift-left-by-immediate.
;; <sur> iterates over the signed/unsigned/rounding unspec variants.
;; NOTE(review): interior lines are elided in this excerpt (line numbers skip).
3889 (define_insn "aarch64_<sur>shl<mode>"
3890 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3892 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3893 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3896 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3897 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (optionally rounding) register-controlled shift.
3903 (define_insn "aarch64_<sur>q<r>shl<mode>"
3904 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3906 [(match_operand:VSDQ_I 1 "register_operand" "w")
3907 (match_operand:VSDQ_I 2 "register_operand" "w")]
3910 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3911 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift left by immediate.  When the shift count equals the
;; element bit width the architecture only provides the plain SHLL form,
;; so that spelling is emitted instead of <sur>shll.
3916 (define_insn "aarch64_<sur>shll_n<mode>"
3917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3918 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3920 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3924 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3925 if (INTVAL (operands[2]) == bit_width)
3927 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3930 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3932 [(set_attr "type" "neon_shift_imm_long")]
;; High-half counterpart of the widening shift above.
3937 (define_insn "aarch64_<sur>shll2_n<mode>"
3938 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3939 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3940 (match_operand:SI 2 "immediate_operand" "i")]
3944 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3945 if (INTVAL (operands[2]) == bit_width)
3947 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3950 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3952 [(set_attr "type" "neon_shift_imm_long")]
;; Immediate-shift family: right shift, shift-and-accumulate (SRA),
;; shift-insert (SLI/SRI), saturating shift left, and saturating
;; (rounding) shift-right-narrow.  All take the shift amount as an
;; immediate checked by the aarch64_simd_shift_imm_* predicates.
;; NOTE(review): interior lines are elided in this excerpt.
3957 (define_insn "aarch64_<sur>shr_n<mode>"
3958 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3959 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3961 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3964 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3965 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift right and accumulate: operand 0 is tied to operand 1 ("0").
3970 (define_insn "aarch64_<sur>sra_n<mode>"
3971 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3972 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3973 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3975 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3978 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3979 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-left/right and insert; destination bits outside the shifted
;; field are preserved, hence the "0" tie on operand 1.
3984 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3985 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3986 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3987 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3989 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3992 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3993 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate (signed/unsigned/unsigned-saturating).
3998 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3999 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4000 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4002 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4005 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4006 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift right and narrow to the half-width mode.
4012 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4013 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4014 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4016 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4019 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4020 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4024 ;; cm(eq|ge|gt|lt|le)
4025 ;; Note, we have constraints for Dz and Z as different expanders
4026 ;; have different ideas of what should be passed to this pattern.
;; Signed integer vector compares (CMEQ/CMGE/CMGT/...).  Alternative 2
;; matches a zero operand (ZDz) and emits the compare-with-#0 form.
;; NOTE(review): interior lines are elided in this excerpt.
4028 (define_insn "aarch64_cm<optab><mode>"
4029 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4031 (COMPARISONS:<V_cmp_result>
4032 (match_operand:VDQ_I 1 "register_operand" "w,w")
4033 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4037 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4038 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4039 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare: when the operands end up in general registers this is
;; split into a flag-setting compare + cstoredi_neg; otherwise it splits
;; to the post-reload *aarch64_cm<optab>di pattern that avoids the CC
;; clobber.
4042 (define_insn_and_split "aarch64_cm<optab>di"
4043 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4046 (match_operand:DI 1 "register_operand" "w,w,r")
4047 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4049 (clobber (reg:CC CC_REGNUM))]
4053 [(set (match_operand:DI 0 "register_operand")
4056 (match_operand:DI 1 "register_operand")
4057 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4060 /* If we are in the general purpose register file,
4061 we split to a sequence of comparison and store. */
4062 if (GP_REGNUM_P (REGNO (operands[0]))
4063 && GP_REGNUM_P (REGNO (operands[1])))
4065 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4066 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4067 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4068 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4071 /* Otherwise, we expand to a similar pattern which does not
4072 clobber CC_REGNUM. */
4074 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI compare in SIMD registers; no CC clobber needed.
4077 (define_insn "*aarch64_cm<optab>di"
4078 [(set (match_operand:DI 0 "register_operand" "=w,w")
4081 (match_operand:DI 1 "register_operand" "w,w")
4082 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4084 "TARGET_SIMD && reload_completed"
4086 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4087 cm<optab>\t%d0, %d1, #0"
4088 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned integer vector compares (CMHS/CMHI); no compare-with-zero
;; alternative here since unsigned compares against zero are degenerate.
;; NOTE(review): interior lines are elided in this excerpt.
4093 (define_insn "aarch64_cm<optab><mode>"
4094 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4096 (UCOMPARISONS:<V_cmp_result>
4097 (match_operand:VDQ_I 1 "register_operand" "w")
4098 (match_operand:VDQ_I 2 "register_operand" "w")
4101 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4102 [(set_attr "type" "neon_compare<q>")]
;; DImode unsigned compare — same GP-vs-SIMD split strategy as the
;; signed version above, but with plain CCmode.
4105 (define_insn_and_split "aarch64_cm<optab>di"
4106 [(set (match_operand:DI 0 "register_operand" "=w,r")
4109 (match_operand:DI 1 "register_operand" "w,r")
4110 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4112 (clobber (reg:CC CC_REGNUM))]
4116 [(set (match_operand:DI 0 "register_operand")
4119 (match_operand:DI 1 "register_operand")
4120 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4123 /* If we are in the general purpose register file,
4124 we split to a sequence of comparison and store. */
4125 if (GP_REGNUM_P (REGNO (operands[0]))
4126 && GP_REGNUM_P (REGNO (operands[1])))
4128 machine_mode mode = CCmode;
4129 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4130 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4131 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4134 /* Otherwise, we expand to a similar pattern which does not
4135 clobber CC_REGNUM. */
4137 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload DI unsigned compare in SIMD registers.
4140 (define_insn "*aarch64_cm<optab>di"
4141 [(set (match_operand:DI 0 "register_operand" "=w")
4144 (match_operand:DI 1 "register_operand" "w")
4145 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4147 "TARGET_SIMD && reload_completed"
4148 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4149 [(set_attr "type" "neon_compare")]
4154 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4155 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4156 ;; not (neg (eq (and x y) 0))
4157 ;; which is rewritten by simplify_rtx as
4158 ;; plus (eq (and x y) 0) -1.
;; CMTST (test bits): matched as plus (eq (and x y) 0) -1, the canonical
;; form simplify_rtx produces (see the comment block above this pattern).
;; NOTE(review): interior lines are elided in this excerpt.
4160 (define_insn "aarch64_cmtst<mode>"
4161 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4162 (plus:<V_cmp_result>
4165 (match_operand:VDQ_I 1 "register_operand" "w")
4166 (match_operand:VDQ_I 2 "register_operand" "w"))
4167 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4168 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4171 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4172 [(set_attr "type" "neon_tst<q>")]
;; DImode CMTST — GP-vs-SIMD split strategy as for the compares above;
;; the GP path builds (and x y) != 0 and stores the negated result.
4175 (define_insn_and_split "aarch64_cmtstdi"
4176 [(set (match_operand:DI 0 "register_operand" "=w,r")
4180 (match_operand:DI 1 "register_operand" "w,r")
4181 (match_operand:DI 2 "register_operand" "w,r"))
4183 (clobber (reg:CC CC_REGNUM))]
4187 [(set (match_operand:DI 0 "register_operand")
4191 (match_operand:DI 1 "register_operand")
4192 (match_operand:DI 2 "register_operand"))
4195 /* If we are in the general purpose register file,
4196 we split to a sequence of comparison and store. */
4197 if (GP_REGNUM_P (REGNO (operands[0]))
4198 && GP_REGNUM_P (REGNO (operands[1])))
4200 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4201 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4202 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4203 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4204 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4207 /* Otherwise, we expand to a similar pattern which does not
4208 clobber CC_REGNUM. */
4210 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload DI CMTST in SIMD registers.
4213 (define_insn "*aarch64_cmtstdi"
4214 [(set (match_operand:DI 0 "register_operand" "=w")
4218 (match_operand:DI 1 "register_operand" "w")
4219 (match_operand:DI 2 "register_operand" "w"))
4222 "cmtst\t%d0, %d1, %d2"
4223 [(set_attr "type" "neon_tst")]
4226 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares (FCMEQ/FCMGE/...); alternative 2 matches a
;; floating-point zero (YDz) and emits the compare-with-0 form.
;; NOTE(review): interior lines are elided in this excerpt.
4228 (define_insn "aarch64_cm<optab><mode>"
4229 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4231 (COMPARISONS:<V_cmp_result>
4232 (match_operand:VALLF 1 "register_operand" "w,w")
4233 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4237 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4238 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4239 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; Absolute compares (FACGE/FACGT): compare |op1| with |op2|; the
;; facle/faclt forms are handled by operand swapping (see comment above).
4246 (define_insn "*aarch64_fac<optab><mode>"
4247 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4249 (FAC_COMPARISONS:<V_cmp_result>
4250 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
4251 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
4254 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4255 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; Pairwise add (ADDP) for 64-bit integer vectors, the V2DI->DI scalar
;; reduction form, and vector square root.
;; NOTE(review): interior lines are elided in this excerpt.
4260 (define_insn "aarch64_addp<mode>"
4261 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4263 [(match_operand:VD_BHSI 1 "register_operand" "w")
4264 (match_operand:VD_BHSI 2 "register_operand" "w")]
4267 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4268 [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar ADDP: reduce the two DI lanes of a V2DI register.
4271 (define_insn "aarch64_addpdi"
4272 [(set (match_operand:DI 0 "register_operand" "=w")
4274 [(match_operand:V2DI 1 "register_operand" "w")]
4278 [(set_attr "type" "neon_reduc_add")]
;; Vector floating-point square root (FSQRT).
4283 (define_insn "sqrt<mode>2"
4284 [(set (match_operand:VDQF 0 "register_operand" "=w")
4285 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
4287 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4288 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
4291 ;; Patterns for vector struct loads and stores.
;; Two-register structure loads/stores (LD2/LD2R/ST2).  OImode holds the
;; two-vector register tuple; the inner VQ/VALLDIF unspec only records
;; the element mode.  The vec_load/store_lanes expanders reverse the
;; register list on big-endian via aarch64_rev_reglistoi so lane numbering
;; matches GCC's vector-extension ordering.
;; NOTE(review): interior lines are elided in this excerpt.
4293 (define_insn "aarch64_simd_ld2<mode>"
4294 [(set (match_operand:OI 0 "register_operand" "=w")
4295 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4296 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4299 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4300 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one structure and replicate to all lanes of both registers.
4303 (define_insn "aarch64_simd_ld2r<mode>"
4304 [(set (match_operand:OI 0 "register_operand" "=w")
4305 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4306 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4309 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4310 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 single-lane: loads one structure into lane %3 of the tuple,
;; remapping the lane for big-endian with ENDIAN_LANE_N.
4313 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4314 [(set (match_operand:OI 0 "register_operand" "=w")
4315 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4316 (match_operand:OI 2 "register_operand" "0")
4317 (match_operand:SI 3 "immediate_operand" "i")
4318 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4322 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4323 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4325 [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander: on big-endian, load then reverse the reglist.
4328 (define_expand "vec_load_lanesoi<mode>"
4329 [(set (match_operand:OI 0 "register_operand" "=w")
4330 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4331 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4335 if (BYTES_BIG_ENDIAN)
4337 rtx tmp = gen_reg_rtx (OImode);
4338 rtx mask = aarch64_reverse_mask (<MODE>mode);
4339 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4340 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4343 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; ST2: store the two-register tuple to memory.
4347 (define_insn "aarch64_simd_st2<mode>"
4348 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4349 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4350 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4353 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4354 [(set_attr "type" "neon_store2_2reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
4358 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4359 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4360 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4361 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4362 (match_operand:SI 2 "immediate_operand" "i")]
4366 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4367 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4369 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-name store expander: reverse the reglist first on big-endian.
4372 (define_expand "vec_store_lanesoi<mode>"
4373 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4374 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4375 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4379 if (BYTES_BIG_ENDIAN)
4381 rtx tmp = gen_reg_rtx (OImode);
4382 rtx mask = aarch64_reverse_mask (<MODE>mode);
4383 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4384 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4387 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]))
;; Three-register structure loads/stores (LD3/LD3R/ST3) — same scheme as
;; the LD2/ST2 family above but with CImode three-vector tuples and
;; aarch64_rev_reglistci for big-endian reglist reversal.
;; NOTE(review): interior lines are elided in this excerpt.
4391 (define_insn "aarch64_simd_ld3<mode>"
4392 [(set (match_operand:CI 0 "register_operand" "=w")
4393 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4394 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4397 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4398 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one structure and replicate to all lanes.
4401 (define_insn "aarch64_simd_ld3r<mode>"
4402 [(set (match_operand:CI 0 "register_operand" "=w")
4403 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4404 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4407 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4408 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 single-lane with big-endian lane remapping.
4411 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4412 [(set (match_operand:CI 0 "register_operand" "=w")
4413 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4414 (match_operand:CI 2 "register_operand" "0")
4415 (match_operand:SI 3 "immediate_operand" "i")
4416 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4420 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4421 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4423 [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name expander with big-endian reglist reversal.
4426 (define_expand "vec_load_lanesci<mode>"
4427 [(set (match_operand:CI 0 "register_operand" "=w")
4428 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4429 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4433 if (BYTES_BIG_ENDIAN)
4435 rtx tmp = gen_reg_rtx (CImode);
4436 rtx mask = aarch64_reverse_mask (<MODE>mode);
4437 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4438 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4441 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4445 (define_insn "aarch64_simd_st3<mode>"
4446 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4447 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4448 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4451 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4452 [(set_attr "type" "neon_store3_3reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
4456 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4457 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4458 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4459 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4460 (match_operand:SI 2 "immediate_operand" "i")]
4464 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4465 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4467 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name store expander with big-endian reglist reversal.
4470 (define_expand "vec_store_lanesci<mode>"
4471 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4472 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4473 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4477 if (BYTES_BIG_ENDIAN)
4479 rtx tmp = gen_reg_rtx (CImode);
4480 rtx mask = aarch64_reverse_mask (<MODE>mode);
4481 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4482 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4485 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; 4-vector structured loads/stores on an XI (4xQ) register tuple.
;; (Extraction note: some original lines, e.g. UNSPEC names and closing
;; parens, are missing from this chunk.)
;;
;; LD4: interleaved load of four vectors.
4489 (define_insn "aarch64_simd_ld4<mode>"
4490 [(set (match_operand:XI 0 "register_operand" "=w")
4491 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4492 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4495 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4496 [(set_attr "type" "neon_load4_4reg<q>")]

;; LD4R: load one 4-element structure and replicate it to all lanes.
4499 (define_insn "aarch64_simd_ld4r<mode>"
4500 [(set (match_operand:XI 0 "register_operand" "=w")
4501 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4502 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4505 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4506 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; Single-lane LD4: operand 2 ("0") ties the destination tuple to the input
;; so untouched lanes are preserved; operand 3 is a GCC-order lane index
;; converted by ENDIAN_LANE_N for the assembly output.
4509 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4510 [(set (match_operand:XI 0 "register_operand" "=w")
4511 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4512 (match_operand:XI 2 "register_operand" "0")
4513 (match_operand:SI 3 "immediate_operand" "i")
4514 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4518 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4519 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4521 [(set_attr "type" "neon_load4_one_lane")]

;; vec_load_lanesxi: on big-endian, LD4 into a temporary then reverse the
;; register list (aarch64_rev_reglistxi) so lanes match GCC vector order.
4524 (define_expand "vec_load_lanesxi<mode>"
4525 [(set (match_operand:XI 0 "register_operand" "=w")
4526 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4527 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4531 if (BYTES_BIG_ENDIAN)
4533 rtx tmp = gen_reg_rtx (XImode);
4534 rtx mask = aarch64_reverse_mask (<MODE>mode);
4535 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4536 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4539 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

;; ST4: interleaved store of four vectors.
4543 (define_insn "aarch64_simd_st4<mode>"
4544 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4545 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4546 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4549 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4550 [(set_attr "type" "neon_store4_4reg<q>")]

;; Single-lane ST4, lane index flipped only in the emitted assembly.
4553 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4554 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4555 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4556 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4557 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4558 (match_operand:SI 2 "immediate_operand" "i")]
4562 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4563 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4565 [(set_attr "type" "neon_store4_one_lane<q>")]

;; vec_store_lanesxi: reverse the register list first on big-endian,
;; then ST4.
4568 (define_expand "vec_store_lanesxi<mode>"
4569 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4570 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4571 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4575 if (BYTES_BIG_ENDIAN)
4577 rtx tmp = gen_reg_rtx (XImode);
4578 rtx mask = aarch64_reverse_mask (<MODE>mode);
4579 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4580 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4583 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse each vector in a structure register list for big-endian lane
;; correction.  After reload, splits into one TBL (tbl1v16qi) per 128-bit
;; register in the list, using operand 2 as the byte-permute mask.
;; "=&w" (early-clobber) keeps the destination list disjoint from the source
;; while the per-register TBLs are emitted.
4587 (define_insn_and_split "aarch64_rev_reglist<mode>"
4588 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4590 [(match_operand:VSTRUCT 1 "register_operand" "w")
4591 (match_operand:V16QI 2 "register_operand" "w")]
4592 UNSPEC_REV_REGLIST))]
4595 "&& reload_completed"
;; nregs = number of 128-bit registers covered by the struct mode.
4599 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4600 for (i = 0; i < nregs; i++)
4602 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4603 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4604 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4608 [(set_attr "type" "neon_tbl1_q")
4609 (set_attr "length" "<insn_count>")]
4612 ;; Reload patterns for AdvSIMD register list operands.

;; Move expander for structure (register-list) modes.  Before reload, force
;; a non-register destination's source into a register so the insn below can
;; always match (it requires at least one register operand).
4614 (define_expand "mov<mode>"
4615 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4616 (match_operand:VSTRUCT 1 "general_operand" ""))]
4619 if (can_create_pseudo_p ())
4621 if (GET_CODE (operands[0]) != REG)
4622 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Little-endian structure move: reg-reg (split later, type "multiple"),
;; or ST1/LD1 over the whole register list.  Explicitly restricted to
;; !BYTES_BIG_ENDIAN; the *aarch64_be_mov* patterns below handle big-endian.
4626 (define_insn "*aarch64_mov<mode>"
4627 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4628 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4629 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4630 && (register_operand (operands[0], <MODE>mode)
4631 || register_operand (operands[1], <MODE>mode))"
4634 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4635 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4636 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4637 neon_load<nregs>_<nregs>reg_q")
4638 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; Big-endian single-vector load/store wrapped in an unspec so the element
;; ordering produced by LD1/ST1 is not reinterpreted by the middle end.
4641 (define_insn "aarch64_be_ld1<mode>"
4642 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4643 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4644 "aarch64_simd_struct_operand" "Utv")]
4647 "ld1\\t{%0<Vmtype>}, %1"
4648 [(set_attr "type" "neon_load1_1reg<q>")]

;; Store counterpart of aarch64_be_ld1.
4651 (define_insn "aarch64_be_st1<mode>"
4652 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4653 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4656 "st1\\t{%1<Vmtype>}, %0"
4657 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of opaque structure modes (OI = 2xQ, CI = 3xQ, XI = 4xQ).
;; Each requires at least one register operand and is split after reload;
;; length is computed by aarch64_simd_attr_length_move.
4660 (define_insn "*aarch64_be_movoi"
4661 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4662 (match_operand:OI 1 "general_operand" " w,w,m"))]
4663 "TARGET_SIMD && BYTES_BIG_ENDIAN
4664 && (register_operand (operands[0], OImode)
4665 || register_operand (operands[1], OImode))"
4670 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4671 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]

;; CI uses "o" (offsettable memory) since the 48-byte access is decomposed.
4674 (define_insn "*aarch64_be_movci"
4675 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4676 (match_operand:CI 1 "general_operand" " w,w,o"))]
4677 "TARGET_SIMD && BYTES_BIG_ENDIAN
4678 && (register_operand (operands[0], CImode)
4679 || register_operand (operands[1], CImode))"
4681 [(set_attr "type" "multiple")
4682 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]

;; XI (64 bytes), likewise decomposed after reload.
4685 (define_insn "*aarch64_be_movxi"
4686 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4687 (match_operand:XI 1 "general_operand" " w,w,o"))]
4688 "TARGET_SIMD && BYTES_BIG_ENDIAN
4689 && (register_operand (operands[0], XImode)
4690 || register_operand (operands[1], XImode))"
4692 [(set_attr "type" "multiple")
4693 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; Post-reload splits that decompose structure-mode moves into TImode /
;; OImode pieces.  (Extraction note: the "(define_split" opener lines and
;; some subreg-offset lines are missing from this chunk.)
;;
;; OI reg-reg move -> two TImode register moves.
4697 [(set (match_operand:OI 0 "register_operand")
4698 (match_operand:OI 1 "register_operand"))]
4699 "TARGET_SIMD && reload_completed"
4702 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; CI move: reg-reg -> three TImode moves; big-endian memory case ->
;; one OImode move for the first 32 bytes plus a V16QI move (via TImode
;; subreg) for the last 16 bytes.
4707 [(set (match_operand:CI 0 "nonimmediate_operand")
4708 (match_operand:CI 1 "general_operand"))]
4709 "TARGET_SIMD && reload_completed"
4712 if (register_operand (operands[0], CImode)
4713 && register_operand (operands[1], CImode))
4715 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4718 else if (BYTES_BIG_ENDIAN)
4720 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4721 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4722 emit_move_insn (gen_lowpart (V16QImode,
4723 simplify_gen_subreg (TImode, operands[0],
4725 gen_lowpart (V16QImode,
4726 simplify_gen_subreg (TImode, operands[1],

;; XI move: reg-reg -> four TImode moves; big-endian memory case ->
;; two OImode moves at offsets 0 and 32.
4735 [(set (match_operand:XI 0 "nonimmediate_operand")
4736 (match_operand:XI 1 "general_operand"))]
4737 "TARGET_SIMD && reload_completed"
4740 if (register_operand (operands[0], XImode)
4741 && register_operand (operands[1], XImode))
4743 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4746 else if (BYTES_BIG_ENDIAN)
4748 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4749 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4750 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4751 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for LDnR (load one structure, replicate to all lanes).
;; Wraps the pointer in a BLKmode MEM whose size is element-size * nregs,
;; then defers to the matching aarch64_simd_ld<n>r insn.
4758 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4759 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4760 (match_operand:DI 1 "register_operand" "w")
4761 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4764 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4765 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4768 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4773 (define_insn "aarch64_ld2<mode>_dreg"

;; D-register LD2/LD3/LD4 forms: the result is built as a concatenation of
;; 64-bit vectors zero-padded to 128 bits (vec_concat with a zero dup),
;; then viewed as the OI/CI/XI tuple mode.  VD variants emit ld2/ld3/ld4;
;; DX (64-bit scalar element) variants emit ld1 over .1d registers.
4774 [(set (match_operand:OI 0 "register_operand" "=w")
4779 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4781 (vec_duplicate:VD (const_int 0)))
4783 (unspec:VD [(match_dup 1)]
4785 (vec_duplicate:VD (const_int 0)))) 0))]
4787 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4788 [(set_attr "type" "neon_load2_2reg<q>")]

;; DX variant of ld2_dreg: plain LD1 of two .1d registers.
4791 (define_insn "aarch64_ld2<mode>_dreg"
4792 [(set (match_operand:OI 0 "register_operand" "=w")
4797 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4801 (unspec:DX [(match_dup 1)]
4803 (const_int 0))) 0))]
4805 "ld1\\t{%S0.1d - %T0.1d}, %1"
4806 [(set_attr "type" "neon_load1_2reg<q>")]

;; LD3 into three D registers (CI tuple).
4809 (define_insn "aarch64_ld3<mode>_dreg"
4810 [(set (match_operand:CI 0 "register_operand" "=w")
4816 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4818 (vec_duplicate:VD (const_int 0)))
4820 (unspec:VD [(match_dup 1)]
4822 (vec_duplicate:VD (const_int 0))))
4824 (unspec:VD [(match_dup 1)]
4826 (vec_duplicate:VD (const_int 0)))) 0))]
4828 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4829 [(set_attr "type" "neon_load3_3reg<q>")]

;; DX variant of ld3_dreg: LD1 of three .1d registers.
4832 (define_insn "aarch64_ld3<mode>_dreg"
4833 [(set (match_operand:CI 0 "register_operand" "=w")
4839 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4843 (unspec:DX [(match_dup 1)]
4847 (unspec:DX [(match_dup 1)]
4849 (const_int 0))) 0))]
4851 "ld1\\t{%S0.1d - %U0.1d}, %1"
4852 [(set_attr "type" "neon_load1_3reg<q>")]

;; LD4 into four D registers (XI tuple).
4855 (define_insn "aarch64_ld4<mode>_dreg"
4856 [(set (match_operand:XI 0 "register_operand" "=w")
4862 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4864 (vec_duplicate:VD (const_int 0)))
4866 (unspec:VD [(match_dup 1)]
4868 (vec_duplicate:VD (const_int 0))))
4871 (unspec:VD [(match_dup 1)]
4873 (vec_duplicate:VD (const_int 0)))
4875 (unspec:VD [(match_dup 1)]
4877 (vec_duplicate:VD (const_int 0))))) 0))]
4879 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4880 [(set_attr "type" "neon_load4_4reg<q>")]

;; DX variant of ld4_dreg: LD1 of four .1d registers.
4883 (define_insn "aarch64_ld4<mode>_dreg"
4884 [(set (match_operand:XI 0 "register_operand" "=w")
4890 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4894 (unspec:DX [(match_dup 1)]
4899 (unspec:DX [(match_dup 1)]
4903 (unspec:DX [(match_dup 1)]
4905 (const_int 0)))) 0))]
4907 "ld1\\t{%S0.1d - %V0.1d}, %1"
4908 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for D-register structure loads: builds a BLKmode MEM of
;; nregs * 8 bytes over the pointer, then emits the matching *_dreg insn.
4911 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4912 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4913 (match_operand:DI 1 "register_operand" "r")
4914 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4917 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4918 set_mem_size (mem, <VSTRUCT:nregs> * 8);
4920 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));

;; ld1 builtin: a plain vector load.  On big-endian go through the
;; aarch64_be_ld1 unspec to pin element ordering; otherwise a normal move.
4924 (define_expand "aarch64_ld1<VALL_F16:mode>"
4925 [(match_operand:VALL_F16 0 "register_operand")
4926 (match_operand:DI 1 "register_operand")]
4929 machine_mode mode = <VALL_F16:MODE>mode;
4930 rtx mem = gen_rtx_MEM (mode, operands[1]);
4932 if (BYTES_BIG_ENDIAN)
4933 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
4935 emit_move_insn (operands[0], mem);

;; Q-register structure load builtin: MEM in the tuple mode itself.
4939 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4940 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4941 (match_operand:DI 1 "register_operand" "r")
4942 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945 machine_mode mode = <VSTRUCT:MODE>mode;
4946 rtx mem = gen_rtx_MEM (mode, operands[1]);
4948 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));

;; Lane-wise structure load builtin: checks the lane index against the
;; vector's lane count (aarch64_simd_lane_bounds) before emitting the
;; single-lane load insn; operand 2 supplies the unmodified lanes.
4952 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
4953 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4954 (match_operand:DI 1 "register_operand" "w")
4955 (match_operand:VSTRUCT 2 "register_operand" "0")
4956 (match_operand:SI 3 "immediate_operand" "i")
4957 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4960 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4961 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4964 aarch64_simd_lane_bounds (operands[3], 0,
4965 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
4967 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
4968 operands[0], mem, operands[2], operands[3]));
4972 ;; Expanders for builtins to extract vector registers from large
4973 ;; opaque integer modes.

;; get_dreg: pull D-register <part> out of a structure tuple.  Each 64-bit
;; part lives in the low half of a 128-bit slot, so the subreg offset is
;; part * 16; the value is taken via a VDBL temporary then narrowed.
4977 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4978 [(match_operand:VDC 0 "register_operand" "=w")
4979 (match_operand:VSTRUCT 1 "register_operand" "w")
4980 (match_operand:SI 2 "immediate_operand" "i")]
4983 int part = INTVAL (operands[2]);
4984 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4985 int offset = part * 16;
4987 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4988 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; get_qreg: pull Q-register <part> out of a structure tuple directly via
;; a subreg at byte offset part * 16.
4994 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4995 [(match_operand:VQ 0 "register_operand" "=w")
4996 (match_operand:VSTRUCT 1 "register_operand" "w")
4997 (match_operand:SI 2 "immediate_operand" "i")]
5000 int part = INTVAL (operands[2]);
5001 int offset = part * 16;
5003 emit_move_insn (operands[0],
5004 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5008 ;; Permuted-store expanders for neon intrinsics.

5010 ;; Permute instructions

;; Constant permute: succeeds only if aarch64_expand_vec_perm_const can map
;; the selector (operand 3) onto native permute instructions; otherwise the
;; expander FAILs and the generic fallback is used.
5014 (define_expand "vec_perm_const<mode>"
5015 [(match_operand:VALL_F16 0 "register_operand")
5016 (match_operand:VALL_F16 1 "register_operand")
5017 (match_operand:VALL_F16 2 "register_operand")
5018 (match_operand:<V_cmp_result> 3)]
5021 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5022 operands[2], operands[3]))

;; Variable permute (byte vectors only): selector in a register,
;; expanded by aarch64_expand_vec_perm.
5028 (define_expand "vec_perm<mode>"
5029 [(match_operand:VB 0 "register_operand")
5030 (match_operand:VB 1 "register_operand")
5031 (match_operand:VB 2 "register_operand")
5032 (match_operand:VB 3 "register_operand")]
5035 aarch64_expand_vec_perm (operands[0], operands[1],
5036 operands[2], operands[3]);
;; Table-lookup family.  TBL writes zero for out-of-range indices; TBX (the
;; "0"-tied variants below) leaves the destination lane unchanged instead.
;;
;; One-register table lookup.
5040 (define_insn "aarch64_tbl1<mode>"
5041 [(set (match_operand:VB 0 "register_operand" "=w")
5042 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5043 (match_operand:VB 2 "register_operand" "w")]
5046 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5047 [(set_attr "type" "neon_tbl1<q>")]

5050 ;; Two source registers.

;; Table is an OI pair of consecutive registers.
5052 (define_insn "aarch64_tbl2v16qi"
5053 [(set (match_operand:V16QI 0 "register_operand" "=w")
5054 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5055 (match_operand:V16QI 2 "register_operand" "w")]
5058 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5059 [(set_attr "type" "neon_tbl2_q")]

;; Two-register TBL with byte-vector result (%S0/%S2 select first register
;; of the operand's list).
5062 (define_insn "aarch64_tbl3<mode>"
5063 [(set (match_operand:VB 0 "register_operand" "=w")
5064 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5065 (match_operand:VB 2 "register_operand" "w")]
5068 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5069 [(set_attr "type" "neon_tbl3")]

;; Two-register TBX: operand 1 tied to the output ("0") supplies the
;; fallback lanes.
5072 (define_insn "aarch64_tbx4<mode>"
5073 [(set (match_operand:VB 0 "register_operand" "=w")
5074 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5075 (match_operand:OI 2 "register_operand" "w")
5076 (match_operand:VB 3 "register_operand" "w")]
5079 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5080 [(set_attr "type" "neon_tbl4")]

5083 ;; Three source registers.

;; Three-register TBL (table is a CI triple).
5085 (define_insn "aarch64_qtbl3<mode>"
5086 [(set (match_operand:VB 0 "register_operand" "=w")
5087 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5088 (match_operand:VB 2 "register_operand" "w")]
5091 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5092 [(set_attr "type" "neon_tbl3")]

;; Three-register TBX.
5095 (define_insn "aarch64_qtbx3<mode>"
5096 [(set (match_operand:VB 0 "register_operand" "=w")
5097 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5098 (match_operand:CI 2 "register_operand" "w")
5099 (match_operand:VB 3 "register_operand" "w")]
5102 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5103 [(set_attr "type" "neon_tbl3")]

5106 ;; Four source registers.

;; Four-register TBL (table is an XI quad).
5108 (define_insn "aarch64_qtbl4<mode>"
5109 [(set (match_operand:VB 0 "register_operand" "=w")
5110 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5111 (match_operand:VB 2 "register_operand" "w")]
5114 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5115 [(set_attr "type" "neon_tbl4")]

;; Four-register TBX.
5118 (define_insn "aarch64_qtbx4<mode>"
5119 [(set (match_operand:VB 0 "register_operand" "=w")
5120 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5121 (match_operand:XI 2 "register_operand" "w")
5122 (match_operand:VB 3 "register_operand" "w")]
5125 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5126 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OI pair; split into real moves after
;; reload by aarch64_split_combinev16qi.
5129 (define_insn_and_split "aarch64_combinev16qi"
5130 [(set (match_operand:OI 0 "register_operand" "=w")
5131 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5132 (match_operand:V16QI 2 "register_operand" "w")]
5136 "&& reload_completed"
5139 aarch64_split_combinev16qi (operands);
5142 [(set_attr "type" "multiple")]

;; ZIP/UZP/TRN-style permutes: mnemonic and hi/lo half come from the
;; PERMUTE iterator attributes.
5145 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5146 [(set (match_operand:VALL 0 "register_operand" "=w")
5147 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5148 (match_operand:VALL 2 "register_operand" "w")]
5151 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5152 [(set_attr "type" "neon_permute<q>")]

;; EXT: operand 3 is a lane index, scaled to a byte offset for the
;; instruction's immediate.
5155 ;; Note immediate (third) operand is lane index not byte index.
5156 (define_insn "aarch64_ext<mode>"
5157 [(set (match_operand:VALL 0 "register_operand" "=w")
5158 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
5159 (match_operand:VALL 2 "register_operand" "w")
5160 (match_operand:SI 3 "immediate_operand" "i")]
5164 operands[3] = GEN_INT (INTVAL (operands[3])
5165 * GET_MODE_UNIT_SIZE (<MODE>mode));
5166 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5168 [(set_attr "type" "neon_ext<q>")]

;; REV16/REV32/REV64 element reversal, selected by the REVERSE iterator.
5171 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5172 [(set (match_operand:VALL 0 "register_operand" "=w")
5173 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
5176 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5177 [(set_attr "type" "neon_rev<q>")]
;; D-register structure stores.  VD element variants emit the interleaving
;; st2/st3/st4; DX (64-bit scalar element) variants emit a plain st1 over
;; .1d registers.  The stored value is the low D registers of an OI/CI/XI
;; tuple, written to a BLKmode memory destination.
5180 (define_insn "aarch64_st2<mode>_dreg"
5181 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5182 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5183 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5186 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5187 [(set_attr "type" "neon_store2_2reg")]

;; DX variant: st1 of two .1d registers.
5190 (define_insn "aarch64_st2<mode>_dreg"
5191 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5192 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5193 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5196 "st1\\t{%S1.1d - %T1.1d}, %0"
5197 [(set_attr "type" "neon_store1_2reg")]

;; ST3 of three D registers.
5200 (define_insn "aarch64_st3<mode>_dreg"
5201 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5202 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5203 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5206 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5207 [(set_attr "type" "neon_store3_3reg")]

;; DX variant: st1 of three .1d registers.
5210 (define_insn "aarch64_st3<mode>_dreg"
5211 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5212 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5213 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5216 "st1\\t{%S1.1d - %U1.1d}, %0"
5217 [(set_attr "type" "neon_store1_3reg")]

;; ST4 of four D registers.
5220 (define_insn "aarch64_st4<mode>_dreg"
5221 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5222 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5223 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5226 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5227 [(set_attr "type" "neon_store4_4reg")]

;; DX variant: st1 of four .1d registers.
5230 (define_insn "aarch64_st4<mode>_dreg"
5231 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5232 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5233 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5236 "st1\\t{%S1.1d - %V1.1d}, %0"
5237 [(set_attr "type" "neon_store1_4reg")]
;; Store-builtin expanders, mirroring the load expanders above.
;;
;; D-register structure store: BLKmode MEM of nregs * 8 bytes, then the
;; matching *_dreg insn.
5240 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5241 [(match_operand:DI 0 "register_operand" "r")
5242 (match_operand:VSTRUCT 1 "register_operand" "w")
5243 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5246 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5247 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5249 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));

;; Q-register structure store: MEM in the tuple mode itself.
5253 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5254 [(match_operand:DI 0 "register_operand" "r")
5255 (match_operand:VSTRUCT 1 "register_operand" "w")
5256 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259 machine_mode mode = <VSTRUCT:MODE>mode;
5260 rtx mem = gen_rtx_MEM (mode, operands[0]);
5262 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; Lane-wise structure store builtin.
5266 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5267 [(match_operand:DI 0 "register_operand" "r")
5268 (match_operand:VSTRUCT 1 "register_operand" "w")
5269 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5270 (match_operand:SI 2 "immediate_operand")]
5273 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5274 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5277 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5278 mem, operands[1], operands[2]));

;; st1 builtin: aarch64_be_st1 on big-endian (to pin element order),
;; otherwise a plain move to memory.
5282 (define_expand "aarch64_st1<VALL_F16:mode>"
5283 [(match_operand:DI 0 "register_operand")
5284 (match_operand:VALL_F16 1 "register_operand")]
5287 machine_mode mode = <VALL_F16:MODE>mode;
5288 rtx mem = gen_rtx_MEM (mode, operands[0]);
5290 if (BYTES_BIG_ENDIAN)
5291 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]))
5293 emit_move_insn (mem, operands[1]);
5297 ;; Expander for builtins to insert vector registers into large
5298 ;; opaque integer modes.

5300 ;; Q-register list. We don't need a D-reg inserter as we zero
5301 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; set_qreg: copy the whole tuple (operand 1 -> operand 0), then overwrite
;; Q-register <part> via a subreg at byte offset part * 16.
5303 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5304 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5305 (match_operand:VSTRUCT 1 "register_operand" "0")
5306 (match_operand:VQ 2 "register_operand" "w")
5307 (match_operand:SI 3 "immediate_operand" "i")]
5310 int part = INTVAL (operands[3]);
5311 int offset = part * 16;
5313 emit_move_insn (operands[0], operands[1]);
5314 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),

5319 ;; Standard pattern name vec_init<mode>.

;; Delegates vector-construction to aarch64_expand_vector_init.
5321 (define_expand "vec_init<mode>"
5322 [(match_operand:VALL_F16 0 "register_operand" "")
5323 (match_operand 1 "" "")]
5326 aarch64_expand_vector_init (operands[0], operands[1]);

;; LD1R: load one element from memory and replicate to every lane.
5330 (define_insn "*aarch64_simd_ld1r<mode>"
5331 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5332 (vec_duplicate:VALL_F16
5333 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5335 "ld1r\\t{%0.<Vtype>}, %1"
5336 [(set_attr "type" "neon_load1_all_lanes")]
;; Reciprocal-estimate / reciprocal-step instructions.
;;
;; FRECPE: vector floating-point reciprocal estimate.
5339 (define_insn "aarch64_frecpe<mode>"
5340 [(set (match_operand:VDQF 0 "register_operand" "=w")
5341 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5344 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5345 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]

;; Scalar FRECPE/FRECPX, suffix selected by the FRECP iterator.
5348 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5349 [(set (match_operand:GPF 0 "register_operand" "=w")
5350 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5353 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5354 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]

;; FRECPS: Newton-Raphson reciprocal step (two inputs).
5357 (define_insn "aarch64_frecps<mode>"
5358 [(set (match_operand:VALLF 0 "register_operand" "=w")
5359 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
5360 (match_operand:VALLF 2 "register_operand" "w")]
5363 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5364 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]

;; URECPE: unsigned integer reciprocal estimate.
5367 (define_insn "aarch64_urecpe<mode>"
5368 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5369 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5372 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5373 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5375 ;; Standard pattern name vec_extract<mode>.

;; Extract element <operand 2> of vector operand 1 via the get_lane insn.
5377 (define_expand "vec_extract<mode>"
5378 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5379 (match_operand:VALL_F16 1 "register_operand" "")
5380 (match_operand:SI 2 "immediate_operand" "")]
5384 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; Cryptography extension patterns (all gated on TARGET_SIMD && TARGET_CRYPTO).
;;
;; AESE/AESD: operand 1 ("0") is the accumulated state tied to the output;
;; operand 2 is the round key.
5390 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5391 [(set (match_operand:V16QI 0 "register_operand" "=w")
5392 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5393 (match_operand:V16QI 2 "register_operand" "w")]
5395 "TARGET_SIMD && TARGET_CRYPTO"
5396 "aes<aes_op>\\t%0.16b, %2.16b"
5397 [(set_attr "type" "crypto_aese")]

;; AESMC/AESIMC: (inverse) mix-columns step.
5400 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5401 [(set (match_operand:V16QI 0 "register_operand" "=w")
5402 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
5404 "TARGET_SIMD && TARGET_CRYPTO"
5405 "aes<aesmc_op>\\t%0.16b, %1.16b"
5406 [(set_attr "type" "crypto_aesmc")]

;; SHA1H: fixed-rotate of a single 32-bit word held in a SIMD register.
5411 (define_insn "aarch64_crypto_sha1hsi"
5412 [(set (match_operand:SI 0 "register_operand" "=w")
5413 (unspec:SI [(match_operand:SI 1
5414 "register_operand" "w")]
5416 "TARGET_SIMD && TARGET_CRYPTO"
5418 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1SU1: second schedule-update step; operand 1 tied to output.
5421 (define_insn "aarch64_crypto_sha1su1v4si"
5422 [(set (match_operand:V4SI 0 "register_operand" "=w")
5423 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5424 (match_operand:V4SI 2 "register_operand" "w")]
5426 "TARGET_SIMD && TARGET_CRYPTO"
5427 "sha1su1\\t%0.4s, %2.4s"
5428 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1C/SHA1M/SHA1P hash-update, mnemonic from the sha1_op iterator;
;; %q0 = hash state, %s2 = scalar e value, operand 3 = schedule words.
5431 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5432 [(set (match_operand:V4SI 0 "register_operand" "=w")
5433 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5434 (match_operand:SI 2 "register_operand" "w")
5435 (match_operand:V4SI 3 "register_operand" "w")]
5437 "TARGET_SIMD && TARGET_CRYPTO"
5438 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5439 [(set_attr "type" "crypto_sha1_slow")]

;; SHA1SU0: first schedule-update step (three inputs, first tied).
5442 (define_insn "aarch64_crypto_sha1su0v4si"
5443 [(set (match_operand:V4SI 0 "register_operand" "=w")
5444 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5445 (match_operand:V4SI 2 "register_operand" "w")
5446 (match_operand:V4SI 3 "register_operand" "w")]
5448 "TARGET_SIMD && TARGET_CRYPTO"
5449 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5450 [(set_attr "type" "crypto_sha1_xor")]

;; SHA256H/SHA256H2 hash-update, variant from the sha256_op iterator.
5455 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5456 [(set (match_operand:V4SI 0 "register_operand" "=w")
5457 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5458 (match_operand:V4SI 2 "register_operand" "w")
5459 (match_operand:V4SI 3 "register_operand" "w")]
5461 "TARGET_SIMD && TARGET_CRYPTO"
5462 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5463 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0: first SHA-256 schedule-update step.  Operand 1 ("0") is the
;; schedule state tied to the output; operand 2 supplies the next words.
;; Condition spacing fixed to "TARGET_SIMD && TARGET_CRYPTO" for
;; consistency with every other crypto pattern in this file.
5466 (define_insn "aarch64_crypto_sha256su0v4si"
5467 [(set (match_operand:V4SI 0 "register_operand" "=w")
5468 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5469 (match_operand:V4SI 2 "register_operand" "w")]
5471 "TARGET_SIMD && TARGET_CRYPTO"
5472 "sha256su0\\t%0.4s, %2.4s"
5473 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1: second SHA-256 schedule-update step (three inputs, first
;; tied to the output).  Condition spacing fixed to
;; "TARGET_SIMD && TARGET_CRYPTO" for consistency with the other crypto
;; patterns in this file.
5476 (define_insn "aarch64_crypto_sha256su1v4si"
5477 [(set (match_operand:V4SI 0 "register_operand" "=w")
5478 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5479 (match_operand:V4SI 2 "register_operand" "w")
5480 (match_operand:V4SI 3 "register_operand" "w")]
5482 "TARGET_SIMD && TARGET_CRYPTO"
5483 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5484 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: 64x64 -> 128-bit polynomial (carry-less) multiply of the low
;; D registers.
5489 (define_insn "aarch64_crypto_pmulldi"
5490 [(set (match_operand:TI 0 "register_operand" "=w")
5491 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5492 (match_operand:DI 2 "register_operand" "w")]
5494 "TARGET_SIMD && TARGET_CRYPTO"
5495 "pmull\\t%0.1q, %1.1d, %2.1d"
5496 [(set_attr "type" "neon_mul_d_long")]
5499 (define_insn "aarch64_crypto_pmullv2di"
5500 [(set (match_operand:TI 0 "register_operand" "=w")
5501 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5502 (match_operand:V2DI 2 "register_operand" "w")]
5504 "TARGET_SIMD && TARGET_CRYPTO"
5505 "pmull2\\t%0.1q, %1.2d, %2.2d"
5506 [(set_attr "type" "neon_mul_d_long")]