1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this chunk is a numbered extraction of a GCC machine
;; description — the original file's line numbers are fused into each line
;; and some interior lines (conditions, braces, closing parens) are elided.
;; Code below is kept verbatim; only ';;' comments are added.

;; Standard "mov<mode>" expander for all vector modes including FP16
;; vectors (VALL_F16).  When the destination is a MEM, the source is
;; forced into a register so only reg->mem stores reach the insn patterns.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Misaligned vector move expander.  Per the embedded comment it must not
;; FAIL, so a mem := non-register move is legalised by forcing operand 1
;; into a register first.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: source in a SIMD register (DUP Vd.T, Vn.E[0]) or in a
;; general register (DUP Vd.T, Wn/Xn — the slower neon_from_gp form,
;; hence the '?r' constraint discouraging it).
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
50 dup\\t%0.<Vtype>, %1.<Vetype>[0]
51 dup\\t%0.<Vtype>, %<vw>1"
52 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]

;; Floating-point variant: only the SIMD-register source form exists.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]

;; Duplicate a selected lane across the vector.  ENDIAN_LANE_N remaps the
;; RTL (memory-order) lane number to the architectural lane number, which
;; differs on big-endian.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76 [(set_attr "type" "neon_dup<q>")]

;; As above, but the source vector has the swapped (64<->128 bit) width,
;; so the lane number is remapped in the source's mode (VSWAP_WIDTH).
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16_NO_V2Q
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92 [(set_attr "type" "neon_dup<q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; General move for 64-bit (VD) vector modes.  Alternatives, in order:
;; 0 load (ldr), 1 store (str), 2 SIMD reg-reg copy (orr), 3 SIMD->GP
;; (umov), 4 GP->SIMD (fmov), 5 GP-GP (mov), 6 immediate (movi et al. via
;; aarch64_output_simd_mov_immediate).  The condition requires at least
;; one register operand, so mem-to-mem never matches.
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
104 switch (which_alternative)
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "fmov\t%d0, %1";
111 case 5: return "mov\t%0, %1";
113 return aarch64_output_simd_mov_immediate (operands[1],
115 default: gcc_unreachable ();
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, f_mcr,\
120 mov_reg, neon_move<q>")]

;; General move for 128-bit (VQ) vector modes.  Same alternative layout as
;; the VD pattern; the GP-register alternatives need two instructions
;; (length 8, type "multiple") because a Q value spans two X registers —
;; the elided cases presumably emit "#" for a later split (TODO confirm,
;; those lines are missing from this extraction).
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
132 switch (which_alternative)
135 return "ldr\\t%q0, %1";
137 return "str\\t%q1, %0";
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
153 (set_attr "length" "4,4,4,8,8,8,4")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

156 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store of (architectural) lane zero as a plain scalar STR.  The condition
;; uses ENDIAN_LANE_N so "lane zero" is the architectural one on both
;; endiannesses.
159 (define_insn "aarch64_store_lane0<mode>"
160 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
161 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
162 (parallel [(match_operand 2 "const_int_operand" "n")])))]
164 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
165 "str\\t%<Vetype>1, %0"
166 [(set_attr "type" "neon_store1_1reg<q>")]

;; Two adjacent 64-bit vector loads combined into one LDP.  The condition
;; checks that the second address equals the first plus the mode size,
;; i.e. the accesses are truly consecutive.
169 (define_insn "load_pair<mode>"
170 [(set (match_operand:VD 0 "register_operand" "=w")
171 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
172 (set (match_operand:VD 2 "register_operand" "=w")
173 (match_operand:VD 3 "memory_operand" "m"))]
175 && rtx_equal_p (XEXP (operands[3], 0),
176 plus_constant (Pmode,
177 XEXP (operands[1], 0),
178 GET_MODE_SIZE (<MODE>mode)))"
180 [(set_attr "type" "neon_ldp")]

;; Mirror of load_pair: two consecutive 64-bit vector stores as one STP.
183 (define_insn "store_pair<mode>"
184 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
185 (match_operand:VD 1 "register_operand" "w"))
186 (set (match_operand:VD 2 "memory_operand" "=m")
187 (match_operand:VD 3 "register_operand" "w"))]
189 && rtx_equal_p (XEXP (operands[2], 0),
190 plus_constant (Pmode,
191 XEXP (operands[0], 0),
192 GET_MODE_SIZE (<MODE>mode)))"
194 [(set_attr "type" "neon_stp")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.  The
;; two fragments below are the bodies of (define_split ...) forms whose
;; opening lines (orig. 197 and 209) are missing from this extraction.

;; Split: after reload, a GP<->GP move of a 128-bit vector becomes two
;; DImode register moves.
198 [(set (match_operand:VQ 0 "register_operand" "")
199 (match_operand:VQ 1 "register_operand" ""))]
200 "TARGET_SIMD && reload_completed
201 && GP_REGNUM_P (REGNO (operands[0]))
202 && GP_REGNUM_P (REGNO (operands[1]))"
205 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; Split: after reload, a cross-file (FP<->GP) 128-bit move is expanded
;; by aarch64_split_simd_move.
210 [(set (match_operand:VQ 0 "register_operand" "")
211 (match_operand:VQ 1 "register_operand" ""))]
212 "TARGET_SIMD && reload_completed
213 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
214 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
217 aarch64_split_simd_move (operands[0], operands[1]);

;; Helper expander used by the split above: moves a 128-bit vector half by
;; half.  GP source: write the low then the high quad of the FP dest.
;; Otherwise (FP source, GP dest per the split condition): extract the low
;; and high halves via vec_select with lo/hi parallel masks.
221 (define_expand "aarch64_split_simd_mov<mode>"
222 [(set (match_operand:VQ 0)
223 (match_operand:VQ 1))]
226 rtx dst = operands[0];
227 rtx src = operands[1];
229 if (GP_REGNUM_P (REGNO (src)))
231 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
232 rtx src_high_part = gen_highpart (<VHALF>mode, src);
235 (gen_move_lo_quad_<mode> (dst, src_low_part));
237 (gen_move_hi_quad_<mode> (dst, src_high_part));
242 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
243 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
244 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
245 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
248 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
250 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

;; Move the low half of a Q register to a general register (post-reload
;; only; template line elided in this extraction).
256 (define_insn "aarch64_simd_mov_from_<mode>low"
257 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
259 (match_operand:VQ 1 "register_operand" "w")
260 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
261 "TARGET_SIMD && reload_completed"
263 [(set_attr "type" "neon_to_gp<q>")
264 (set_attr "length" "4")

;; As above for the high half.
267 (define_insn "aarch64_simd_mov_from_<mode>high"
268 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
270 (match_operand:VQ 1 "register_operand" "w")
271 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
272 "TARGET_SIMD && reload_completed"
274 [(set_attr "type" "neon_to_gp<q>")
275 (set_attr "length" "4")
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; ORN: note operand order in the template — the architectural ORN negates
;; its second source, so RTL operand 1 (the negated one) is printed last.
278 (define_insn "orn<mode>3"
279 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
280 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
281 (match_operand:VDQ_I 2 "register_operand" "w")))]
283 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
284 [(set_attr "type" "neon_logic<q>")]

;; BIC: same operand swap as ORN (clears bits of %2 selected by %1).
287 (define_insn "bic<mode>3"
288 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
289 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
290 (match_operand:VDQ_I 2 "register_operand" "w")))]
292 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
293 [(set_attr "type" "neon_logic<q>")]

;; Vector integer add.
296 (define_insn "add<mode>3"
297 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
298 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
299 (match_operand:VDQ_I 2 "register_operand" "w")))]
301 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
302 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtract.
305 (define_insn "sub<mode>3"
306 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
307 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
308 (match_operand:VDQ_I 2 "register_operand" "w")))]
310 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
311 [(set_attr "type" "neon_sub<q>")]

;; Vector integer multiply (no 64-bit-element MUL exists, hence VDQ_BHSI).
314 (define_insn "mul<mode>3"
315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
316 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
317 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
319 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
320 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; Byte-swap each element using the appropriately-sized REV instruction.
323 (define_insn "bswap<mode>2"
324 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
325 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
327 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
328 [(set_attr "type" "neon_rev<q>")]

;; Bit-reverse within each byte (RBIT), byte-vector modes only.
331 (define_insn "aarch64_rbit<mode>"
332 [(set (match_operand:VB 0 "register_operand" "=w")
333 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
336 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
337 [(set_attr "type" "neon_rbit")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; ctz via the identity ctz(x) = clz(bit-reverse(x)): byte-swap each
;; element, bit-reverse the bytes (viewing the vector as QI elements via a
;; subreg), then count leading zeros.
340 (define_expand "ctz<mode>2"
341 [(set (match_operand:VS 0 "register_operand")
342 (ctz:VS (match_operand:VS 1 "register_operand")))]
345 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
346 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
348 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
349 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));

;; copysign implemented with BSL: build a mask of just the sign bits
;; (all-ones shifted left by element-bits - 1) and bit-select the sign
;; from operand 2, the rest from operand 1.
354 (define_expand "copysign<mode>3"
355 [(match_operand:VHSDF 0 "register_operand")
356 (match_operand:VHSDF 1 "register_operand")
357 (match_operand:VHSDF 2 "register_operand")]
358 "TARGET_FLOAT && TARGET_SIMD"
360 rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
361 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
363 emit_move_insn (v_bitmask,
364 aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
365 HOST_WIDE_INT_M1U << bits));
366 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
367 operands[2], operands[1]));
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Multiply a vector by one broadcast lane of another vector
;; ((F)MUL ..., Vm.E[lane]); lane index remapped for endianness.
372 (define_insn "*aarch64_mul3_elt<mode>"
373 [(set (match_operand:VMUL 0 "register_operand" "=w")
377 (match_operand:VMUL 1 "register_operand" "<h_con>")
378 (parallel [(match_operand:SI 2 "immediate_operand")])))
379 (match_operand:VMUL 3 "register_operand" "w")))]
382 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
383 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
385 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; As above with the lane taken from the swapped-width vector mode, so the
;; endian lane remap uses <VSWAP_WIDTH>mode.
388 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
389 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
390 (mult:VMUL_CHANGE_NLANES
391 (vec_duplicate:VMUL_CHANGE_NLANES
393 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
394 (parallel [(match_operand:SI 2 "immediate_operand")])))
395 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
398 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
399 INTVAL (operands[2])));
400 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
402 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; Multiply by a duplicated scalar: uses the by-element form with lane 0.
405 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
406 [(set (match_operand:VMUL 0 "register_operand" "=w")
409 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
410 (match_operand:VMUL 2 "register_operand" "w")))]
412 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
413 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
416 (define_insn "aarch64_rsqrte<mode>"
417 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
418 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
421 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
422 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton step (FRSQRTS).
424 (define_insn "aarch64_rsqrts<mode>"
425 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
426 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
427 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
430 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
431 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; rsqrt expander delegating to the approximate-sqrt emitter (recip=true).
433 (define_expand "rsqrt<mode>2"
434 [(set (match_operand:VALLF 0 "register_operand" "=w")
435 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
439 aarch64_emit_approx_sqrt (operands[0], operands[1], true);

;; DF-result multiply by a V2DF lane (FMUL by element on the D register).
443 (define_insn "*aarch64_mul3_elt_to_64v2df"
444 [(set (match_operand:DF 0 "register_operand" "=w")
447 (match_operand:V2DF 1 "register_operand" "w")
448 (parallel [(match_operand:SI 2 "immediate_operand")]))
449 (match_operand:DF 3 "register_operand" "w")))]
452 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
453 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
455 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Vector integer negate.
458 (define_insn "neg<mode>2"
459 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
460 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
462 "neg\t%0.<Vtype>, %1.<Vtype>"
463 [(set_attr "type" "neon_neg<q>")]

;; Vector integer absolute value (RTL abs form, combinable).
466 (define_insn "abs<mode>2"
467 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
468 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
470 "abs\t%0.<Vtype>, %1.<Vtype>"
471 [(set_attr "type" "neon_abs<q>")]

474 ;; The intrinsic version of integer ABS must not be allowed to
475 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept as an unspec so combine cannot merge it (see the
;; comment above).
477 (define_insn "aarch64_abs<mode>"
478 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
480 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
483 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
484 [(set_attr "type" "neon_abs<q>")]

;; Signed absolute difference: SABD = |op1 - op2|.
487 (define_insn "abd<mode>_3"
488 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
489 (abs:VDQ_BHSI (minus:VDQ_BHSI
490 (match_operand:VDQ_BHSI 1 "register_operand" "w")
491 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
493 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
494 [(set_attr "type" "neon_abd<q>")]

;; Signed absolute-difference-and-accumulate: SABA accumulates into the
;; destination, hence the "0" matching constraint on operand 3.
497 (define_insn "aba<mode>_3"
498 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
499 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
500 (match_operand:VDQ_BHSI 1 "register_operand" "w")
501 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
502 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
504 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
505 [(set_attr "type" "neon_arith_acc<q>")]

;; FP absolute difference (FABD), vector and scalar FP modes.
508 (define_insn "fabd<mode>3"
509 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
512 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
513 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
515 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
516 [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Vector bitwise AND.
519 (define_insn "and<mode>3"
520 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
521 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
522 (match_operand:VDQ_I 2 "register_operand" "w")))]
524 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
525 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise OR (ORR).
528 (define_insn "ior<mode>3"
529 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
530 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
531 (match_operand:VDQ_I 2 "register_operand" "w")))]
533 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
534 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise exclusive-OR (EOR).
537 (define_insn "xor<mode>3"
538 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
539 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
540 (match_operand:VDQ_I 2 "register_operand" "w")))]
542 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
543 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise NOT.
546 (define_insn "one_cmpl<mode>2"
547 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
548 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
550 "not\t%0.<Vbtype>, %1.<Vbtype>"
551 [(set_attr "type" "neon_logic<q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Insert a scalar into one lane (vec_merge with a one-hot mask in
;; operand 2).  exact_log2 recovers the lane from the mask, ENDIAN_LANE_N
;; remaps it, and the mask is rebuilt so %p2 prints the lane number.
;; Alternatives: GP source (INS from Wn/Xn), SIMD source (INS element),
;; memory source (LD1 single structure).
554 (define_insn "aarch64_simd_vec_set<mode>"
555 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
557 (vec_duplicate:VDQ_BHSI
558 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
559 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
560 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
563 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
564 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
565 switch (which_alternative)
568 return "ins\\t%0.<Vetype>[%p2], %w1";
570 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
572 return "ld1\\t{%0.<Vetype>}[%p2], %1";
577 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]

;; Copy lane %4 of operand 3 into lane %p2 of the destination (element
;; INS); both lane numbers are endian-remapped.
580 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
581 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
583 (vec_duplicate:VALL_F16
585 (match_operand:VALL_F16 3 "register_operand" "w")
587 [(match_operand:SI 4 "immediate_operand" "i")])))
588 (match_operand:VALL_F16 1 "register_operand" "0")
589 (match_operand:SI 2 "immediate_operand" "i")))]
592 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
593 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
594 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
596 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
598 [(set_attr "type" "neon_ins<q>")]

;; As above but the source vector has the swapped 64<->128-bit width, so
;; its lane (operand 4) is remapped in <VSWAP_WIDTH>mode.
601 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
602 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
603 (vec_merge:VALL_F16_NO_V2Q
604 (vec_duplicate:VALL_F16_NO_V2Q
606 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
608 [(match_operand:SI 4 "immediate_operand" "i")])))
609 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
610 (match_operand:SI 2 "immediate_operand" "i")))]
613 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
614 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
615 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
616 INTVAL (operands[4])));
618 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
620 [(set_attr "type" "neon_ins<q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Logical shift right by immediate (USHR); operand 2 is a vector of
;; identical shift counts (Dr constraint).
623 (define_insn "aarch64_simd_lshr<mode>"
624 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
625 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
626 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
628 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
629 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by immediate (SSHR).
632 (define_insn "aarch64_simd_ashr<mode>"
633 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
634 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
635 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
637 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
638 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by immediate (SHL); Dl is the left-shift-immediate range.
641 (define_insn "aarch64_simd_imm_shl<mode>"
642 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
643 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
644 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
646 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
647 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift left by per-lane register amount (SSHL on a plain ashift — left
;; shifts are the same signed or unsigned).
650 (define_insn "aarch64_simd_reg_sshl<mode>"
651 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
652 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
653 (match_operand:VDQ_I 2 "register_operand" "w")))]
655 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
656 [(set_attr "type" "neon_shift_reg<q>")]

;; USHL by register: an unspec because the AArch64 semantics (negative
;; counts shift right) have no direct RTL equivalent.
659 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
660 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
661 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
662 (match_operand:VDQ_I 2 "register_operand" "w")]
663 UNSPEC_ASHIFT_UNSIGNED))]
665 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
666 [(set_attr "type" "neon_shift_reg<q>")]

;; Signed counterpart (SSHL) of the unspec register shift.
669 (define_insn "aarch64_simd_reg_shl<mode>_signed"
670 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
671 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
672 (match_operand:VDQ_I 2 "register_operand" "w")]
673 UNSPEC_ASHIFT_SIGNED))]
675 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
676 [(set_attr "type" "neon_shift_reg<q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Shift-left expander.  Constant counts in [0, bit_width) use the
;; immediate SHL pattern; otherwise the SImode count is forced into a
;; register, duplicated across a vector, and SSHL is used.  Note left
;; shifts accept 0 but not bit_width, unlike the right-shift expanders
;; below which accept (0, bit_width].
679 (define_expand "ashl<mode>3"
680 [(match_operand:VDQ_I 0 "register_operand" "")
681 (match_operand:VDQ_I 1 "register_operand" "")
682 (match_operand:SI 2 "general_operand" "")]
685 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
688 if (CONST_INT_P (operands[2]))
690 shift_amount = INTVAL (operands[2]);
691 if (shift_amount >= 0 && shift_amount < bit_width)
693 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
695 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
702 operands[2] = force_reg (SImode, operands[2]);
705 else if (MEM_P (operands[2]))
707 operands[2] = force_reg (SImode, operands[2]);
710 if (REG_P (operands[2]))
712 rtx tmp = gen_reg_rtx (<MODE>mode);
713 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
714 convert_to_mode (<VEL>mode,
717 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Logical shift-right expander.  Constant counts in (0, bit_width] use
;; USHR directly; variable counts are negated (USHL with a negative count
;; shifts right) and duplicated, then the unsigned unspec shift is used.
726 (define_expand "lshr<mode>3"
727 [(match_operand:VDQ_I 0 "register_operand" "")
728 (match_operand:VDQ_I 1 "register_operand" "")
729 (match_operand:SI 2 "general_operand" "")]
732 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
735 if (CONST_INT_P (operands[2]))
737 shift_amount = INTVAL (operands[2]);
738 if (shift_amount > 0 && shift_amount <= bit_width)
740 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
742 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
748 operands[2] = force_reg (SImode, operands[2]);
750 else if (MEM_P (operands[2]))
752 operands[2] = force_reg (SImode, operands[2]);
755 if (REG_P (operands[2]))
757 rtx tmp = gen_reg_rtx (SImode);
758 rtx tmp1 = gen_reg_rtx (<MODE>mode);
759 emit_insn (gen_negsi2 (tmp, operands[2]));
760 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
761 convert_to_mode (<VEL>mode,
763 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],

;; Arithmetic shift-right expander; identical structure to lshr but emits
;; SSHR / the signed unspec shift.
773 (define_expand "ashr<mode>3"
774 [(match_operand:VDQ_I 0 "register_operand" "")
775 (match_operand:VDQ_I 1 "register_operand" "")
776 (match_operand:SI 2 "general_operand" "")]
779 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
782 if (CONST_INT_P (operands[2]))
784 shift_amount = INTVAL (operands[2]);
785 if (shift_amount > 0 && shift_amount <= bit_width)
787 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
789 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
795 operands[2] = force_reg (SImode, operands[2]);
797 else if (MEM_P (operands[2]))
799 operands[2] = force_reg (SImode, operands[2]);
802 if (REG_P (operands[2]))
804 rtx tmp = gen_reg_rtx (SImode);
805 rtx tmp1 = gen_reg_rtx (<MODE>mode);
806 emit_insn (gen_negsi2 (tmp, operands[2]));
807 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
808 convert_to_mode (<VEL>mode,
810 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Vector-by-vector left shift: maps straight onto SSHL.
820 (define_expand "vashl<mode>3"
821 [(match_operand:VDQ_I 0 "register_operand" "")
822 (match_operand:VDQ_I 1 "register_operand" "")
823 (match_operand:VDQ_I 2 "register_operand" "")]
826 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

831 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
832 ;; Negating individual lanes most certainly offsets the
833 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the counts, then SSHL
;; (negative counts shift right).
834 (define_expand "vashr<mode>3"
835 [(match_operand:VDQ_BHSI 0 "register_operand" "")
836 (match_operand:VDQ_BHSI 1 "register_operand" "")
837 (match_operand:VDQ_BHSI 2 "register_operand" "")]
840 rtx neg = gen_reg_rtx (<MODE>mode);
841 emit (gen_neg<mode>2 (neg, operands[2]));
842 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; DImode intrinsic asr allowing a count of 64: asr-by-64 fills the result
;; with sign-bit copies, same as asr-by-63, so the count is clamped.
848 (define_expand "aarch64_ashr_simddi"
849 [(match_operand:DI 0 "register_operand" "=w")
850 (match_operand:DI 1 "register_operand" "w")
851 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
854 /* An arithmetic shift right by 64 fills the result with copies of the sign
855 bit, just like asr by 63 - however the standard pattern does not handle
857 if (INTVAL (operands[2]) == 64)
858 operands[2] = GEN_INT (63);
859 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Vector-by-vector logical right shift: negated counts + USHL.
864 (define_expand "vlshr<mode>3"
865 [(match_operand:VDQ_BHSI 0 "register_operand" "")
866 (match_operand:VDQ_BHSI 1 "register_operand" "")
867 (match_operand:VDQ_BHSI 2 "register_operand" "")]
870 rtx neg = gen_reg_rtx (<MODE>mode);
871 emit (gen_neg<mode>2 (neg, operands[2]));
872 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; DImode intrinsic lsr allowing a count of 64, which yields zero.
877 (define_expand "aarch64_lshr_simddi"
878 [(match_operand:DI 0 "register_operand" "=w")
879 (match_operand:DI 1 "register_operand" "w")
880 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
883 if (INTVAL (operands[2]) == 64)
884 emit_move_insn (operands[0], const0_rtx)
886 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; vec_set standard expander (integer vectors): converts the lane index
;; into the one-hot vec_merge mask the insn pattern expects.
891 (define_expand "vec_set<mode>"
892 [(match_operand:VDQ_BHSI 0 "register_operand")
893 (match_operand:<VEL> 1 "register_operand")
894 (match_operand:SI 2 "immediate_operand")]
897 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
898 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
899 GEN_INT (elem), operands[0]));

904 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-register shift of a 64-bit vector by bytes: the direction of the
;; underlying scalar shift flips with endianness (SHL on big-endian, USHR
;; on little-endian) so the lane movement is the same either way.
905 (define_insn "vec_shr_<mode>"
906 [(set (match_operand:VD 0 "register_operand" "=w")
907 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
908 (match_operand:SI 2 "immediate_operand" "i")]
912 if (BYTES_BIG_ENDIAN)
913 return "shl %d0, %d1, %2";
915 return "ushr %d0, %d1, %2";
917 [(set_attr "type" "neon_shift_imm")]

;; V2DI lane insert: GP source (INS from Xn) or SIMD source (INS element).
920 (define_insn "aarch64_simd_vec_setv2di"
921 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
924 (match_operand:DI 1 "register_operand" "r,w"))
925 (match_operand:V2DI 3 "register_operand" "0,0")
926 (match_operand:SI 2 "immediate_operand" "i,i")))]
929 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
930 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
931 switch (which_alternative)
934 return "ins\\t%0.d[%p2], %1";
936 return "ins\\t%0.d[%p2], %1.d[0]";
941 [(set_attr "type" "neon_from_gp, neon_ins_q")]

;; vec_set standard expander for V2DI, mirroring vec_set<mode> above.
944 (define_expand "vec_setv2di"
945 [(match_operand:V2DI 0 "register_operand")
946 (match_operand:DI 1 "register_operand")
947 (match_operand:SI 2 "immediate_operand")]
950 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
951 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
952 GEN_INT (elem), operands[0]));

;; FP-vector lane insert: element INS from lane 0 of the scalar's register.
957 (define_insn "aarch64_simd_vec_set<mode>"
958 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
960 (vec_duplicate:VDQF_F16
961 (match_operand:<VEL> 1 "register_operand" "w"))
962 (match_operand:VDQF_F16 3 "register_operand" "0")
963 (match_operand:SI 2 "immediate_operand" "i")))]
966 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
968 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
969 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
971 [(set_attr "type" "neon_ins<q>")]

;; vec_set standard expander for FP vectors.
974 (define_expand "vec_set<mode>"
975 [(match_operand:VDQF_F16 0 "register_operand" "+w")
976 (match_operand:<VEL> 1 "register_operand" "w")
977 (match_operand:SI 2 "immediate_operand" "")]
980 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
981 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
982 GEN_INT (elem), operands[0]));
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

;; Multiply-accumulate: dest = op1 + op2 * op3, accumulating in place
;; (operand 1 tied to the destination via "0").
988 (define_insn "aarch64_mla<mode>"
989 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
990 (plus:VDQ_BHSI (mult:VDQ_BHSI
991 (match_operand:VDQ_BHSI 2 "register_operand" "w")
992 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
993 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
995 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
996 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLA with one multiplicand broadcast from a lane (by-element form);
;; lane endian-remapped as elsewhere.
999 (define_insn "*aarch64_mla_elt<mode>"
1000 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1003 (vec_duplicate:VDQHS
1005 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1006 (parallel [(match_operand:SI 2 "immediate_operand")])))
1007 (match_operand:VDQHS 3 "register_operand" "w"))
1008 (match_operand:VDQHS 4 "register_operand" "0")))]
1011 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1012 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1014 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; By-element MLA where the lane comes from the swapped-width vector mode.
1017 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1018 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1021 (vec_duplicate:VDQHS
1023 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1024 (parallel [(match_operand:SI 2 "immediate_operand")])))
1025 (match_operand:VDQHS 3 "register_operand" "w"))
1026 (match_operand:VDQHS 4 "register_operand" "0")))]
1029 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1030 INTVAL (operands[2])));
1031 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1033 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Multiply-subtract: dest = op1 - op2 * op3, accumulating in place.
1036 (define_insn "aarch64_mls<mode>"
1037 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1038 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1039 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1040 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1042 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1043 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; By-element MLS (same-mode lane source).
1046 (define_insn "*aarch64_mls_elt<mode>"
1047 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1049 (match_operand:VDQHS 4 "register_operand" "0")
1051 (vec_duplicate:VDQHS
1053 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1054 (parallel [(match_operand:SI 2 "immediate_operand")])))
1055 (match_operand:VDQHS 3 "register_operand" "w"))))]
1058 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1059 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1061 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; By-element MLS with swapped-width lane source.
1064 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1065 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1067 (match_operand:VDQHS 4 "register_operand" "0")
1069 (vec_duplicate:VDQHS
1071 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1072 (parallel [(match_operand:SI 2 "immediate_operand")])))
1073 (match_operand:VDQHS 3 "register_operand" "w"))))]
1076 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1077 INTVAL (operands[2])));
1078 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1080 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; NOTE(review): extraction artifact — original line numbers are fused into
;; each line and some interior lines are elided; code kept verbatim.

1083 ;; Max/Min operations.
;; Signed/unsigned SMAX/SMIN/UMAX/UMIN; no 64-bit-element form exists,
;; hence VDQ_BHSI.
1084 (define_insn "<su><maxmin><mode>3"
1085 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1086 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1087 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1089 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1090 [(set_attr "type" "neon_minmax<q>")]

;; V2DI max/min has no direct instruction: synthesised with a compare and
;; vcond (bit-select).  The lines choosing cmp_operator per max/min and
;; signedness are elided in this extraction.
1093 (define_expand "<su><maxmin>v2di3"
1094 [(set (match_operand:V2DI 0 "register_operand" "")
1095 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1096 (match_operand:V2DI 2 "register_operand" "")))]
1099 enum rtx_code cmp_operator;
1120 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1121 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1122 operands[2], cmp_fmt, operands[1], operands[2]));

1126 ;; Pairwise Integer Max/Min operations.
1127 (define_insn "aarch64_<maxmin_uns>p<mode>"
1128 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1129 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1130 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1133 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1134 [(set_attr "type" "neon_minmax<q>")]

1137 ;; Pairwise FP Max/Min operations.
1138 (define_insn "aarch64_<maxmin_uns>p<mode>"
1139 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1140 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1141 (match_operand:VHSDF 2 "register_operand" "w")]
1144 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1145 [(set_attr "type" "neon_minmax<q>")]
1148 ;; vec_concat gives a new vector with the low elements from operand 1, and
1149 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1150 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1151 ;; What that means is that the RTL descriptions of the patterns below
1152 ;; need to change depending on endianness.
1154 ;; Move to the low architectural bits of the register.
1155 ;; On little-endian this is { operand, zeroes }
1156 ;; On big-endian this is { zeroes, operand }
1158 (define_insn "move_lo_quad_internal_<mode>"
1159 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1161 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1162 (vec_duplicate:<VHALF> (const_int 0))))]
1163 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1168 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1169 (set_attr "simd" "yes,*,yes")
1170 (set_attr "fp" "*,yes,*")
1171 (set_attr "length" "4")]
1174 (define_insn "move_lo_quad_internal_<mode>"
1175 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1177 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1179 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1184 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1185 (set_attr "simd" "yes,*,yes")
1186 (set_attr "fp" "*,yes,*")
1187 (set_attr "length" "4")]
1190 (define_insn "move_lo_quad_internal_be_<mode>"
1191 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1193 (vec_duplicate:<VHALF> (const_int 0))
1194 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1195 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1200 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1201 (set_attr "simd" "yes,*,yes")
1202 (set_attr "fp" "*,yes,*")
1203 (set_attr "length" "4")]
1206 (define_insn "move_lo_quad_internal_be_<mode>"
1207 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1210 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1211 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1216 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1217 (set_attr "simd" "yes,*,yes")
1218 (set_attr "fp" "*,yes,*")
1219 (set_attr "length" "4")]
;; Move operand 1 into the low architectural half of operand 0, zeroing
;; the other half.  Dispatches on BYTES_BIG_ENDIAN to the _be / plain
;; internal insn, since the vec_concat operand order differs (see the
;; endianness note above).
1222 (define_expand "move_lo_quad_<mode>"
1223 [(match_operand:VQ 0 "register_operand")
1224 (match_operand:VQ 1 "register_operand")]
1227 if (BYTES_BIG_ENDIAN)
1228 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1230 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1235 ;; Move operand1 to the high architectural bits of the register, keeping
1236 ;; the low architectural bits of operand2.
1237 ;; For little-endian this is { operand2, operand1 }
1238 ;; For big-endian this is { operand1, operand2 }
1240 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1241 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1245 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1246 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1247 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1249 ins\\t%0.d[1], %1.d[0]
1251 [(set_attr "type" "neon_ins")]
1254 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1255 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1257 (match_operand:<VHALF> 1 "register_operand" "w,r")
1260 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1261 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1263 ins\\t%0.d[1], %1.d[0]
1265 [(set_attr "type" "neon_ins")]
;; Move operand 1 into the high architectural half of operand 0, keeping
;; the existing low half.  Builds a parallel selecting the low half
;; (aarch64_simd_vect_par_cnst_half with 'false') and dispatches on
;; endianness to the matching internal insn (INS %0.d[1], %1.d[0]).
1268 (define_expand "move_hi_quad_<mode>"
1269 [(match_operand:VQ 0 "register_operand" "")
1270 (match_operand:<VHALF> 1 "register_operand" "")]
1273 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1274 if (BYTES_BIG_ENDIAN)
1275 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1278 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1283 ;; Narrowing operations.
;; Narrow each element of a full-width (VQN) vector to half element
;; width using XTN, producing the 64-bit narrowed vector.
1286 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1287 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1288 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1290 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1291 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate two 64-bit (VDN) vectors: concatenate them into a
;; 128-bit temporary via move_lo_quad/move_hi_quad, then XTN-narrow the
;; whole temporary.  On big-endian the operands are swapped (lo=2, hi=1)
;; so that GCC's lane numbering maps onto the architectural halves.
1294 (define_expand "vec_pack_trunc_<mode>"
1295 [(match_operand:<VNARROWD> 0 "register_operand" "")
1296 (match_operand:VDN 1 "register_operand" "")
1297 (match_operand:VDN 2 "register_operand" "")]
1300 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1301 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1302 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1304 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1305 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1306 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-truncate two full-width (VQN) vectors with an XTN/XTN2 pair.
;; The output is earlyclobbered (=&w) because the first XTN writes %0
;; before the second source is consumed.  On big-endian the two sources
;; swap roles so lane numbering stays consistent with vec_concat.
1312 (define_insn "vec_pack_trunc_<mode>"
1313 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1314 (vec_concat:<VNARROWQ2>
1315 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1316 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1319 if (BYTES_BIG_ENDIAN)
1320 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1322 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1324 [(set_attr "type" "multiple")
1325 (set_attr "length" "8")]
1328 ;; Widening operations.
1330 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1331 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1332 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1333 (match_operand:VQW 1 "register_operand" "w")
1334 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1337 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1338 [(set_attr "type" "neon_shift_imm_long")]
1341 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1342 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1343 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1344 (match_operand:VQW 1 "register_operand" "w")
1345 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1348 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1349 [(set_attr "type" "neon_shift_imm_long")]
;; Standard widening-unpack of the HIGH half: build a hi-half lane
;; selector (par_cnst_half with 'true') and emit the matching _hi_
;; internal insn (USHLL2/SSHLL2).  Note the selector flag and the
;; generator variant must agree — this is the pattern the FP
;; vec_unpacks_hi expander below should mirror.
1352 (define_expand "vec_unpack<su>_hi_<mode>"
1353 [(match_operand:<VWIDE> 0 "register_operand" "")
1354 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1357 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1358 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
;; Standard widening-unpack of the LOW half: lo-half lane selector
;; ('false') paired with the _lo_ internal insn (USHLL/SSHLL).
1364 (define_expand "vec_unpack<su>_lo_<mode>"
1365 [(match_operand:<VWIDE> 0 "register_operand" "")
1366 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1369 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1370 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1376 ;; Widening arithmetic.
1378 (define_insn "*aarch64_<su>mlal_lo<mode>"
1379 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1383 (match_operand:VQW 2 "register_operand" "w")
1384 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1385 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1386 (match_operand:VQW 4 "register_operand" "w")
1388 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1390 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1391 [(set_attr "type" "neon_mla_<Vetype>_long")]
1394 (define_insn "*aarch64_<su>mlal_hi<mode>"
1395 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1398 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1399 (match_operand:VQW 2 "register_operand" "w")
1400 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1401 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1402 (match_operand:VQW 4 "register_operand" "w")
1404 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1406 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1407 [(set_attr "type" "neon_mla_<Vetype>_long")]
1410 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1411 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1413 (match_operand:<VWIDE> 1 "register_operand" "0")
1415 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1416 (match_operand:VQW 2 "register_operand" "w")
1417 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1418 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1419 (match_operand:VQW 4 "register_operand" "w")
1422 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1423 [(set_attr "type" "neon_mla_<Vetype>_long")]
1426 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1427 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1429 (match_operand:<VWIDE> 1 "register_operand" "0")
1431 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1432 (match_operand:VQW 2 "register_operand" "w")
1433 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1434 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1435 (match_operand:VQW 4 "register_operand" "w")
1438 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1439 [(set_attr "type" "neon_mla_<Vetype>_long")]
1442 (define_insn "*aarch64_<su>mlal<mode>"
1443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1447 (match_operand:VD_BHSI 1 "register_operand" "w"))
1449 (match_operand:VD_BHSI 2 "register_operand" "w")))
1450 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1452 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1453 [(set_attr "type" "neon_mla_<Vetype>_long")]
1456 (define_insn "*aarch64_<su>mlsl<mode>"
1457 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1459 (match_operand:<VWIDE> 1 "register_operand" "0")
1462 (match_operand:VD_BHSI 2 "register_operand" "w"))
1464 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1466 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1467 [(set_attr "type" "neon_mla_<Vetype>_long")]
1470 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1471 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1472 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1473 (match_operand:VQW 1 "register_operand" "w")
1474 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1475 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1476 (match_operand:VQW 2 "register_operand" "w")
1479 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1480 [(set_attr "type" "neon_mul_<Vetype>_long")]
1483 (define_expand "vec_widen_<su>mult_lo_<mode>"
1484 [(match_operand:<VWIDE> 0 "register_operand" "")
1485 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1486 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1489 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1490 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1497 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1498 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1499 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1500 (match_operand:VQW 1 "register_operand" "w")
1501 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1502 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1503 (match_operand:VQW 2 "register_operand" "w")
1506 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1507 [(set_attr "type" "neon_mul_<Vetype>_long")]
1510 (define_expand "vec_widen_<su>mult_hi_<mode>"
1511 [(match_operand:<VWIDE> 0 "register_operand" "")
1512 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1513 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1516 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1517 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1525 ;; FP vector operations.
1526 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1527 ;; double-precision (64-bit) floating-point data types and arithmetic as
1528 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1529 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1531 ;; Floating-point operations can raise an exception.  Vectorizing such
1532 ;; operations is safe for the reasons explained below.
1534 ;; ARMv8 permits an extension to enable trapped floating-point
1535 ;; exception handling, however this is an optional feature. In the
1536 ;; event of a floating-point exception being raised by vectorised
1538 ;; 1. If trapped floating-point exceptions are available, then a trap
1539 ;; will be taken when any lane raises an enabled exception. A trap
1540 ;; handler may determine which lane raised the exception.
1541 ;; 2. Alternatively a sticky exception flag is set in the
1542 ;; floating-point status register (FPSR). Software may explicitly
1543 ;; test the exception flags, in which case the tests will either
1544 ;; prevent vectorisation, allowing precise identification of the
1545 ;; failing operation, or if tested outside of vectorisable regions
1546 ;; then the specific operation and lane are not of interest.
1548 ;; FP arithmetic operations.
1550 (define_insn "add<mode>3"
1551 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1552 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1553 (match_operand:VHSDF 2 "register_operand" "w")))]
1555 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1556 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1559 (define_insn "sub<mode>3"
1560 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1561 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1562 (match_operand:VHSDF 2 "register_operand" "w")))]
1564 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1565 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1568 (define_insn "mul<mode>3"
1569 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1570 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1571 (match_operand:VHSDF 2 "register_operand" "w")))]
1573 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1574 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP vector division.  First tries the reciprocal-approximation
;; sequence (aarch64_emit_approx_div); if that succeeds the expansion is
;; done, otherwise operand 1 is forced into a register and the pattern
;; falls through to the *div<mode>3 FDIV insn below.
1577 (define_expand "div<mode>3"
1578 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1579 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1580 (match_operand:VHSDF 2 "register_operand" "w")))]
1583 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1586 operands[1] = force_reg (<MODE>mode, operands[1]);
1589 (define_insn "*div<mode>3"
1590 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1591 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1592 (match_operand:VHSDF 2 "register_operand" "w")))]
1594 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1595 [(set_attr "type" "neon_fp_div_<stype><q>")]
1598 (define_insn "neg<mode>2"
1599 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1600 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1602 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1603 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1606 (define_insn "abs<mode>2"
1607 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1608 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1610 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1611 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1614 (define_insn "fma<mode>4"
1615 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1616 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1617 (match_operand:VHSDF 2 "register_operand" "w")
1618 (match_operand:VHSDF 3 "register_operand" "0")))]
1620 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1621 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1624 (define_insn "*aarch64_fma4_elt<mode>"
1625 [(set (match_operand:VDQF 0 "register_operand" "=w")
1629 (match_operand:VDQF 1 "register_operand" "<h_con>")
1630 (parallel [(match_operand:SI 2 "immediate_operand")])))
1631 (match_operand:VDQF 3 "register_operand" "w")
1632 (match_operand:VDQF 4 "register_operand" "0")))]
1635 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1636 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1638 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1641 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1642 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1644 (vec_duplicate:VDQSF
1646 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1647 (parallel [(match_operand:SI 2 "immediate_operand")])))
1648 (match_operand:VDQSF 3 "register_operand" "w")
1649 (match_operand:VDQSF 4 "register_operand" "0")))]
1652 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1653 INTVAL (operands[2])));
1654 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1656 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1659 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1660 [(set (match_operand:VMUL 0 "register_operand" "=w")
1663 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1664 (match_operand:VMUL 2 "register_operand" "w")
1665 (match_operand:VMUL 3 "register_operand" "0")))]
1667 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1668 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1671 (define_insn "*aarch64_fma4_elt_to_64v2df"
1672 [(set (match_operand:DF 0 "register_operand" "=w")
1675 (match_operand:V2DF 1 "register_operand" "w")
1676 (parallel [(match_operand:SI 2 "immediate_operand")]))
1677 (match_operand:DF 3 "register_operand" "w")
1678 (match_operand:DF 4 "register_operand" "0")))]
1681 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1682 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1684 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1687 (define_insn "fnma<mode>4"
1688 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1690 (match_operand:VHSDF 1 "register_operand" "w")
1692 (match_operand:VHSDF 2 "register_operand" "w"))
1693 (match_operand:VHSDF 3 "register_operand" "0")))]
1695 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1696 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1699 (define_insn "*aarch64_fnma4_elt<mode>"
1700 [(set (match_operand:VDQF 0 "register_operand" "=w")
1703 (match_operand:VDQF 3 "register_operand" "w"))
1706 (match_operand:VDQF 1 "register_operand" "<h_con>")
1707 (parallel [(match_operand:SI 2 "immediate_operand")])))
1708 (match_operand:VDQF 4 "register_operand" "0")))]
1711 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1712 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1714 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1717 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1718 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1721 (match_operand:VDQSF 3 "register_operand" "w"))
1722 (vec_duplicate:VDQSF
1724 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1725 (parallel [(match_operand:SI 2 "immediate_operand")])))
1726 (match_operand:VDQSF 4 "register_operand" "0")))]
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1730 INTVAL (operands[2])));
1731 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1733 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1736 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1737 [(set (match_operand:VMUL 0 "register_operand" "=w")
1740 (match_operand:VMUL 2 "register_operand" "w"))
1742 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1743 (match_operand:VMUL 3 "register_operand" "0")))]
1745 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1746 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1749 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1750 [(set (match_operand:DF 0 "register_operand" "=w")
1753 (match_operand:V2DF 1 "register_operand" "w")
1754 (parallel [(match_operand:SI 2 "immediate_operand")]))
1756 (match_operand:DF 3 "register_operand" "w"))
1757 (match_operand:DF 4 "register_operand" "0")))]
1760 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1761 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1763 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1766 ;; Vector versions of the floating-point frint patterns.
1767 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1768 (define_insn "<frint_pattern><mode>2"
1769 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1770 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1773 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1774 [(set_attr "type" "neon_fp_round_<stype><q>")]
1777 ;; Vector versions of the fcvt standard patterns.
1778 ;; Expands to lbtrunc, lround, lceil, lfloor
1779 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1780 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1781 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1782 [(match_operand:VHSDF 1 "register_operand" "w")]
1785 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1786 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1789 ;; HF Scalar variants of related SIMD instructions.
1790 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1791 [(set (match_operand:HI 0 "register_operand" "=w")
1792 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1794 "TARGET_SIMD_F16INST"
1795 "fcvt<frint_suffix><su>\t%h0, %h1"
1796 [(set_attr "type" "neon_fp_to_int_s")]
1799 (define_insn "<optab>_trunchfhi2"
1800 [(set (match_operand:HI 0 "register_operand" "=w")
1801 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1802 "TARGET_SIMD_F16INST"
1803 "fcvtz<su>\t%h0, %h1"
1804 [(set_attr "type" "neon_fp_to_int_s")]
1807 (define_insn "<optab>hihf2"
1808 [(set (match_operand:HF 0 "register_operand" "=w")
1809 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1810 "TARGET_SIMD_F16INST"
1811 "<su_optab>cvtf\t%h0, %h1"
1812 [(set_attr "type" "neon_int_to_fp_s")]
1815 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1816 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1817 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1819 (match_operand:VDQF 1 "register_operand" "w")
1820 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1823 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1824 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1826 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1828 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1829 output_asm_insn (buf, operands);
1832 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1835 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1836 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1837 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1838 [(match_operand:VHSDF 1 "register_operand")]
1843 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1844 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1845 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1846 [(match_operand:VHSDF 1 "register_operand")]
1851 (define_expand "ftrunc<VHSDF:mode>2"
1852 [(set (match_operand:VHSDF 0 "register_operand")
1853 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
1858 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1861 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1863 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1864 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1867 ;; Conversions between vectors of floats and doubles.
1868 ;; Contains a mix of patterns to match standard pattern names
1869 ;; and those for intrinsics.
1871 ;; Float widening operations.
1873 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1875 (float_extend:<VWIDE> (vec_select:<VHALF>
1876 (match_operand:VQ_HSF 1 "register_operand" "w")
1877 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1880 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1881 [(set_attr "type" "neon_fp_cvt_widen_s")]
1884 ;; Convert between fixed-point and floating-point (vector modes)
1886 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1887 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1888 (unspec:<VHSDF:FCVT_TARGET>
1889 [(match_operand:VHSDF 1 "register_operand" "w")
1890 (match_operand:SI 2 "immediate_operand" "i")]
1893 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1894 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
1897 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1898 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1899 (unspec:<VDQ_HSDI:FCVT_TARGET>
1900 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1901 (match_operand:SI 2 "immediate_operand" "i")]
1904 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1905 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1908 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1909 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1910 ;; the meaning of HI and LO changes depending on the target endianness.
1911 ;; While elsewhere we map the higher numbered elements of a vector to
1912 ;; the lower architectural lanes of the vector, for these patterns we want
1913 ;; to always treat "hi" as referring to the higher architectural lanes.
1914 ;; Consequently, while the patterns below look inconsistent with our
1915 ;; other big-endian patterns their behavior is as required.
;; Float widening-unpack of the LOW architectural half (FCVTL): lo-half
;; lane selector ('false') paired with the _lo_ internal insn.  Per the
;; endianness note above, "lo" always means the lower architectural
;; lanes for these patterns.
1917 (define_expand "vec_unpacks_lo_<mode>"
1918 [(match_operand:<VWIDE> 0 "register_operand" "")
1919 (match_operand:VQ_HSF 1 "register_operand" "")]
1922 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1923 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1929 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1930 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1931 (float_extend:<VWIDE> (vec_select:<VHALF>
1932 (match_operand:VQ_HSF 1 "register_operand" "w")
1933 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1936 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1937 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Float widening-unpack of the HIGH architectural half (FCVTL2).
;; Per the endianness note above, "hi" here always means the higher
;; architectural lanes, so the lane-selection parallel is built with the
;; 'high' flag and must be fed to the _hi_ internal pattern, mirroring
;; the integer vec_unpack<su>_hi_<mode> expander.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (match_operand:VQ_HSF 1 "register_operand" "")]
  "TARGET_SIMD"
  {
    /* Select the high architectural half of operand 1.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
    /* Fixed: the original emitted gen_aarch64_simd_vec_unpacks_lo_<mode>
       here, pairing a hi-half lane selector with the lo-half insn (whose
       operand 2 predicate is vect_par_cnst_lo_half) — that widens the
       wrong half / fails to match.  Emit the _hi_ variant (FCVTL2).  */
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
1951 (define_insn "aarch64_float_extend_lo_<Vwide>"
1952 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1953 (float_extend:<VWIDE>
1954 (match_operand:VDF 1 "register_operand" "w")))]
1956 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1957 [(set_attr "type" "neon_fp_cvt_widen_s")]
1960 ;; Float narrowing operations.
1962 (define_insn "aarch64_float_truncate_lo_<mode>"
1963 [(set (match_operand:VDF 0 "register_operand" "=w")
1965 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1967 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1968 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1971 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1972 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1974 (match_operand:VDF 1 "register_operand" "0")
1976 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1977 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1978 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1979 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1982 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1983 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1986 (match_operand:<VWIDE> 2 "register_operand" "w"))
1987 (match_operand:VDF 1 "register_operand" "0")))]
1988 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1989 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1990 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2 intrinsic expander: narrow operand 2 into the high half of
;; operand 0 while keeping operand 1 ("0"-tied) in the low half.
;; Selects the _be or _le internal insn, which differ only in the
;; vec_concat operand order required by the target endianness.
1993 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1994 [(match_operand:<VDBL> 0 "register_operand" "=w")
1995 (match_operand:VDF 1 "register_operand" "0")
1996 (match_operand:<VWIDE> 2 "register_operand" "w")]
1999 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2000 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2001 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2002 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN the "lo" source into a
;; V2SF temporary, then FCVTN2 the "hi" source into the high half of the
;; result.  On big-endian the sources swap roles (lo=2, hi=1) so GCC
;; lane numbering maps onto the architectural halves.
2007 (define_expand "vec_pack_trunc_v2df"
2008 [(set (match_operand:V4SF 0 "register_operand")
2010 (float_truncate:V2SF
2011 (match_operand:V2DF 1 "register_operand"))
2012 (float_truncate:V2SF
2013 (match_operand:V2DF 2 "register_operand"))
2017 rtx tmp = gen_reg_rtx (V2SFmode);
2018 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2019 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2021 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2022 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2023 tmp, operands[hi]));
;; Pack two scalar DFs into a V2SF: build a V2DF temporary from the two
;; scalars via move_lo_quad/move_hi_quad (endian-swapped as in the other
;; pack expanders), then FCVTN it down to V2SF.
2028 (define_expand "vec_pack_trunc_df"
2029 [(set (match_operand:V2SF 0 "register_operand")
2032 (match_operand:DF 1 "register_operand"))
2034 (match_operand:DF 2 "register_operand"))
2038 rtx tmp = gen_reg_rtx (V2SFmode);
2039 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2040 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2042 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2043 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2044 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2050 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2052 ;; a = (b < c) ? b : c;
2053 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2054 ;; either explicitly or indirectly via -ffast-math.
2056 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2057 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2058 ;; operand will be returned when both operands are zero (i.e. they may not
2059 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2060 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2063 (define_insn "<su><maxmin><mode>3"
2064 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2065 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2066 (match_operand:VHSDF 2 "register_operand" "w")))]
2068 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2069 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2072 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2073 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2074 ;; which implement the IEEE fmax ()/fmin () functions.
2075 (define_insn "<maxmin_uns><mode>3"
2076 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2077 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2078 (match_operand:VHSDF 2 "register_operand" "w")]
2081 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2082 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2085 ;; 'across lanes' add.
;; Integer across-lanes add reduction returning a scalar: run the ADDV
;; internal pattern into a scratch vector, then extract architectural
;; lane 0 (ENDIAN_LANE_N maps GCC lane 0 to the correct architectural
;; lane for the target endianness).
2087 (define_expand "reduc_plus_scal_<mode>"
2088 [(match_operand:<VEL> 0 "register_operand" "=w")
2089 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2093 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2094 rtx scratch = gen_reg_rtx (<MODE>mode);
2095 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2096 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2101 (define_insn "aarch64_faddp<mode>"
2102 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2103 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2104 (match_operand:VHSDF 2 "register_operand" "w")]
2107 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2108 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2111 (define_insn "aarch64_reduc_plus_internal<mode>"
2112 [(set (match_operand:VDQV 0 "register_operand" "=w")
2113 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2116 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2117 [(set_attr "type" "neon_reduc_add<q>")]
2120 (define_insn "aarch64_reduc_plus_internalv2si"
2121 [(set (match_operand:V2SI 0 "register_operand" "=w")
2122 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2125 "addp\\t%0.2s, %1.2s, %1.2s"
2126 [(set_attr "type" "neon_reduc_add")]
2129 (define_insn "reduc_plus_scal_<mode>"
2130 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2131 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2134 "faddp\\t%<Vetype>0, %1.<Vtype>"
2135 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add reduction: there is no FADDV, so reduce with two rounds of
;; pairwise FADDP (4 -> 2 -> 1 distinct sums; after the second FADDP
;; every lane of the scratch holds the total), then extract
;; architectural lane 0.
2138 (define_expand "reduc_plus_scal_v4sf"
2139 [(set (match_operand:SF 0 "register_operand")
2140 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2144 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2145 rtx scratch = gen_reg_rtx (V4SFmode);
2146 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2147 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2148 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per lane (CLS).
2152 (define_insn "clrsb<mode>2"
2153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2154 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2156 "cls\\t%0.<Vtype>, %1.<Vtype>"
2157 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per lane (CLZ).
2160 (define_insn "clz<mode>2"
2161 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2162 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2164 "clz\\t%0.<Vtype>, %1.<Vtype>"
2165 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte lane (CNT); byte-element vectors (VB) only.
2168 (define_insn "popcount<mode>2"
2169 [(set (match_operand:VB 0 "register_operand" "=w")
2170 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2172 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2173 [(set_attr "type" "neon_cnt<q>")]
2176 ;; 'across lanes' max and min ops.
2178 ;; Template for outputting a scalar, so we can create __builtins which can be
2179 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min across-lanes reduction to a scalar: run the internal
;; reduction into a scratch vector, then extract (endian-corrected) lane 0.
2180 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2181 [(match_operand:<VEL> 0 "register_operand")
2182 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2186 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2187 rtx scratch = gen_reg_rtx (<MODE>mode);
2188 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2190 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Same shape for integer max/min reductions, signed and unsigned.
2196 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2197 [(match_operand:<VEL> 0 "register_operand")
2198 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2202 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2203 rtx scratch = gen_reg_rtx (<MODE>mode);
2204 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2206 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min: [SU]MAXV / [SU]MINV.
2211 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2212 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2213 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2216 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2217 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI special case: no 2s across-lanes form, use the pairwise variant
;; with the source repeated.
2220 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2221 [(set (match_operand:V2SI 0 "register_operand" "=w")
2222 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2225 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2226 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (FMAXV/FMINV-style, selected by <vp>).
2229 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2230 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2231 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2234 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2235 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2238 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2240 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2243 ;; Thus our BSL is of the form:
2244 ;; op0 = bsl (mask, op2, op3)
2245 ;; We can use any of:
2248 ;; bsl mask, op1, op2
2249 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2250 ;; bit op0, op2, mask
2251 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2252 ;; bif op0, op1, mask
2254 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2255 ;; Some forms of straight-line code may generate the equivalent form
2256 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bitwise select: alternative chosen by which input is tied to the
;; destination (op1 -> BSL, op2 -> BIT, op3 -> BIF).
2258 (define_insn "aarch64_simd_bsl<mode>_internal"
2259 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2263 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2264 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2265 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2266 (match_dup:<V_cmp_result> 3)
2270 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2271 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2272 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2273 [(set_attr "type" "neon_bsl<q>")]
;; Commuted form of the above: the inner XOR's operands appear in the
;; opposite order, which combine can produce; recog does not try both
;; permutations, hence the separate pattern.  Operands 2/3 swap roles
;; in the output templates accordingly.
2282 (define_insn "*aarch64_simd_bsl<mode>_alt"
2283 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2287 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2288 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2289 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2290 (match_dup:VSDQ_I_DI 2)))]
2293 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2294 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2295 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2296 [(set_attr "type" "neon_bsl<q>")]
;; BSL expander: the internal pattern is integer-only, so FP modes are
;; punned to the matching integer mode (<V_cmp_result>) via lowpart,
;; selected on a scratch, and the result moved back in the FP mode.
2299 (define_expand "aarch64_simd_bsl<mode>"
2300 [(match_operand:VALLDIF 0 "register_operand")
2301 (match_operand:<V_cmp_result> 1 "register_operand")
2302 (match_operand:VALLDIF 2 "register_operand")
2303 (match_operand:VALLDIF 3 "register_operand")]
2306 /* We can't alias operands together if they have different modes.  */
2307 rtx tmp = operands[0];
2308 if (FLOAT_MODE_P (<MODE>mode))
2310 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2311 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2312 tmp = gen_reg_rtx (<V_cmp_result>mode);
2314 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2315 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
;; Only copy back when a scratch was used (i.e. the FP path above).
2319 if (tmp != operands[0])
2320 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select between operands 1 and 2 under mask operand 3.
;; The -1/0 and 0/-1 constant cases degenerate to a move or a NOT of the
;; mask; otherwise force the selectees into registers and emit a BSL.
2325 (define_expand "vcond_mask_<mode><v_cmp_result>"
2326 [(match_operand:VALLDI 0 "register_operand")
2327 (match_operand:VALLDI 1 "nonmemory_operand")
2328 (match_operand:VALLDI 2 "nonmemory_operand")
2329 (match_operand:<V_cmp_result> 3 "register_operand")]
2332 /* If we have (a = (P) ? -1 : 0);
2333 Then we can simply move the generated mask (result must be int).  */
2334 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2335 && operands[2] == CONST0_RTX (<MODE>mode))
2336 emit_move_insn (operands[0], operands[3]);
2337 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2338 else if (operands[1] == CONST0_RTX (<MODE>mode)
2339 && operands[2] == CONSTM1_RTX (<MODE>mode))
2340 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
2343 if (!REG_P (operands[1]))
2344 operands[1] = force_reg (<MODE>mode, operands[1]);
2345 if (!REG_P (operands[2]))
2346 operands[2] = force_reg (<MODE>mode, operands[2]);
2347 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2348 operands[1], operands[2]));
2354 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.
;; Each comparison maps onto a CM* instruction; GT/GE/LTU/LEU use the
;; swapped-operand form of the opposite instruction, NE is EQ + NOT.
;; NOTE(review): the dispatch (switch on CODE) is elided in this view;
;; only the per-case emits are visible.
2356 (define_expand "vec_cmp<mode><mode>"
2357 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2358 (match_operator 1 "comparison_operator"
2359 [(match_operand:VSDQ_I_DI 2 "register_operand")
2360 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2363 rtx mask = operands[0];
2364 enum rtx_code code = GET_CODE (operands[1]);
;; Comparisons against zero can use the immediate-zero CM* forms.
2374 if (operands[3] == CONST0_RTX (<MODE>mode))
2379 if (!REG_P (operands[3]))
2380 operands[3] = force_reg (<MODE>mode, operands[3]);
2388 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2392 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2396 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2400 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2404 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]))
;; (operand order swapped above: a LTU b  ==  b GTU a)
2408 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2412 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2416 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2420 /* Handle NE as !EQ.  */
2421 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2422 emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
2426 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask (<V_cmp_result>).
;; Ordinary ordered comparisons map to FCMEQ/FCMGE/FCMGT (with operand
;; swaps for LT/LE); unordered-aware codes are built from the ordered
;; ones plus OR/NOT, since FCM* yields false on unordered lanes.
;; NOTE(review): the switch dispatch lines are elided in this view.
2436 (define_expand "vec_cmp<mode><v_cmp_result>"
2437 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2438 (match_operator 1 "comparison_operator"
2439 [(match_operand:VDQF 2 "register_operand")
2440 (match_operand:VDQF 3 "nonmemory_operand")]))]
2443 int use_zero_form = 0;
2444 enum rtx_code code = GET_CODE (operands[1]);
2445 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
2447 rtx (*comparison) (rtx, rtx, rtx) = NULL;
;; Zero comparand enables the immediate-zero FCM* forms.
2456 if (operands[3] == CONST0_RTX (<MODE>mode))
2463 if (!REG_P (operands[3]))
2464 operands[3] = force_reg (<MODE>mode, operands[3]);
2474 comparison = gen_aarch64_cmlt<mode>;
2479 std::swap (operands[2], operands[3]);
2483 comparison = gen_aarch64_cmgt<mode>;
2488 comparison = gen_aarch64_cmle<mode>;
2493 std::swap (operands[2], operands[3]);
2497 comparison = gen_aarch64_cmge<mode>;
2501 comparison = gen_aarch64_cmeq<mode>;
2518 /* FCM returns false for lanes which are unordered, so if we use
2519 the inverse of the comparison we actually want to emit, then
2520 invert the result, we will end up with the correct result.
2521 Note that a NE NaN and NaN NE b are true for all a, b.
2523 Our transformations are:
2524 a UNGE b -> !(b GT a)
2525 a UNGT b -> !(b GE a)
2526 a UNLE b -> !(a GT b)
2527 a UNLT b -> !(a GE b)
2528 a NE b -> !(a EQ b)  */
2529 gcc_assert (comparison != NULL);
2530 emit_insn (comparison (operands[0], operands[2], operands[3]));
2531 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2539 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2540 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2546 gcc_assert (comparison != NULL);
2547 emit_insn (comparison (operands[0], operands[2], operands[3]));
;; UNEQ: (a > b || b > a) is !UNEQ, so OR the two GTs and invert.
2551 /* We first check (a > b || b > a) which is !UNEQ, inverting
2552 this result will then give us (a == b || a UNORDERED b).  */
2553 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2554 operands[2], operands[3]));
2555 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2556 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2557 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
;; UNORDERED: !(a > b || b >= a).
2561 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2562 UNORDERED as !ORDERED.  */
2563 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2564 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2565 operands[3], operands[2]));
2566 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2567 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
;; ORDERED: (a > b || b >= a), no final inversion.
2571 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2572 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2573 operands[3], operands[2]));
2574 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
;; Unsigned integer vector compare: identical to the signed expander,
;; which already distinguishes signed/unsigned by rtx code (GTU etc.),
;; so simply forward to vec_cmp.
2584 (define_expand "vec_cmpu<mode><mode>"
2585 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2586 (match_operator 1 "comparison_operator"
2587 [(match_operand:VSDQ_I_DI 2 "register_operand")
2588 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2591 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2592 operands[2], operands[3]));
;; vcond/vcondu family: build a mask with vec_cmp, then select with
;; vcond_mask.  In each variant, NE is rewritten as EQ with operands 1/2
;; swapped to avoid the extra inversion that vec_cmp would emit for NE.
2596 (define_expand "vcond<mode><mode>"
2597 [(set (match_operand:VALLDI 0 "register_operand")
2598 (if_then_else:VALLDI
2599 (match_operator 3 "comparison_operator"
2600 [(match_operand:VALLDI 4 "register_operand")
2601 (match_operand:VALLDI 5 "nonmemory_operand")])
2602 (match_operand:VALLDI 1 "nonmemory_operand")
2603 (match_operand:VALLDI 2 "nonmemory_operand")))]
2606 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2607 enum rtx_code code = GET_CODE (operands[3]);
2609 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2610 it as well as switch operands 1/2 in order to avoid the additional
2614 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2615 operands[4], operands[5]);
2616 std::swap (operands[1], operands[2]);
2618 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2619 operands[4], operands[5]));
2620 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2621 operands[2], mask));
;; Mixed-mode variant: compare in VDQF_COND, select in <V_cmp_mixed>.
2626 (define_expand "vcond<v_cmp_mixed><mode>"
2627 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2628 (if_then_else:<V_cmp_mixed>
2629 (match_operator 3 "comparison_operator"
2630 [(match_operand:VDQF_COND 4 "register_operand")
2631 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2632 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2633 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2636 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2637 enum rtx_code code = GET_CODE (operands[3]);
2639 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2640 it as well as switch operands 1/2 in order to avoid the additional
2644 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2645 operands[4], operands[5]);
2646 std::swap (operands[1], operands[2]);
2648 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2649 operands[4], operands[5]));
2650 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
2651 operands[0], operands[1],
2652 operands[2], mask));
;; Unsigned integer variant; mask mode equals the data mode here.
2657 (define_expand "vcondu<mode><mode>"
2658 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2659 (if_then_else:VSDQ_I_DI
2660 (match_operator 3 "comparison_operator"
2661 [(match_operand:VSDQ_I_DI 4 "register_operand")
2662 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2663 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2664 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2667 rtx mask = gen_reg_rtx (<MODE>mode);
2668 enum rtx_code code = GET_CODE (operands[3]);
2670 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2671 it as well as switch operands 1/2 in order to avoid the additional
2675 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2676 operands[4], operands[5]);
2677 std::swap (operands[1], operands[2]);
2679 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2680 operands[4], operands[5]));
2681 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2682 operands[2], mask));
;; Unsigned compare in the mixed integer mode, FP-mode selection.
2686 (define_expand "vcondu<mode><v_cmp_mixed>"
2687 [(set (match_operand:VDQF 0 "register_operand")
2689 (match_operator 3 "comparison_operator"
2690 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2691 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2692 (match_operand:VDQF 1 "nonmemory_operand")
2693 (match_operand:VDQF 2 "nonmemory_operand")))]
2696 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2697 enum rtx_code code = GET_CODE (operands[3]);
2699 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2700 it as well as switch operands 1/2 in order to avoid the additional
2704 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2705 operands[4], operands[5]);
2706 std::swap (operands[1], operands[2]);
2708 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2710 operands[4], operands[5]));
2711 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2712 operands[2], mask));
2716 ;; Patterns for AArch64 SIMD Intrinsics.
2718 ;; Lane extraction with sign extension to general purpose register.
;; Lane extraction with sign extension into a general register (SMOV).
;; Lane indices in RTL use GCC vector-extension numbering; they are
;; flipped for big-endian at output time via ENDIAN_LANE_N.
2719 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2720 [(set (match_operand:GPI 0 "register_operand" "=r")
2723 (match_operand:VDQQH 1 "register_operand" "w")
2724 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2727 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2728 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2730 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to SImode (UMOV).
2733 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2734 [(set (match_operand:SI 0 "register_operand" "=r")
2737 (match_operand:VDQQH 1 "register_operand" "w")
2738 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2741 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2742 return "umov\\t%w0, %1.<Vetype>[%2]";
2744 [(set_attr "type" "neon_to_gp<q>")]
;; Plain lane extraction: UMOV to GP reg, DUP to SIMD reg, or ST1 of a
;; single lane to memory, depending on the alternative matched.
2750 (define_insn "aarch64_get_lane<mode>"
2751 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2753 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2754 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2757 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2758 switch (which_alternative)
2761 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2763 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2765 return "st1\\t{%1.<Vetype>}[%2], %0";
2770 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2773 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concatenate a D-reg value with zero into a Q reg.  Little-endian form:
;; the value is operand 1 (low half), zero is the high half.
2776 (define_insn "*aarch64_combinez<mode>"
2777 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2779 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2780 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2781 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2786 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2787 (set_attr "simd" "yes,*,yes")
2788 (set_attr "fp" "*,yes,*")]
;; Big-endian twin: vec_concat operand order is reversed.
2791 (define_insn "*aarch64_combinez_be<mode>"
2792 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2794 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2795 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2796 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2801 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2802 (set_attr "simd" "yes,*,yes")
2803 (set_attr "fp" "*,yes,*")]
;; Public combine expander; swaps the halves for big-endian before
;; delegating to the internal pattern.
2806 (define_expand "aarch64_combine<mode>"
2807 [(match_operand:<VDBL> 0 "register_operand")
2808 (match_operand:VDC 1 "register_operand")
2809 (match_operand:VDC 2 "register_operand")]
2813 if (BYTES_BIG_ENDIAN)
2823 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until after reload, then split into the real moves
;; (mirrored again for big-endian) by aarch64_split_simd_combine.
2828 (define_insn_and_split "aarch64_combine_internal<mode>"
2829 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2830 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2831 (match_operand:VDC 2 "register_operand" "w")))]
2834 "&& reload_completed"
2837 if (BYTES_BIG_ENDIAN)
2838 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2840 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2843 [(set_attr "type" "multiple")]
;; Intrinsics entry point: build the double-width value via explicit
;; low-half then high-half moves.
2846 (define_expand "aarch64_simd_combine<mode>"
2847 [(match_operand:<VDBL> 0 "register_operand")
2848 (match_operand:VDC 1 "register_operand")
2849 (match_operand:VDC 2 "register_operand")]
2852 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2853 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2856 [(set_attr "type" "multiple")]
2859 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves: [SU]ADDL2 / [SU]SUBL2.
2861 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2863 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2864 (match_operand:VQW 1 "register_operand" "w")
2865 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2866 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2867 (match_operand:VQW 2 "register_operand" "w")
2870 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2871 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the LOW halves: [SU]ADDL / [SU]SUBL.
2874 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2876 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2877 (match_operand:VQW 1 "register_operand" "w")
2878 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2879 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2880 (match_operand:VQW 2 "register_operand" "w")
2883 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2884 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders for the *2 (high-half) forms: build the hi-half
;; selector and forward to the corresponding _hi_internal insn.
2888 (define_expand "aarch64_saddl2<mode>"
2889 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2890 (match_operand:VQW 1 "register_operand" "w")
2891 (match_operand:VQW 2 "register_operand" "w")]
2894 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2895 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2900 (define_expand "aarch64_uaddl2<mode>"
2901 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2902 (match_operand:VQW 1 "register_operand" "w")
2903 (match_operand:VQW 2 "register_operand" "w")]
2906 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2907 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2912 (define_expand "aarch64_ssubl2<mode>"
2913 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2914 (match_operand:VQW 1 "register_operand" "w")
2915 (match_operand:VQW 2 "register_operand" "w")]
2918 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2919 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2924 (define_expand "aarch64_usubl2<mode>"
2925 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2926 (match_operand:VQW 1 "register_operand" "w")
2927 (match_operand:VQW 2 "register_operand" "w")]
2930 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2931 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Plain widening add/sub of full D-reg vectors: [SU]ADDL / [SU]SUBL.
2936 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2937 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2938 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2939 (match_operand:VD_BHSI 1 "register_operand" "w"))
2941 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2943 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2944 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2947 ;; <su><addsub>w<q>.
;; widen_ssum for Q-reg sources: SADDW on the low half into a temp,
;; then SADDW2 to fold in the high half.
2949 (define_expand "widen_ssum<mode>3"
2950 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2951 (plus:<VDBLW> (sign_extend:<VDBLW>
2952 (match_operand:VQW 1 "register_operand" ""))
2953 (match_operand:<VDBLW> 2 "register_operand" "")))]
2956 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2957 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2959 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2961 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; D-reg source: a single SADDW suffices.
2966 (define_expand "widen_ssum<mode>3"
2967 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2968 (plus:<VWIDE> (sign_extend:<VWIDE>
2969 (match_operand:VD_BHSI 1 "register_operand" ""))
2970 (match_operand:<VWIDE> 2 "register_operand" "")))]
2973 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned twins of the two expanders above (UADDW / UADDW2).
2977 (define_expand "widen_usum<mode>3"
2978 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2979 (plus:<VDBLW> (zero_extend:<VDBLW>
2980 (match_operand:VQW 1 "register_operand" ""))
2981 (match_operand:<VDBLW> 2 "register_operand" "")))]
2984 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2985 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2987 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2989 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2994 (define_expand "widen_usum<mode>3"
2995 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2996 (plus:<VWIDE> (zero_extend:<VWIDE>
2997 (match_operand:VD_BHSI 1 "register_operand" ""))
2998 (match_operand:<VWIDE> 2 "register_operand" "")))]
3001 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide + narrow (extended) arithmetic: [SU]ADDW / [SU]SUBW, D-reg narrow.
3005 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3006 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3007 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3009 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3011 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3012 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same, with the narrow operand taken from the low half of a Q reg.
3015 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3016 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3017 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3020 (match_operand:VQW 2 "register_operand" "w")
3021 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3023 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3024 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant: [SU]ADDW2 / [SU]SUBW2.
3027 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3028 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3029 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3032 (match_operand:VQW 2 "register_operand" "w")
3033 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3035 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3036 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Intrinsic expanders for the w2 forms: build the hi-half selector and
;; forward to the _internal patterns above.
3039 (define_expand "aarch64_saddw2<mode>"
3040 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3041 (match_operand:<VWIDE> 1 "register_operand" "w")
3042 (match_operand:VQW 2 "register_operand" "w")]
3045 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3046 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3051 (define_expand "aarch64_uaddw2<mode>"
3052 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3053 (match_operand:<VWIDE> 1 "register_operand" "w")
3054 (match_operand:VQW 2 "register_operand" "w")]
3057 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3058 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3064 (define_expand "aarch64_ssubw2<mode>"
3065 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3066 (match_operand:<VWIDE> 1 "register_operand" "w")
3067 (match_operand:VQW 2 "register_operand" "w")]
3070 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3071 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3076 (define_expand "aarch64_usubw2<mode>"
3077 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3078 (match_operand:<VWIDE> 1 "register_operand" "w")
3079 (match_operand:VQW 2 "register_operand" "w")]
3082 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3083 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3088 ;; <su><r>h<addsub>.
;; Halving add/sub, optionally rounding: [SU][R]HADD / [SU]HSUB.
3090 (define_insn "aarch64_<sur>h<addsub><mode>"
3091 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3092 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3093 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3096 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3097 [(set_attr "type" "neon_<addsub>_halve<q>")]
;; Narrowing high-half add/sub: [R]ADDHN / [R]SUBHN.
3102 (define_insn "aarch64_<sur><addsub>hn<mode>"
3103 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3104 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3105 (match_operand:VQN 2 "register_operand" "w")]
3108 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3109 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; *hn2 form: operand 1 ("0" constraint) supplies the low half of the
;; destination; the narrowed result is written to the high half.
3112 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3113 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3114 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3115 (match_operand:VQN 2 "register_operand" "w")
3116 (match_operand:VQN 3 "register_operand" "w")]
3119 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3120 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (PMUL), byte-element vectors.
3125 (define_insn "aarch64_pmul<mode>"
3126 [(set (match_operand:VB 0 "register_operand" "=w")
3127 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3128 (match_operand:VB 2 "register_operand" "w")]
3131 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3132 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: extended FP multiply (vector and scalar modes).
3137 (define_insn "aarch64_fmulx<mode>"
3138 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3140 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3141 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3144 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3145 [(set_attr "type" "neon_fp_mul_<stype>")]
;; FMULX by a lane of a vector of the OTHER width (vswap_width),
;; endian-corrected lane index.
3150 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3151 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3153 [(match_operand:VDQSF 1 "register_operand" "w")
3154 (vec_duplicate:VDQSF
3156 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3157 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3161 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3162 INTVAL (operands[3])));
3163 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3165 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
;; FMULX by a lane of a same-width vector.
3170 (define_insn "*aarch64_mulx_elt<mode>"
3171 [(set (match_operand:VDQF 0 "register_operand" "=w")
3173 [(match_operand:VDQF 1 "register_operand" "w")
3176 (match_operand:VDQF 2 "register_operand" "w")
3177 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3181 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3182 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3184 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX by a broadcast scalar: emitted as the lane-0 indexed form.
3189 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3190 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3192 [(match_operand:VHSDF 1 "register_operand" "w")
3193 (vec_duplicate:VHSDF
3194 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3197 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3198 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Scalar FMULX with one operand taken from a vector lane.
3205 (define_insn "*aarch64_vgetfmulx<mode>"
3206 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3208 [(match_operand:<VEL> 1 "register_operand" "w")
3210 (match_operand:VDQF 2 "register_operand" "w")
3211 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3215 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3216 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3218 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [SU]QADD / [SU]QSUB via the BINQOPS iterator.
3222 (define_insn "aarch64_<su_optab><optab><mode>"
3223 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3224 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3225 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3227 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3228 [(set_attr "type" "neon_<optab><q>")]
;; SUQADD / USQADD: accumulating forms, destination tied to operand 1.
3233 (define_insn "aarch64_<sur>qadd<mode>"
3234 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3235 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3236 (match_operand:VSDQ_I 2 "register_operand" "w")]
3239 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3240 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed-to-unsigned saturating narrow.
3245 (define_insn "aarch64_sqmovun<mode>"
3246 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3247 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3250 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3251 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; SQXTN / UQXTN: saturating narrow, signedness from <sur>.
3256 (define_insn "aarch64_<sur>qmovn<mode>"
3257 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3258 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3261 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3262 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (SQABS/SQNEG-style, selected by <optab>).
3267 (define_insn "aarch64_s<optab><mode>"
3268 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3270 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3272 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3273 [(set_attr "type" "neon_<optab><q>")]
3278 (define_insn "aarch64_sq<r>dmulh<mode>"
3279 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3281 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3282 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3285 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3286 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector-by-lane SQ(R)DMULH; operand 2 is a 64-bit (<VCOND>) vector and
;; operand 3 the lane index.  The C fragment renumbers the lane for
;; big-endian via ENDIAN_LANE_N before printing.
3291 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3292 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3294 [(match_operand:VDQHS 1 "register_operand" "w")
3296 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3297 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3301 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3302 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3303 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As above but the lane comes from a 128-bit (<VCONQ>) vector (laneq form).
3306 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3307 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3309 [(match_operand:VDQHS 1 "register_operand" "w")
3311 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3312 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3316 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3317 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3318 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane SQ(R)DMULH (SD_HSI: scalar HI/SI modes).  The lane index is
;; renumbered for big-endian via ENDIAN_LANE_N before printing.
;; Fixed: the lane reference now uses the element-type attribute <Vetype>
;; (as in every sibling lane pattern, e.g. the VDQHS variants above) instead
;; of the register-prefix attribute <v>; both expand identically for HI/SI,
;; so emitted assembly is unchanged.
3321 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3322 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3324 [(match_operand:SD_HSI 1 "register_operand" "w")
3326 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3327 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3331 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3332 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3333 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-by-lane SQ(R)DMULH, laneq form: lane taken from a 128-bit (<VCONQ>)
;; vector.  Lane index is endian-adjusted before printing.
;; Fixed: lane reference uses <Vetype> (element type) rather than <v>
;; (register-width prefix), matching every other lane pattern in this file;
;; the two attributes print identically for HI/SI, so output is unchanged.
3336 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3337 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3339 [(match_operand:SD_HSI 1 "register_operand" "w")
3341 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3342 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3346 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3347 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3348 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH / SQRDMLSH: rounding doubling multiply accumulate/subtract
;; returning high half; operand 1 is the accumulator, tied to the destination.
;; The <SQRDMLH_AS:rdma_as> iterator selects "a" vs "s".
3353 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3354 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3356 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3357 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3358 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3361 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3362 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3365 ;; sqrdml[as]h_lane.
;; Vector-by-lane form: lane from a 64-bit (<VCOND>) vector, endian-adjusted.
3367 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3368 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3370 [(match_operand:VDQHS 1 "register_operand" "0")
3371 (match_operand:VDQHS 2 "register_operand" "w")
3373 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3374 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3378 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3380 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3382 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane form (SD_HSI scalar modes).
3385 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3386 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3388 [(match_operand:SD_HSI 1 "register_operand" "0")
3389 (match_operand:SD_HSI 2 "register_operand" "w")
3391 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3392 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3396 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3398 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3400 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3403 ;; sqrdml[as]h_laneq.
;; Vector-by-lane, lane from a 128-bit (<VCONQ>) vector.
3405 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3406 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3408 [(match_operand:VDQHS 1 "register_operand" "0")
3409 (match_operand:VDQHS 2 "register_operand" "w")
3411 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3412 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3416 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3418 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3420 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane SQRDML[AS]H, laneq form: lane from a 128-bit (<VCONQ>)
;; vector, endian-adjusted before printing.
;; Fixed: lane reference now uses <Vetype>, matching the _lane SD_HSI variant
;; above (which prints %3.<Vetype>[%4]); <v> and <Vetype> expand identically
;; for HI/SI, so emitted assembly is unchanged.
3423 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3424 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3426 [(match_operand:SD_HSI 1 "register_operand" "0")
3427 (match_operand:SD_HSI 2 "register_operand" "w")
3429 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3430 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3434 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3436 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3438 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL / SQDMLSL: signed saturating doubling multiply-accumulate long.
;; Operand 1 is the wide accumulator (tied to the destination); operands 2/3
;; are narrow and sign-extended to <VWIDE> before the doubling multiply.
;; <SBINQOPS:as> selects accumulate ("a") vs subtract ("s").
3443 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3444 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3446 (match_operand:<VWIDE> 1 "register_operand" "0")
3449 (sign_extend:<VWIDE>
3450 (match_operand:VSD_HSI 2 "register_operand" "w"))
3451 (sign_extend:<VWIDE>
3452 (match_operand:VSD_HSI 3 "register_operand" "w")))
3455 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3456 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Vector-by-lane form: the multiplier is one lane of a 64-bit (<VCOND>)
;; vector, duplicated across the vector; lane index is endian-adjusted.
3461 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3462 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3464 (match_operand:<VWIDE> 1 "register_operand" "0")
3467 (sign_extend:<VWIDE>
3468 (match_operand:VD_HSI 2 "register_operand" "w"))
3469 (sign_extend:<VWIDE>
3470 (vec_duplicate:VD_HSI
3472 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3473 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3478 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3480 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3482 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above, lane taken from a 128-bit (<VCONQ>) vector (laneq form).
3485 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3486 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3488 (match_operand:<VWIDE> 1 "register_operand" "0")
3491 (sign_extend:<VWIDE>
3492 (match_operand:VD_HSI 2 "register_operand" "w"))
3493 (sign_extend:<VWIDE>
3494 (vec_duplicate:VD_HSI
3496 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3497 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3502 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3504 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3506 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane form (SD_HSI scalar modes); no vec_duplicate needed.
3509 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3510 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3512 (match_operand:<VWIDE> 1 "register_operand" "0")
3515 (sign_extend:<VWIDE>
3516 (match_operand:SD_HSI 2 "register_operand" "w"))
3517 (sign_extend:<VWIDE>
3519 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3520 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3525 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3527 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3529 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar-by-lane, laneq form.
3532 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3533 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3535 (match_operand:<VWIDE> 1 "register_operand" "0")
3538 (sign_extend:<VWIDE>
3539 (match_operand:SD_HSI 2 "register_operand" "w"))
3540 (sign_extend:<VWIDE>
3542 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3543 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3548 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3550 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3552 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: multiplier is a scalar register duplicated across the vector;
;; printed as lane 0 of that register.
3557 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3558 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3560 (match_operand:<VWIDE> 1 "register_operand" "0")
3563 (sign_extend:<VWIDE>
3564 (match_operand:VD_HSI 2 "register_operand" "w"))
3565 (sign_extend:<VWIDE>
3566 (vec_duplicate:VD_HSI
3567 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3570 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3571 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2 / SQDMLSL2: widening multiply-accumulate on the HIGH halves of
;; 128-bit sources; operand 4 is the parallel selecting the high half.
3576 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3577 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3579 (match_operand:<VWIDE> 1 "register_operand" "0")
3582 (sign_extend:<VWIDE>
3584 (match_operand:VQ_HSI 2 "register_operand" "w")
3585 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3586 (sign_extend:<VWIDE>
3588 (match_operand:VQ_HSI 3 "register_operand" "w")
3592 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3593 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the high-half selector and emits the _internal insn.
3596 (define_expand "aarch64_sqdmlal2<mode>"
3597 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3598 (match_operand:<VWIDE> 1 "register_operand" "w")
3599 (match_operand:VQ_HSI 2 "register_operand" "w")
3600 (match_operand:VQ_HSI 3 "register_operand" "w")]
3603 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3604 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3605 operands[2], operands[3], p));
;; Same, subtract variant.
3609 (define_expand "aarch64_sqdmlsl2<mode>"
3610 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3611 (match_operand:<VWIDE> 1 "register_operand" "w")
3612 (match_operand:VQ_HSI 2 "register_operand" "w")
3613 (match_operand:VQ_HSI 3 "register_operand" "w")]
3616 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3617 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3618 operands[2], operands[3], p));
;; SQDML[AS]L2 by-lane: high half of operand 2 times one lane of operand 3
;; (64-bit <VCOND> vector); lane is endian-adjusted before printing.
3624 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3627 (match_operand:<VWIDE> 1 "register_operand" "0")
3630 (sign_extend:<VWIDE>
3632 (match_operand:VQ_HSI 2 "register_operand" "w")
3633 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3634 (sign_extend:<VWIDE>
3635 (vec_duplicate:<VHALF>
3637 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3638 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3643 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3645 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3647 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; laneq form: lane from a 128-bit (<VCONQ>) vector.
3650 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3651 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3653 (match_operand:<VWIDE> 1 "register_operand" "0")
3656 (sign_extend:<VWIDE>
3658 (match_operand:VQ_HSI 2 "register_operand" "w")
3659 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3660 (sign_extend:<VWIDE>
3661 (vec_duplicate:<VHALF>
3663 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3664 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3669 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3671 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3673 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders: build the high-half selector and emit the _internal insns.
3676 (define_expand "aarch64_sqdmlal2_lane<mode>"
3677 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3678 (match_operand:<VWIDE> 1 "register_operand" "w")
3679 (match_operand:VQ_HSI 2 "register_operand" "w")
3680 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3681 (match_operand:SI 4 "immediate_operand" "i")]
3684 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3685 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3686 operands[2], operands[3],
3691 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3692 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3693 (match_operand:<VWIDE> 1 "register_operand" "w")
3694 (match_operand:VQ_HSI 2 "register_operand" "w")
3695 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3696 (match_operand:SI 4 "immediate_operand" "i")]
3699 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3700 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3701 operands[2], operands[3],
3706 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3707 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3708 (match_operand:<VWIDE> 1 "register_operand" "w")
3709 (match_operand:VQ_HSI 2 "register_operand" "w")
3710 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3711 (match_operand:SI 4 "immediate_operand" "i")]
3714 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3715 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3716 operands[2], operands[3],
3721 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3722 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3723 (match_operand:<VWIDE> 1 "register_operand" "w")
3724 (match_operand:VQ_HSI 2 "register_operand" "w")
3725 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3726 (match_operand:SI 4 "immediate_operand" "i")]
3729 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3730 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3731 operands[2], operands[3],
;; SQDML[AS]L2 _n form: high half of operand 2 times a scalar (<VEL>)
;; duplicated across the half-vector; printed as lane 0 of that register.
3736 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3737 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3739 (match_operand:<VWIDE> 1 "register_operand" "0")
3742 (sign_extend:<VWIDE>
3744 (match_operand:VQ_HSI 2 "register_operand" "w")
3745 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3746 (sign_extend:<VWIDE>
3747 (vec_duplicate:<VHALF>
3748 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3751 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3752 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders for the _n form: build high-half selector, emit _internal.
3755 (define_expand "aarch64_sqdmlal2_n<mode>"
3756 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3757 (match_operand:<VWIDE> 1 "register_operand" "w")
3758 (match_operand:VQ_HSI 2 "register_operand" "w")
3759 (match_operand:<VEL> 3 "register_operand" "w")]
3762 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3763 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3764 operands[2], operands[3],
3769 (define_expand "aarch64_sqdmlsl2_n<mode>"
3770 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3771 (match_operand:<VWIDE> 1 "register_operand" "w")
3772 (match_operand:VQ_HSI 2 "register_operand" "w")
3773 (match_operand:<VEL> 3 "register_operand" "w")]
3776 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3777 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3778 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long (no accumulator).
3785 (define_insn "aarch64_sqdmull<mode>"
3786 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3789 (sign_extend:<VWIDE>
3790 (match_operand:VSD_HSI 1 "register_operand" "w"))
3791 (sign_extend:<VWIDE>
3792 (match_operand:VSD_HSI 2 "register_operand" "w")))
3795 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3796 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Vector-by-lane SQDMULL; lane from a 64-bit (<VCOND>) vector,
;; endian-adjusted before printing.
3801 (define_insn "aarch64_sqdmull_lane<mode>"
3802 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805 (sign_extend:<VWIDE>
3806 (match_operand:VD_HSI 1 "register_operand" "w"))
3807 (sign_extend:<VWIDE>
3808 (vec_duplicate:VD_HSI
3810 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3811 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3816 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3817 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3819 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; laneq form: lane from a 128-bit (<VCONQ>) vector.
3822 (define_insn "aarch64_sqdmull_laneq<mode>"
3823 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3826 (sign_extend:<VWIDE>
3827 (match_operand:VD_HSI 1 "register_operand" "w"))
3828 (sign_extend:<VWIDE>
3829 (vec_duplicate:VD_HSI
3831 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3832 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3837 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3838 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3840 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar-by-lane SQDMULL (SD_HSI scalar modes); no vec_duplicate.
3843 (define_insn "aarch64_sqdmull_lane<mode>"
3844 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3847 (sign_extend:<VWIDE>
3848 (match_operand:SD_HSI 1 "register_operand" "w"))
3849 (sign_extend:<VWIDE>
3851 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3852 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3857 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3858 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3860 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar-by-lane, laneq form.
3863 (define_insn "aarch64_sqdmull_laneq<mode>"
3864 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3867 (sign_extend:<VWIDE>
3868 (match_operand:SD_HSI 1 "register_operand" "w"))
3869 (sign_extend:<VWIDE>
3871 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3872 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3877 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3878 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3880 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: multiplier is a duplicated scalar, printed as lane 0.
3885 (define_insn "aarch64_sqdmull_n<mode>"
3886 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3889 (sign_extend:<VWIDE>
3890 (match_operand:VD_HSI 1 "register_operand" "w"))
3891 (sign_extend:<VWIDE>
3892 (vec_duplicate:VD_HSI
3893 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3897 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3898 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: doubling multiply long on the HIGH halves of 128-bit sources;
;; operand 3 is the parallel selecting the high half.
3905 (define_insn "aarch64_sqdmull2<mode>_internal"
3906 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3909 (sign_extend:<VWIDE>
3911 (match_operand:VQ_HSI 1 "register_operand" "w")
3912 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3913 (sign_extend:<VWIDE>
3915 (match_operand:VQ_HSI 2 "register_operand" "w")
3920 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3921 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: builds the high-half selector and emits the _internal insn.
3924 (define_expand "aarch64_sqdmull2<mode>"
3925 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3926 (match_operand:VQ_HSI 1 "register_operand" "w")
3927 (match_operand:VQ_HSI 2 "register_operand" "w")]
3930 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3931 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; By-lane SQDMULL2; lane from a 64-bit (<VCOND>) vector, endian-adjusted.
3938 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3942 (sign_extend:<VWIDE>
3944 (match_operand:VQ_HSI 1 "register_operand" "w")
3945 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3946 (sign_extend:<VWIDE>
3947 (vec_duplicate:<VHALF>
3949 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3950 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3955 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3956 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3958 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; laneq form: lane from a 128-bit (<VCONQ>) vector.
3961 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3962 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3965 (sign_extend:<VWIDE>
3967 (match_operand:VQ_HSI 1 "register_operand" "w")
3968 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3969 (sign_extend:<VWIDE>
3970 (vec_duplicate:<VHALF>
3972 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3973 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3978 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3979 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3981 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the by-lane forms.
3984 (define_expand "aarch64_sqdmull2_lane<mode>"
3985 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3986 (match_operand:VQ_HSI 1 "register_operand" "w")
3987 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3988 (match_operand:SI 3 "immediate_operand" "i")]
3991 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3992 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3993 operands[2], operands[3],
3998 (define_expand "aarch64_sqdmull2_laneq<mode>"
3999 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4000 (match_operand:VQ_HSI 1 "register_operand" "w")
4001 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4002 (match_operand:SI 3 "immediate_operand" "i")]
4005 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4006 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4007 operands[2], operands[3],
;; _n form: high half times a duplicated scalar, printed as lane 0.
4014 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4015 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4018 (sign_extend:<VWIDE>
4020 (match_operand:VQ_HSI 1 "register_operand" "w")
4021 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4022 (sign_extend:<VWIDE>
4023 (vec_duplicate:<VHALF>
4024 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4028 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4029 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4032 (define_expand "aarch64_sqdmull2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:VQ_HSI 1 "register_operand" "w")
4035 (match_operand:<VEL> 2 "register_operand" "w")]
4038 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4039 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; SSHL / USHL (register-controlled shift; <sur> selects signedness).
;; Note: the trailing ';' after each template string starts an md comment
;; and is harmless.
4046 (define_insn "aarch64_<sur>shl<mode>"
4047 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4049 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4050 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4053 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4054 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (optionally rounding) register-controlled shift:
;; SQSHL/UQSHL/SQRSHL/UQRSHL per <sur> and <r>.
4060 (define_insn "aarch64_<sur>q<r>shl<mode>"
4061 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4063 [(match_operand:VSDQ_I 1 "register_operand" "w")
4064 (match_operand:VSDQ_I 2 "register_operand" "w")]
4067 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4068 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; SSHLL / USHLL by immediate; when the shift equals the element bit size the
;; architecture provides the dedicated SHLL alias, emitted instead.
4073 (define_insn "aarch64_<sur>shll_n<mode>"
4074 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4075 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4077 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4081 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4082 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4084 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4086 [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant (SSHLL2 / USHLL2), same SHLL2 special case.
4091 (define_insn "aarch64_<sur>shll2_n<mode>"
4092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4093 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4094 (match_operand:SI 2 "immediate_operand" "i")]
4098 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4099 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4101 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4103 [(set_attr "type" "neon_shift_imm_long")]
;; Right shift by immediate (<sur>shr family, e.g. srshr/urshr per <sur> --
;; confirm iterator values in iterators.md).
4108 (define_insn "aarch64_<sur>shr_n<mode>"
4109 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4110 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4112 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4115 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4116 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate (SSRA/USRA/...): operand 1 is the accumulator,
;; tied to the destination; operand 2 is shifted by immediate operand 3.
4121 (define_insn "aarch64_<sur>sra_n<mode>"
4122 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4123 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4124 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4126 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4129 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4130 [(set_attr "type" "neon_shift_acc<q>")]
;; SLI / SRI: shift-and-insert; operand 1 (tied to dest) supplies the bits
;; not overwritten by the shifted operand 2.
4135 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4136 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4137 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4138 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4140 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4143 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4144 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift left by immediate (SQSHL/UQSHL/SQSHLU per <sur>/<u>).
4149 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4150 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4151 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4153 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4156 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4157 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift-right narrow by immediate
;; (SQSHRN/UQSHRN/SQRSHRN/UQRSHRN/SQSHRUN/... per <sur>/<r>/<u>).
4163 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4164 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4165 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4167 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4170 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4171 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4175 ;; cm(eq|ge|gt|lt|le)
4176 ;; Note, we have constraints for Dz and Z as different expanders
4177 ;; have different ideas of what should be passed to this pattern.
;; Signed integer compares producing an all-ones/all-zeros mask.  Second
;; alternative compares against zero (ZDz constraint) using the #0 form.
4179 (define_insn "aarch64_cm<optab><mode>"
4180 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4182 (COMPARISONS:<V_cmp_result>
4183 (match_operand:VDQ_I 1 "register_operand" "w,w")
4184 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4188 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4189 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4190 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare.  If register allocation put the operands in general
;; registers, split to a GP compare + cstoredi_neg sequence (hence the CC
;; clobber); otherwise resplit to the *aarch64_cm<optab>di pattern below,
;; which has no CC clobber.
4193 (define_insn_and_split "aarch64_cm<optab>di"
4194 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4197 (match_operand:DI 1 "register_operand" "w,w,r")
4198 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4200 (clobber (reg:CC CC_REGNUM))]
4204 [(set (match_operand:DI 0 "register_operand")
4207 (match_operand:DI 1 "register_operand")
4208 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4211 /* If we are in the general purpose register file,
4212 we split to a sequence of comparison and store.  */
4213 if (GP_REGNUM_P (REGNO (operands[0]))
4214 && GP_REGNUM_P (REGNO (operands[1])))
4216 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4217 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4218 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4219 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4222 /* Otherwise, we expand to a similar pattern which does not
4223 clobber CC_REGNUM.  */
4225 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DImode compare kept in the FP register file (no CC clobber).
4228 (define_insn "*aarch64_cm<optab>di"
4229 [(set (match_operand:DI 0 "register_operand" "=w,w")
4232 (match_operand:DI 1 "register_operand" "w,w")
4233 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4235 "TARGET_SIMD && reload_completed"
4237 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4238 cm<optab>\t%d0, %d1, #0"
4239 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned integer compares (cmhi/cmhs via UCOMPARISONS); no compare-to-zero
;; alternative here, unlike the signed patterns.
4244 (define_insn "aarch64_cm<optab><mode>"
4245 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4247 (UCOMPARISONS:<V_cmp_result>
4248 (match_operand:VDQ_I 1 "register_operand" "w")
4249 (match_operand:VDQ_I 2 "register_operand" "w")
4252 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4253 [(set_attr "type" "neon_compare<q>")]
;; DImode unsigned compare; split strategy mirrors the signed DI pattern:
;; GP registers -> compare + cstoredi_neg, else the no-CC-clobber insn below.
;; Note this uses plain CCmode rather than SELECT_CC_MODE.
4256 (define_insn_and_split "aarch64_cm<optab>di"
4257 [(set (match_operand:DI 0 "register_operand" "=w,r")
4260 (match_operand:DI 1 "register_operand" "w,r")
4261 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4263 (clobber (reg:CC CC_REGNUM))]
4267 [(set (match_operand:DI 0 "register_operand")
4270 (match_operand:DI 1 "register_operand")
4271 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4274 /* If we are in the general purpose register file,
4275 we split to a sequence of comparison and store.  */
4276 if (GP_REGNUM_P (REGNO (operands[0]))
4277 && GP_REGNUM_P (REGNO (operands[1])))
4279 machine_mode mode = CCmode;
4280 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4281 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4282 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4285 /* Otherwise, we expand to a similar pattern which does not
4286 clobber CC_REGNUM.  */
4288 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload DImode unsigned compare in the FP register file.
4291 (define_insn "*aarch64_cm<optab>di"
4292 [(set (match_operand:DI 0 "register_operand" "=w")
4295 (match_operand:DI 1 "register_operand" "w")
4296 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4298 "TARGET_SIMD && reload_completed"
4299 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4300 [(set_attr "type" "neon_compare")]
4305 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4306 ;; we don't have any insns using ne, and aarch64_vcond outputs
4307 ;; not (neg (eq (and x y) 0))
4308 ;; which is rewritten by simplify_rtx as
4309 ;; plus (eq (and x y) 0) -1.
;; CMTST: test-bits compare, matched in the plus/eq/-1 form produced by
;; simplify_rtx (see the comment block above).
4311 (define_insn "aarch64_cmtst<mode>"
4312 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4313 (plus:<V_cmp_result>
4316 (match_operand:VDQ_I 1 "register_operand" "w")
4317 (match_operand:VDQ_I 2 "register_operand" "w"))
4318 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4319 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4322 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4323 [(set_attr "type" "neon_tst<q>")]
;; DImode CMTST; in GP registers splits to (x & y) != 0 via an AND tree and
;; cstoredi_neg, otherwise resplits to the FP-register insn below.
4326 (define_insn_and_split "aarch64_cmtstdi"
4327 [(set (match_operand:DI 0 "register_operand" "=w,r")
4331 (match_operand:DI 1 "register_operand" "w,r")
4332 (match_operand:DI 2 "register_operand" "w,r"))
4334 (clobber (reg:CC CC_REGNUM))]
4338 [(set (match_operand:DI 0 "register_operand")
4342 (match_operand:DI 1 "register_operand")
4343 (match_operand:DI 2 "register_operand"))
4346 /* If we are in the general purpose register file,
4347 we split to a sequence of comparison and store.  */
4348 if (GP_REGNUM_P (REGNO (operands[0]))
4349 && GP_REGNUM_P (REGNO (operands[1])))
4351 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4352 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4353 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4354 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4355 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4358 /* Otherwise, we expand to a similar pattern which does not
4359 clobber CC_REGNUM.  */
4361 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload DImode CMTST in the FP register file.
4364 (define_insn "*aarch64_cmtstdi"
4365 [(set (match_operand:DI 0 "register_operand" "=w")
4369 (match_operand:DI 1 "register_operand" "w")
4370 (match_operand:DI 2 "register_operand" "w"))
4373 "cmtst\t%d0, %d1, %d2"
4374 [(set_attr "type" "neon_tst")]
4377 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares; second alternative compares against zero (YDz)
;; with the literal-0 form.
4379 (define_insn "aarch64_cm<optab><mode>"
4380 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4382 (COMPARISONS:<V_cmp_result>
4383 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4384 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4388 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4389 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4390 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4394 ;; Note we can also handle what would be fac(le|lt) by
4395 ;; generating fac(ge|gt).
;; FACGE / FACGT: absolute-value floating-point compares.
4397 (define_insn "aarch64_fac<optab><mode>"
4398 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4400 (FAC_COMPARISONS:<V_cmp_result>
4402 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4404 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4407 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4408 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; ADDP: pairwise integer add on 64-bit vectors.
4413 (define_insn "aarch64_addp<mode>"
4414 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4416 [(match_operand:VD_BHSI 1 "register_operand" "w")
4417 (match_operand:VD_BHSI 2 "register_operand" "w")]
4420 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4421 [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar ADDP: sums the two DI lanes of a V2DI source.
4424 (define_insn "aarch64_addpdi"
4425 [(set (match_operand:DI 0 "register_operand" "=w")
4427 [(match_operand:V2DI 1 "register_operand" "w")]
4431 [(set_attr "type" "neon_reduc_add")]
4436 (define_expand "sqrt<mode>2"
4437 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4438 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4441 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4445 (define_insn "*sqrt<mode>2"
4446 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4447 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4449 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4450 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4453 ;; Patterns for vector struct loads and stores.
4455 (define_insn "aarch64_simd_ld2<mode>"
4456 [(set (match_operand:OI 0 "register_operand" "=w")
4457 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4458 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4461 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4462 [(set_attr "type" "neon_load2_2reg<q>")]
4465 (define_insn "aarch64_simd_ld2r<mode>"
4466 [(set (match_operand:OI 0 "register_operand" "=w")
4467 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4468 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4471 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4472 [(set_attr "type" "neon_load2_all_lanes<q>")]
4475 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4476 [(set (match_operand:OI 0 "register_operand" "=w")
4477 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4478 (match_operand:OI 2 "register_operand" "0")
4479 (match_operand:SI 3 "immediate_operand" "i")
4480 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4484 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4485 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4487 [(set_attr "type" "neon_load2_one_lane")]
;; Standard vec_load_lanes expander for 2-register (OImode) struct
;; loads.  On big-endian targets LD2 is done into a temporary and the
;; register list is then permuted (aarch64_rev_reglistoi with a
;; byte-reverse mask) so lane numbering matches GCC's vector-extension
;; ordering; on little-endian LD2 is emitted directly.
4490 (define_expand "vec_load_lanesoi<mode>"
4491 [(set (match_operand:OI 0 "register_operand" "=w")
4492 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4493 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4497 if (BYTES_BIG_ENDIAN)
4499 rtx tmp = gen_reg_rtx (OImode);
4500 rtx mask = aarch64_reverse_mask (<MODE>mode);
4501 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4502 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4505 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4509 (define_insn "aarch64_simd_st2<mode>"
4510 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4511 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4512 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4515 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4516 [(set_attr "type" "neon_store2_2reg<q>")]
4519 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4520 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4521 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4522 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4523 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4524 (match_operand:SI 2 "immediate_operand" "i")]
4528 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4529 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4531 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard vec_store_lanes expander, mirror of vec_load_lanesoi:
;; on big-endian, permute the register list into a temporary first
;; (aarch64_rev_reglistoi) and then ST2; on little-endian, ST2 the
;; source list directly.
4534 (define_expand "vec_store_lanesoi<mode>"
4535 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4536 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4537 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4541 if (BYTES_BIG_ENDIAN)
4543 rtx tmp = gen_reg_rtx (OImode);
4544 rtx mask = aarch64_reverse_mask (<MODE>mode);
4545 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4546 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4549 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4553 (define_insn "aarch64_simd_ld3<mode>"
4554 [(set (match_operand:CI 0 "register_operand" "=w")
4555 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4556 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4559 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4560 [(set_attr "type" "neon_load3_3reg<q>")]
4563 (define_insn "aarch64_simd_ld3r<mode>"
4564 [(set (match_operand:CI 0 "register_operand" "=w")
4565 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4566 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4569 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4570 [(set_attr "type" "neon_load3_all_lanes<q>")]
4573 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4574 [(set (match_operand:CI 0 "register_operand" "=w")
4575 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4576 (match_operand:CI 2 "register_operand" "0")
4577 (match_operand:SI 3 "immediate_operand" "i")
4578 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4582 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4583 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4585 [(set_attr "type" "neon_load3_one_lane")]
4588 (define_expand "vec_load_lanesci<mode>"
4589 [(set (match_operand:CI 0 "register_operand" "=w")
4590 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4591 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4595 if (BYTES_BIG_ENDIAN)
4597 rtx tmp = gen_reg_rtx (CImode);
4598 rtx mask = aarch64_reverse_mask (<MODE>mode);
4599 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4600 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4603 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4607 (define_insn "aarch64_simd_st3<mode>"
4608 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4609 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4610 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4613 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4614 [(set_attr "type" "neon_store3_3reg<q>")]
4617 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4618 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4619 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4620 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4621 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4622 (match_operand:SI 2 "immediate_operand" "i")]
4626 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4627 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4629 [(set_attr "type" "neon_store3_one_lane<q>")]
4632 (define_expand "vec_store_lanesci<mode>"
4633 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4634 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4635 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4639 if (BYTES_BIG_ENDIAN)
4641 rtx tmp = gen_reg_rtx (CImode);
4642 rtx mask = aarch64_reverse_mask (<MODE>mode);
4643 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4644 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4647 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4651 (define_insn "aarch64_simd_ld4<mode>"
4652 [(set (match_operand:XI 0 "register_operand" "=w")
4653 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4654 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4657 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4658 [(set_attr "type" "neon_load4_4reg<q>")]
4661 (define_insn "aarch64_simd_ld4r<mode>"
4662 [(set (match_operand:XI 0 "register_operand" "=w")
4663 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4664 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4667 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4668 [(set_attr "type" "neon_load4_all_lanes<q>")]
4671 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4672 [(set (match_operand:XI 0 "register_operand" "=w")
4673 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4674 (match_operand:XI 2 "register_operand" "0")
4675 (match_operand:SI 3 "immediate_operand" "i")
4676 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4681 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4683 [(set_attr "type" "neon_load4_one_lane")]
4686 (define_expand "vec_load_lanesxi<mode>"
4687 [(set (match_operand:XI 0 "register_operand" "=w")
4688 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4689 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4693 if (BYTES_BIG_ENDIAN)
4695 rtx tmp = gen_reg_rtx (XImode);
4696 rtx mask = aarch64_reverse_mask (<MODE>mode);
4697 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4698 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4701 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4705 (define_insn "aarch64_simd_st4<mode>"
4706 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4707 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4708 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4711 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4712 [(set_attr "type" "neon_store4_4reg<q>")]
4715 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4716 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4717 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4718 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4719 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4720 (match_operand:SI 2 "immediate_operand" "i")]
4724 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4725 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4727 [(set_attr "type" "neon_store4_one_lane<q>")]
4730 (define_expand "vec_store_lanesxi<mode>"
4731 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4732 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4733 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4737 if (BYTES_BIG_ENDIAN)
4739 rtx tmp = gen_reg_rtx (XImode);
4740 rtx mask = aarch64_reverse_mask (<MODE>mode);
4741 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4742 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4745 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Byte-permute an entire struct register list.  After reload this
;; splits into one TBL (aarch64_tbl1v16qi) per 128-bit register in the
;; list, all using the same byte mask in operand 2.  The "=&w"
;; earlyclobber keeps the output list disjoint from both inputs, since
;; the per-register TBLs are emitted sequentially.
4749 (define_insn_and_split "aarch64_rev_reglist<mode>"
4750 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4752 [(match_operand:VSTRUCT 1 "register_operand" "w")
4753 (match_operand:V16QI 2 "register_operand" "w")]
4754 UNSPEC_REV_REGLIST))]
4757 "&& reload_completed"
4761 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4762 for (i = 0; i < nregs; i++)
4764 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4765 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4766 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4770 [(set_attr "type" "neon_tbl1_q")
4771 (set_attr "length" "<insn_count>")]
4774 ;; Reload patterns for AdvSIMD register list operands.
4776 (define_expand "mov<mode>"
4777 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4778 (match_operand:VSTRUCT 1 "general_operand" ""))]
4781 if (can_create_pseudo_p ())
4783 if (GET_CODE (operands[0]) != REG)
4784 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian moves for struct (register-list) modes:
;;   alt 0: reg->reg (type "multiple"; presumably split elsewhere into
;;          per-register moves — the template line is elided here),
;;   alt 1: ST1 multi-register store,
;;   alt 2: LD1 multi-register load.
;; The condition requires at least one register operand so a
;; mem->mem move can never match.
4788 (define_insn "*aarch64_mov<mode>"
4789 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4790 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4791 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4792 && (register_operand (operands[0], <MODE>mode)
4793 || register_operand (operands[1], <MODE>mode))"
4796 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4797 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4798 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4799 neon_load<nregs>_<nregs>reg_q")
4800 (set_attr "length" "<insn_count>,4,4")]
4803 (define_insn "aarch64_be_ld1<mode>"
4804 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4805 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4806 "aarch64_simd_struct_operand" "Utv")]
4809 "ld1\\t{%0<Vmtype>}, %1"
4810 [(set_attr "type" "neon_load1_1reg<q>")]
4813 (define_insn "aarch64_be_st1<mode>"
4814 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4815 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4818 "st1\\t{%1<Vmtype>}, %0"
4819 [(set_attr "type" "neon_store1_1reg<q>")]
4822 (define_insn "*aarch64_be_movoi"
4823 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4824 (match_operand:OI 1 "general_operand" " w,w,m"))]
4825 "TARGET_SIMD && BYTES_BIG_ENDIAN
4826 && (register_operand (operands[0], OImode)
4827 || register_operand (operands[1], OImode))"
4832 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4833 (set_attr "length" "8,4,4")]
4836 (define_insn "*aarch64_be_movci"
4837 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4838 (match_operand:CI 1 "general_operand" " w,w,o"))]
4839 "TARGET_SIMD && BYTES_BIG_ENDIAN
4840 && (register_operand (operands[0], CImode)
4841 || register_operand (operands[1], CImode))"
4843 [(set_attr "type" "multiple")
4844 (set_attr "length" "12,4,4")]
4847 (define_insn "*aarch64_be_movxi"
4848 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4849 (match_operand:XI 1 "general_operand" " w,w,o"))]
4850 "TARGET_SIMD && BYTES_BIG_ENDIAN
4851 && (register_operand (operands[0], XImode)
4852 || register_operand (operands[1], XImode))"
4854 [(set_attr "type" "multiple")
4855 (set_attr "length" "16,4,4")]
4859 [(set (match_operand:OI 0 "register_operand")
4860 (match_operand:OI 1 "register_operand"))]
4861 "TARGET_SIMD && reload_completed"
4864 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4869 [(set (match_operand:CI 0 "nonimmediate_operand")
4870 (match_operand:CI 1 "general_operand"))]
4871 "TARGET_SIMD && reload_completed"
4874 if (register_operand (operands[0], CImode)
4875 && register_operand (operands[1], CImode))
4877 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4880 else if (BYTES_BIG_ENDIAN)
4882 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4883 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4884 emit_move_insn (gen_lowpart (V16QImode,
4885 simplify_gen_subreg (TImode, operands[0],
4887 gen_lowpart (V16QImode,
4888 simplify_gen_subreg (TImode, operands[1],
4897 [(set (match_operand:XI 0 "nonimmediate_operand")
4898 (match_operand:XI 1 "general_operand"))]
4899 "TARGET_SIMD && reload_completed"
4902 if (register_operand (operands[0], XImode)
4903 && register_operand (operands[1], XImode))
4905 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4908 else if (BYTES_BIG_ENDIAN)
4910 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4911 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4912 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4913 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4920 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4921 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4922 (match_operand:DI 1 "register_operand" "w")
4923 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4927 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4930 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4935 (define_insn "aarch64_ld2<mode>_dreg_le"
4936 [(set (match_operand:OI 0 "register_operand" "=w")
4941 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4943 (vec_duplicate:VD (const_int 0)))
4945 (unspec:VD [(match_dup 1)]
4947 (vec_duplicate:VD (const_int 0)))) 0))]
4948 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4949 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4950 [(set_attr "type" "neon_load2_2reg<q>")]
4953 (define_insn "aarch64_ld2<mode>_dreg_be"
4954 [(set (match_operand:OI 0 "register_operand" "=w")
4958 (vec_duplicate:VD (const_int 0))
4960 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4963 (vec_duplicate:VD (const_int 0))
4964 (unspec:VD [(match_dup 1)]
4966 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4967 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4968 [(set_attr "type" "neon_load2_2reg<q>")]
4971 (define_insn "aarch64_ld2<mode>_dreg_le"
4972 [(set (match_operand:OI 0 "register_operand" "=w")
4977 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4981 (unspec:DX [(match_dup 1)]
4983 (const_int 0))) 0))]
4984 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4985 "ld1\\t{%S0.1d - %T0.1d}, %1"
4986 [(set_attr "type" "neon_load1_2reg<q>")]
4989 (define_insn "aarch64_ld2<mode>_dreg_be"
4990 [(set (match_operand:OI 0 "register_operand" "=w")
4996 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5000 (unspec:DX [(match_dup 1)]
5002 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5003 "ld1\\t{%S0.1d - %T0.1d}, %1"
5004 [(set_attr "type" "neon_load1_2reg<q>")]
5007 (define_insn "aarch64_ld3<mode>_dreg_le"
5008 [(set (match_operand:CI 0 "register_operand" "=w")
5014 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5016 (vec_duplicate:VD (const_int 0)))
5018 (unspec:VD [(match_dup 1)]
5020 (vec_duplicate:VD (const_int 0))))
5022 (unspec:VD [(match_dup 1)]
5024 (vec_duplicate:VD (const_int 0)))) 0))]
5025 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5026 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5027 [(set_attr "type" "neon_load3_3reg<q>")]
5030 (define_insn "aarch64_ld3<mode>_dreg_be"
5031 [(set (match_operand:CI 0 "register_operand" "=w")
5036 (vec_duplicate:VD (const_int 0))
5038 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5041 (vec_duplicate:VD (const_int 0))
5042 (unspec:VD [(match_dup 1)]
5045 (vec_duplicate:VD (const_int 0))
5046 (unspec:VD [(match_dup 1)]
5048 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5049 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5050 [(set_attr "type" "neon_load3_3reg<q>")]
5053 (define_insn "aarch64_ld3<mode>_dreg_le"
5054 [(set (match_operand:CI 0 "register_operand" "=w")
5060 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5064 (unspec:DX [(match_dup 1)]
5068 (unspec:DX [(match_dup 1)]
5070 (const_int 0))) 0))]
5071 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5072 "ld1\\t{%S0.1d - %U0.1d}, %1"
5073 [(set_attr "type" "neon_load1_3reg<q>")]
5076 (define_insn "aarch64_ld3<mode>_dreg_be"
5077 [(set (match_operand:CI 0 "register_operand" "=w")
5084 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5088 (unspec:DX [(match_dup 1)]
5092 (unspec:DX [(match_dup 1)]
5094 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5095 "ld1\\t{%S0.1d - %U0.1d}, %1"
5096 [(set_attr "type" "neon_load1_3reg<q>")]
5099 (define_insn "aarch64_ld4<mode>_dreg_le"
5100 [(set (match_operand:XI 0 "register_operand" "=w")
5106 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5108 (vec_duplicate:VD (const_int 0)))
5110 (unspec:VD [(match_dup 1)]
5112 (vec_duplicate:VD (const_int 0))))
5115 (unspec:VD [(match_dup 1)]
5117 (vec_duplicate:VD (const_int 0)))
5119 (unspec:VD [(match_dup 1)]
5121 (vec_duplicate:VD (const_int 0))))) 0))]
5122 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5123 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5124 [(set_attr "type" "neon_load4_4reg<q>")]
5127 (define_insn "aarch64_ld4<mode>_dreg_be"
5128 [(set (match_operand:XI 0 "register_operand" "=w")
5133 (vec_duplicate:VD (const_int 0))
5135 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5138 (vec_duplicate:VD (const_int 0))
5139 (unspec:VD [(match_dup 1)]
5143 (vec_duplicate:VD (const_int 0))
5144 (unspec:VD [(match_dup 1)]
5147 (vec_duplicate:VD (const_int 0))
5148 (unspec:VD [(match_dup 1)]
5150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5151 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5152 [(set_attr "type" "neon_load4_4reg<q>")]
5155 (define_insn "aarch64_ld4<mode>_dreg_le"
5156 [(set (match_operand:XI 0 "register_operand" "=w")
5162 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5166 (unspec:DX [(match_dup 1)]
5171 (unspec:DX [(match_dup 1)]
5175 (unspec:DX [(match_dup 1)]
5177 (const_int 0)))) 0))]
5178 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5179 "ld1\\t{%S0.1d - %V0.1d}, %1"
5180 [(set_attr "type" "neon_load1_4reg<q>")]
5183 (define_insn "aarch64_ld4<mode>_dreg_be"
5184 [(set (match_operand:XI 0 "register_operand" "=w")
5191 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5195 (unspec:DX [(match_dup 1)]
5200 (unspec:DX [(match_dup 1)]
5204 (unspec:DX [(match_dup 1)]
5206 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5207 "ld1\\t{%S0.1d - %V0.1d}, %1"
5208 [(set_attr "type" "neon_load1_4reg<q>")]
5211 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5212 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5213 (match_operand:DI 1 "register_operand" "r")
5214 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5217 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5218 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5220 if (BYTES_BIG_ENDIAN)
5221 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
5224 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
5229 (define_expand "aarch64_ld1<VALL_F16:mode>"
5230 [(match_operand:VALL_F16 0 "register_operand")
5231 (match_operand:DI 1 "register_operand")]
5234 machine_mode mode = <VALL_F16:MODE>mode;
5235 rtx mem = gen_rtx_MEM (mode, operands[1]);
5237 if (BYTES_BIG_ENDIAN)
5238 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5240 emit_move_insn (operands[0], mem);
5244 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5245 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5246 (match_operand:DI 1 "register_operand" "r")
5247 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250 machine_mode mode = <VSTRUCT:MODE>mode;
5251 rtx mem = gen_rtx_MEM (mode, operands[1]);
5253 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5257 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5258 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5259 (match_operand:DI 1 "register_operand" "w")
5260 (match_operand:VSTRUCT 2 "register_operand" "0")
5261 (match_operand:SI 3 "immediate_operand" "i")
5262 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5265 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5266 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5269 aarch64_simd_lane_bounds (operands[3], 0,
5270 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5272 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5273 operands[0], mem, operands[2], operands[3]));
5277 ;; Expanders for builtins to extract vector registers from large
5278 ;; opaque integer modes.
5282 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5283 [(match_operand:VDC 0 "register_operand" "=w")
5284 (match_operand:VSTRUCT 1 "register_operand" "w")
5285 (match_operand:SI 2 "immediate_operand" "i")]
5288 int part = INTVAL (operands[2]);
5289 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5290 int offset = part * 16;
5292 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5293 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5299 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5300 [(match_operand:VQ 0 "register_operand" "=w")
5301 (match_operand:VSTRUCT 1 "register_operand" "w")
5302 (match_operand:SI 2 "immediate_operand" "i")]
5305 int part = INTVAL (operands[2]);
5306 int offset = part * 16;
5308 emit_move_insn (operands[0],
5309 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5313 ;; Permuted-store expanders for neon intrinsics.
5315 ;; Permute instructions
;; vec_perm_const: constant-selector permute.  Tries to match the
;; selector (operand 3) to a native permute/shuffle via
;; aarch64_expand_vec_perm_const; the FAIL path is elided here.
5319 (define_expand "vec_perm_const<mode>"
5320 [(match_operand:VALL_F16 0 "register_operand")
5321 (match_operand:VALL_F16 1 "register_operand")
5322 (match_operand:VALL_F16 2 "register_operand")
5323 (match_operand:<V_cmp_result> 3)]
5326 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5327 operands[2], operands[3]))
;; vec_perm: run-time (variable) selector, byte-vector modes only
;; (VB iterator); fully handled by aarch64_expand_vec_perm.
5333 (define_expand "vec_perm<mode>"
5334 [(match_operand:VB 0 "register_operand")
5335 (match_operand:VB 1 "register_operand")
5336 (match_operand:VB 2 "register_operand")
5337 (match_operand:VB 3 "register_operand")]
5340 aarch64_expand_vec_perm (operands[0], operands[1],
5341 operands[2], operands[3]);
5345 (define_insn "aarch64_tbl1<mode>"
5346 [(set (match_operand:VB 0 "register_operand" "=w")
5347 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5348 (match_operand:VB 2 "register_operand" "w")]
5351 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5352 [(set_attr "type" "neon_tbl1<q>")]
5355 ;; Two source registers.
5357 (define_insn "aarch64_tbl2v16qi"
5358 [(set (match_operand:V16QI 0 "register_operand" "=w")
5359 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5360 (match_operand:V16QI 2 "register_operand" "w")]
5363 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5364 [(set_attr "type" "neon_tbl2_q")]
5367 (define_insn "aarch64_tbl3<mode>"
5368 [(set (match_operand:VB 0 "register_operand" "=w")
5369 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5370 (match_operand:VB 2 "register_operand" "w")]
5373 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5374 [(set_attr "type" "neon_tbl3")]
5377 (define_insn "aarch64_tbx4<mode>"
5378 [(set (match_operand:VB 0 "register_operand" "=w")
5379 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5380 (match_operand:OI 2 "register_operand" "w")
5381 (match_operand:VB 3 "register_operand" "w")]
5384 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5385 [(set_attr "type" "neon_tbl4")]
5388 ;; Three source registers.
5390 (define_insn "aarch64_qtbl3<mode>"
5391 [(set (match_operand:VB 0 "register_operand" "=w")
5392 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5393 (match_operand:VB 2 "register_operand" "w")]
5396 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5397 [(set_attr "type" "neon_tbl3")]
5400 (define_insn "aarch64_qtbx3<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5403 (match_operand:CI 2 "register_operand" "w")
5404 (match_operand:VB 3 "register_operand" "w")]
5407 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5408 [(set_attr "type" "neon_tbl3")]
5411 ;; Four source registers.
5413 (define_insn "aarch64_qtbl4<mode>"
5414 [(set (match_operand:VB 0 "register_operand" "=w")
5415 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5416 (match_operand:VB 2 "register_operand" "w")]
5419 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5420 [(set_attr "type" "neon_tbl4")]
5423 (define_insn "aarch64_qtbx4<mode>"
5424 [(set (match_operand:VB 0 "register_operand" "=w")
5425 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5426 (match_operand:XI 2 "register_operand" "w")
5427 (match_operand:VB 3 "register_operand" "w")]
5430 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5431 [(set_attr "type" "neon_tbl4")]
5434 (define_insn_and_split "aarch64_combinev16qi"
5435 [(set (match_operand:OI 0 "register_operand" "=w")
5436 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5437 (match_operand:V16QI 2 "register_operand" "w")]
5441 "&& reload_completed"
5444 aarch64_split_combinev16qi (operands);
5447 [(set_attr "type" "multiple")]
5450 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5451 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5452 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5453 (match_operand:VALL_F16 2 "register_operand" "w")]
5456 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5457 [(set_attr "type" "neon_permute<q>")]
5460 ;; Note immediate (third) operand is lane index not byte index.
;; EXT (vector extract/concatenate-shift).  The output template
;; rescales operand 3 from a lane index to the byte offset the
;; instruction encoding requires (index * element size).
5461 (define_insn "aarch64_ext<mode>"
5462 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5463 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5464 (match_operand:VALL_F16 2 "register_operand" "w")
5465 (match_operand:SI 3 "immediate_operand" "i")]
5469 operands[3] = GEN_INT (INTVAL (operands[3])
5470 * GET_MODE_UNIT_SIZE (<MODE>mode));
5471 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5473 [(set_attr "type" "neon_ext<q>")]
;; REV16/REV32/REV64 — element reversal; the reversal granule is
;; selected by the REVERSE iterator's rev_op attribute.
5476 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5477 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5478 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5481 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5482 [(set_attr "type" "neon_rev<q>")]
5485 (define_insn "aarch64_st2<mode>_dreg"
5486 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5487 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5488 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5491 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5492 [(set_attr "type" "neon_store2_2reg")]
5495 (define_insn "aarch64_st2<mode>_dreg"
5496 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5497 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5498 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5501 "st1\\t{%S1.1d - %T1.1d}, %0"
5502 [(set_attr "type" "neon_store1_2reg")]
5505 (define_insn "aarch64_st3<mode>_dreg"
5506 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5507 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5508 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5511 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5512 [(set_attr "type" "neon_store3_3reg")]
5515 (define_insn "aarch64_st3<mode>_dreg"
5516 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5517 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5518 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5521 "st1\\t{%S1.1d - %U1.1d}, %0"
5522 [(set_attr "type" "neon_store1_3reg")]
5525 (define_insn "aarch64_st4<mode>_dreg"
5526 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5527 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5528 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5531 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5532 [(set_attr "type" "neon_store4_4reg")]
5535 (define_insn "aarch64_st4<mode>_dreg"
5536 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5537 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5538 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5541 "st1\\t{%S1.1d - %V1.1d}, %0"
5542 [(set_attr "type" "neon_store1_4reg")]
5545 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5546 [(match_operand:DI 0 "register_operand" "r")
5547 (match_operand:VSTRUCT 1 "register_operand" "w")
5548 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5551 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5552 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5554 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5558 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5559 [(match_operand:DI 0 "register_operand" "r")
5560 (match_operand:VSTRUCT 1 "register_operand" "w")
5561 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564 machine_mode mode = <VSTRUCT:MODE>mode;
5565 rtx mem = gen_rtx_MEM (mode, operands[0]);
5567 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5571 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5572 [(match_operand:DI 0 "register_operand" "r")
5573 (match_operand:VSTRUCT 1 "register_operand" "w")
5574 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5575 (match_operand:SI 2 "immediate_operand")]
5578 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5579 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5582 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5583 mem, operands[1], operands[2]));
5587 (define_expand "aarch64_st1<VALL_F16:mode>"
5588 [(match_operand:DI 0 "register_operand")
5589 (match_operand:VALL_F16 1 "register_operand")]
5592 machine_mode mode = <VALL_F16:MODE>mode;
5593 rtx mem = gen_rtx_MEM (mode, operands[0]);
5595 if (BYTES_BIG_ENDIAN)
5596 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5598 emit_move_insn (mem, operands[1]);
5602 ;; Expander for builtins to insert vector registers into large
5603 ;; opaque integer modes.
5605 ;; Q-register list. We don't need a D-reg inserter as we zero
5606 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5608 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5609 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5610 (match_operand:VSTRUCT 1 "register_operand" "0")
5611 (match_operand:VQ 2 "register_operand" "w")
5612 (match_operand:SI 3 "immediate_operand" "i")]
5615 int part = INTVAL (operands[3]);
5616 int offset = part * 16;
5618 emit_move_insn (operands[0], operands[1]);
5619 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5624 ;; Standard pattern name vec_init<mode>.
;; Operand 1 is a PARALLEL of element values (hence the untyped,
;; predicate-less match_operand); all strategy selection is done in
;; aarch64_expand_vector_init.
5626 (define_expand "vec_init<mode>"
5627 [(match_operand:VALL_F16 0 "register_operand" "")
5628 (match_operand 1 "" "")]
5631 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load a single element from memory and replicate it into every
;; lane of the destination vector.
5635 (define_insn "*aarch64_simd_ld1r<mode>"
5636 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5637 (vec_duplicate:VALL_F16
5638 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5640 "ld1r\\t{%0.<Vtype>}, %1"
5641 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: floating-point reciprocal estimate, vector forms (half,
;; single and double element sizes via VHSDF).
5644 (define_insn "aarch64_frecpe<mode>"
5645 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5646 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5649 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5650 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECPE/FRECPX, selected by the FRECP iterator's suffix, on
;; HF/SF/DF scalars held in SIMD registers.
5653 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5654 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5655 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5658 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5659 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; FRECPS: floating-point reciprocal step; VHSDF_HSDF covers both the
;; vector and the scalar modes with one pattern.
5662 (define_insn "aarch64_frecps<mode>"
5663 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5665 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5666 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5669 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5670 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; URECPE: unsigned integer reciprocal estimate on 32-bit lanes.
5673 (define_insn "aarch64_urecpe<mode>"
5674 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5675 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5678 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5679 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5681 ;; Standard pattern name vec_extract<mode>.
;; Extract the lane given by immediate operand 2 from vector operand 1
;; into scalar operand 0, via the get_lane pattern.
5683 (define_expand "vec_extract<mode>"
5684 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5685 (match_operand:VALL_F16 1 "register_operand" "")
5686 (match_operand:SI 2 "immediate_operand" "")]
5690 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AESE/AESD single AES round (aes_op selects which).  Operand 1 is tied
;; to the output ("0" constraint: read-modify-write); operand 2 is the
;; second source, referenced as %2 in the template.
5696 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5697 [(set (match_operand:V16QI 0 "register_operand" "=w")
5698 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5699 (match_operand:V16QI 2 "register_operand" "w")]
5701 "TARGET_SIMD && TARGET_CRYPTO"
5702 "aes<aes_op>\\t%0.16b, %2.16b"
5703 [(set_attr "type" "crypto_aese")]
5706 ;; When AES/AESMC fusion is enabled we want the register allocation to
5710 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AESMC/AESIMC.  Two alternatives: when AES/AESMC fusion is enabled,
;; only the tied alternative ("0") is enabled, forcing the allocator to
;; keep source and destination the same so the AESE/AESMC pair can fuse;
;; otherwise the untied "w" alternative is available as well.
5712 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5713 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5714 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5716 "TARGET_SIMD && TARGET_CRYPTO"
5717 "aes<aesmc_op>\\t%0.16b, %1.16b"
5718 [(set_attr "type" "crypto_aesmc")
;; First alternative enabled only when fusing; second always enabled.
5719 (set_attr_alternative "enabled"
5720 [(if_then_else (match_test
5721 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5722 (const_string "yes" )
5723 (const_string "no"))
5724 (const_string "yes")])]
;; SHA1H fixed-rotate, operating on an SI value held in a SIMD register.
5729 (define_insn "aarch64_crypto_sha1hsi"
5730 [(set (match_operand:SI 0 "register_operand" "=w")
5731 (unspec:SI [(match_operand:SI 1
5732 "register_operand" "w")]
5734 "TARGET_SIMD && TARGET_CRYPTO"
5736 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1H applied to RTL lane 0 of a V4SI source.  Little-endian only;
;; the big-endian counterpart below selects lane 3 instead.
5739 (define_insn "aarch64_crypto_sha1hv4si"
5740 [(set (match_operand:SI 0 "register_operand" "=w")
5741 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5742 (parallel [(const_int 0)]))]
5744 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5746 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian twin of the pattern above: selects RTL lane 3, which under
;; BYTES_BIG_ENDIAN corresponds to the lane that LE addresses as 0.
5749 (define_insn "aarch64_be_crypto_sha1hv4si"
5750 [(set (match_operand:SI 0 "register_operand" "=w")
5751 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5752 (parallel [(const_int 3)]))]
5754 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5756 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 schedule update.  Operand 1 is tied to the output; only
;; operand 2 appears explicitly in the assembly template.
5759 (define_insn "aarch64_crypto_sha1su1v4si"
5760 [(set (match_operand:V4SI 0 "register_operand" "=w")
5761 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5762 (match_operand:V4SI 2 "register_operand" "w")]
5764 "TARGET_SIMD && TARGET_CRYPTO"
5765 "sha1su1\\t%0.4s, %2.4s"
5766 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P hash update (sha1_op selects the variant).  Operand
;; 1 is tied to the output (%q0, full Q view); operand 2 is an SI source
;; printed as %s2; operand 3 is a V4SI source.
5769 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5770 [(set (match_operand:V4SI 0 "register_operand" "=w")
5771 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5772 (match_operand:SI 2 "register_operand" "w")
5773 (match_operand:V4SI 3 "register_operand" "w")]
5775 "TARGET_SIMD && TARGET_CRYPTO"
5776 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5777 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 schedule update.  Operand 1 is tied to the output; operands 2
;; and 3 are the explicit sources in the template.
5780 (define_insn "aarch64_crypto_sha1su0v4si"
5781 [(set (match_operand:V4SI 0 "register_operand" "=w")
5782 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5783 (match_operand:V4SI 2 "register_operand" "w")
5784 (match_operand:V4SI 3 "register_operand" "w")]
5786 "TARGET_SIMD && TARGET_CRYPTO"
5787 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5788 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash update (sha256_op selects the variant).
;; Operand 1 is tied to the output; %q0/%q2 print the full Q views.
5793 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5794 [(set (match_operand:V4SI 0 "register_operand" "=w")
5795 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5796 (match_operand:V4SI 2 "register_operand" "w")
5797 (match_operand:V4SI 3 "register_operand" "w")]
5799 "TARGET_SIMD && TARGET_CRYPTO"
5800 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5801 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 schedule update.  Operand 1 is tied to the output; only
;; operand 2 appears explicitly in the assembly template.
5804 (define_insn "aarch64_crypto_sha256su0v4si"
5805 [(set (match_operand:V4SI 0 "register_operand" "=w")
5806 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5807 (match_operand:V4SI 2 "register_operand" "w")]
;; Condition normalized ("&&TARGET_CRYPTO" -> "&& TARGET_CRYPTO") for
;; consistency with every other crypto pattern in this file.
5809 "TARGET_SIMD && TARGET_CRYPTO"
5810 "sha256su0\\t%0.4s, %2.4s"
5811 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 schedule update.  Operand 1 is tied to the output; operands
;; 2 and 3 are the explicit sources in the template.
5814 (define_insn "aarch64_crypto_sha256su1v4si"
5815 [(set (match_operand:V4SI 0 "register_operand" "=w")
5816 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5817 (match_operand:V4SI 2 "register_operand" "w")
5818 (match_operand:V4SI 3 "register_operand" "w")]
;; Condition normalized ("&&TARGET_CRYPTO" -> "&& TARGET_CRYPTO") for
;; consistency with every other crypto pattern in this file.
5820 "TARGET_SIMD && TARGET_CRYPTO"
5821 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5822 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: polynomial (carry-less) multiply of two DI sources, producing
;; a 128-bit TI result.
5827 (define_insn "aarch64_crypto_pmulldi"
5828 [(set (match_operand:TI 0 "register_operand" "=w")
5829 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5830 (match_operand:DI 2 "register_operand" "w")]
5832 "TARGET_SIMD && TARGET_CRYPTO"
5833 "pmull\\t%0.1q, %1.1d, %2.1d"
5834 [(set_attr "type" "crypto_pmull")]
;; PMULL2: polynomial multiply on the high 64-bit halves of the V2DI
;; sources, producing a 128-bit TI result.
5837 (define_insn "aarch64_crypto_pmullv2di"
5838 [(set (match_operand:TI 0 "register_operand" "=w")
5839 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5840 (match_operand:V2DI 2 "register_operand" "w")]
5842 "TARGET_SIMD && TARGET_CRYPTO"
5843 "pmull2\\t%0.1q, %1.2d, %2.2d"
5844 [(set_attr "type" "crypto_pmull")]