1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Note on the handling of big-endian SVE
22 ;; --------------------------------------
24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
25 ;; same way as movdi or movti would: the first byte of memory goes
26 ;; into the most significant byte of the register and the last byte
27 ;; of memory goes into the least significant byte of the register.
28 ;; This is the most natural ordering for Advanced SIMD and matches
29 ;; the ABI layout for 64-bit and 128-bit vector types.
31 ;; As a result, the order of bytes within the register is what GCC
32 ;; expects for a big-endian target, and subreg offsets therefore work
33 ;; as expected, with the first element in memory having subreg offset 0
34 ;; and the last element in memory having the subreg offset associated
35 ;; with a big-endian lowpart. However, this ordering also means that
36 ;; GCC's lane numbering does not match the architecture's numbering:
37 ;; GCC always treats the element at the lowest address in memory
38 ;; (subreg offset 0) as element 0, while the architecture treats
39 ;; the least significant end of the register as element 0.
41 ;; The situation for SVE is different. We want the layout of the
42 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
43 ;; logically, a mov<mode> load must be indistinguishable from a
44 ;; maskload<mode> whose mask is all true. We therefore need the
45 ;; register layout to match LD1 rather than LDR. The ABI layout of
46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
48 ;; As a result, the architecture lane numbering matches GCC's lane
49 ;; numbering, with element 0 always being the first in memory.
52 ;; - Applying a subreg offset to a register does not give the element
53 ;; that GCC expects: the first element in memory has the subreg offset
54 ;; associated with a big-endian lowpart while the last element in memory
55 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
57 ;; - We cannot use LDR and STR for spill slots that might be accessed
58 ;; via subregs, since although the elements have the order GCC expects,
59 ;; the order of the bytes within the elements is different. We instead
60 ;; access spill slots via LD1 and ST1, using secondary reloads to
61 ;; reserve a predicate register.
65 (define_expand "mov<mode>"
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
67 (match_operand:SVE_ALL 1 "general_operand"))]
70 /* Use the predicated load and store patterns where possible.
71 This is required for big-endian targets (see the comment at the
72 head of the file) and increases the addressing choices for
74 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
75 && can_create_pseudo_p ())
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
81 if (CONSTANT_P (operands[1]))
83 aarch64_expand_mov_immediate (operands[0], operands[1],
84 gen_vec_duplicate<mode>);
88 /* Optimize subregs on big-endian targets: we can use REV[BHW]
89 instead of going through memory. */
91 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
96 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
97 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
98 ;; for details. We use a special predicate for operand 2 to reduce
99 ;; the number of patterns.
100 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
101 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
103 [(match_operand:VNx16BI 1 "register_operand" "Upl")
104 (match_operand 2 "aarch64_any_register_operand" "w")]
106 "TARGET_SVE && BYTES_BIG_ENDIAN"
108 "&& reload_completed"
111 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
116 ;; Unpredicated moves (little-endian). Only allow memory operations
117 ;; during and after RA; before RA we want the predicated load and
118 ;; store patterns to be used instead.
119 (define_insn "*aarch64_sve_mov<mode>_le"
120 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
121 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
124 && ((lra_in_progress || reload_completed)
125 || (register_operand (operands[0], <MODE>mode)
126 && nonmemory_operand (operands[1], <MODE>mode)))"
131 * return aarch64_output_sve_mov_immediate (operands[1]);"
134 ;; Unpredicated moves (big-endian). Memory accesses require secondary
136 (define_insn "*aarch64_sve_mov<mode>_be"
137 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
138 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
139 "TARGET_SVE && BYTES_BIG_ENDIAN"
142 * return aarch64_output_sve_mov_immediate (operands[1]);"
145 ;; Handle big-endian memory reloads. We use byte PTRUE for all modes
146 ;; to try to encourage reuse.
147 (define_expand "aarch64_sve_reload_be"
149 [(set (match_operand 0)
151 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
152 "TARGET_SVE && BYTES_BIG_ENDIAN"
154 /* Create a PTRUE. */
155 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
157 /* Refer to the PTRUE in the appropriate mode for this move. */
158 machine_mode mode = GET_MODE (operands[0]);
159 machine_mode pred_mode
160 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
161 rtx pred = gen_lowpart (pred_mode, operands[2]);
163 /* Emit a predicated load or store. */
164 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
169 ;; A predicated load or store for which the predicate is known to be
170 ;; all-true. Note that this pattern is generated directly by
171 ;; aarch64_emit_sve_pred_move, so changes to this pattern will
172 ;; need changes there as well.
173 (define_insn "*pred_mov<mode>"
174 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
176 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
177 (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
178 UNSPEC_MERGE_PTRUE))]
180 && (register_operand (operands[0], <MODE>mode)
181 || register_operand (operands[2], <MODE>mode))"
183 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
184 st1<Vesize>\t%2.<Vetype>, %1, %0"
;; Misaligned vector moves.  No special handling is needed here: the
;; expander simply forwards both operands to emit_move_insn.
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
189 (match_operand:SVE_ALL 1 "general_operand"))]
192 /* Equivalent to a normal move for our purposes. */
193 emit_move_insn (operands[0], operands[1]);
198 (define_insn "maskload<mode><vpred>"
199 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
201 [(match_operand:<VPRED> 2 "register_operand" "Upl")
202 (match_operand:SVE_ALL 1 "memory_operand" "m")]
205 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
208 (define_insn "maskstore<mode><vpred>"
209 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
210 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
211 (match_operand:SVE_ALL 1 "register_operand" "w")
215 "st1<Vesize>\t%1.<Vetype>, %2, %0"
218 ;; Unpredicated gather loads.
219 (define_expand "gather_load<mode>"
220 [(set (match_operand:SVE_SD 0 "register_operand")
223 (match_operand:DI 1 "aarch64_reg_or_zero")
224 (match_operand:<V_INT_EQUIV> 2 "register_operand")
225 (match_operand:DI 3 "const_int_operand")
226 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
231 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
235 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
236 ;; unsigned extension and false for signed extension.
237 (define_insn "mask_gather_load<mode>"
238 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
240 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
241 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
242 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
243 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
244 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
249 ld1w\t%0.s, %5/z, [%2.s]
250 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
251 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
252 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
253 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
256 ;; Predicated gather loads for 64-bit elements. The value of operand 3
257 ;; doesn't matter in this case.
258 (define_insn "mask_gather_load<mode>"
259 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
261 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
262 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
263 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
264 (match_operand:DI 3 "const_int_operand")
265 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
270 ld1d\t%0.d, %5/z, [%2.d]
271 ld1d\t%0.d, %5/z, [%1, %2.d]
272 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
275 ;; Unpredicated scatter store.
276 (define_expand "scatter_store<mode>"
277 [(set (mem:BLK (scratch))
280 (match_operand:DI 0 "aarch64_reg_or_zero")
281 (match_operand:<V_INT_EQUIV> 1 "register_operand")
282 (match_operand:DI 2 "const_int_operand")
283 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
284 (match_operand:SVE_SD 4 "register_operand")]
285 UNSPEC_ST1_SCATTER))]
288 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
292 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
293 ;; unsigned extension and false for signed extension.
294 (define_insn "mask_scatter_store<mode>"
295 [(set (mem:BLK (scratch))
297 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
298 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
299 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
300 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
301 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
302 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
303 UNSPEC_ST1_SCATTER))]
306 st1w\t%4.s, %5, [%1.s]
307 st1w\t%4.s, %5, [%0, %1.s, sxtw]
308 st1w\t%4.s, %5, [%0, %1.s, uxtw]
309 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
310 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
313 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
314 ;; doesn't matter in this case.
315 (define_insn "mask_scatter_store<mode>"
316 [(set (mem:BLK (scratch))
318 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
319 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
320 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
321 (match_operand:DI 2 "const_int_operand")
322 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
323 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
324 UNSPEC_ST1_SCATTER))]
327 st1d\t%4.d, %5, [%1.d]
328 st1d\t%4.d, %5, [%0, %1.d]
329 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
332 ;; SVE structure moves.
333 (define_expand "mov<mode>"
334 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
335 (match_operand:SVE_STRUCT 1 "general_operand"))]
338 /* Big-endian loads and stores need to be done via LD1 and ST1;
339 see the comment at the head of the file for details. */
340 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
343 gcc_assert (can_create_pseudo_p ());
344 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
348 if (CONSTANT_P (operands[1]))
350 aarch64_expand_mov_immediate (operands[0], operands[1]);
356 ;; Unpredicated structure moves (little-endian).
357 (define_insn "*aarch64_sve_mov<mode>_le"
358 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
359 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
360 "TARGET_SVE && !BYTES_BIG_ENDIAN"
362 [(set_attr "length" "<insn_length>")]
365 ;; Unpredicated structure moves (big-endian). Memory accesses require
366 ;; secondary reloads.
;; The "_be" suffix mirrors the BYTES_BIG_ENDIAN condition below and keeps
;; this pattern distinguishable from its little-endian "_le" counterpart.
;; Only register and "Dn" constant sources are accepted; there are no
;; memory alternatives.
367 (define_insn "*aarch64_sve_mov<mode>_be"
368 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
369 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
370 "TARGET_SVE && BYTES_BIG_ENDIAN"
372 [(set_attr "length" "<insn_length>")]
375 ;; Split unpredicated structure moves into pieces. This is the same
376 ;; for both big-endian and little-endian code, although it only needs
377 ;; to handle memory operands for little-endian code.
379 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
380 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
381 "TARGET_SVE && reload_completed"
384 rtx dest = operands[0];
385 rtx src = operands[1];
386 if (REG_P (dest) && REG_P (src))
387 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
389 for (unsigned int i = 0; i < <vector_count>; ++i)
391 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
392 i * BYTES_PER_SVE_VECTOR);
393 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
394 i * BYTES_PER_SVE_VECTOR);
395 emit_insn (gen_rtx_SET (subdest, subsrc));
401 ;; Predicated structure moves. This works for both endiannesses but in
402 ;; practice is only useful for big-endian.
403 (define_insn_and_split "pred_mov<mode>"
404 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
407 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
408 UNSPEC_MERGE_PTRUE))]
410 && (register_operand (operands[0], <MODE>mode)
411 || register_operand (operands[2], <MODE>mode))"
413 "&& reload_completed"
416 for (unsigned int i = 0; i < <vector_count>; ++i)
418 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
420 i * BYTES_PER_SVE_VECTOR);
421 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
423 i * BYTES_PER_SVE_VECTOR);
424 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
428 [(set_attr "length" "<insn_length>")]
;; Predicate register moves.  When the destination is a MEM, the source
;; is first forced into a register.
431 (define_expand "mov<mode>"
432 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
433 (match_operand:PRED_ALL 1 "general_operand"))]
436 if (GET_CODE (operands[0]) == MEM)
437 operands[1] = force_reg (<MODE>mode, operands[1]);
441 (define_insn "*aarch64_sve_mov<mode>"
442 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
443 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
445 && (register_operand (operands[0], <MODE>mode)
446 || register_operand (operands[1], <MODE>mode))"
452 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
455 ;; Handle extractions from a predicate by converting to an integer vector
456 ;; and extracting from there.
457 (define_expand "vec_extract<vpred><Vel>"
458 [(match_operand:<VEL> 0 "register_operand")
459 (match_operand:<VPRED> 1 "register_operand")
460 (match_operand:SI 2 "nonmemory_operand")
461 ;; Dummy operand to which we can attach the iterator.
462 (reg:SVE_I V0_REGNUM)]
465 rtx tmp = gen_reg_rtx (<MODE>mode);
466 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
467 CONST1_RTX (<MODE>mode),
468 CONST0_RTX (<MODE>mode)));
469 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
474 (define_expand "vec_extract<mode><Vel>"
475 [(set (match_operand:<VEL> 0 "register_operand")
477 (match_operand:SVE_ALL 1 "register_operand")
478 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
482 if (poly_int_rtx_p (operands[2], &val)
483 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
485 /* The last element can be extracted with a LASTB and a false
487 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
488 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
491 if (!CONST_INT_P (operands[2]))
493 /* Create an index with operand[2] as the base and -1 as the step.
494 It will then be zero for the element we care about. */
495 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
496 index = force_reg (<VEL_INT>mode, index);
497 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
498 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
500 /* Get a predicate that is true for only that element. */
501 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
502 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
503 rtx sel = gen_reg_rtx (<VPRED>mode);
504 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
506 /* Select the element using LASTB. */
507 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
513 ;; Extract element zero. This is a special case because we want to force
514 ;; the registers to be the same for the second alternative, and then
515 ;; split the instruction into nothing after RA.
516 (define_insn_and_split "*vec_extract<mode><Vel>_0"
517 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
519 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
520 (parallel [(const_int 0)])))]
523 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
524 switch (which_alternative)
527 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
531 return "st1\\t{%1.<Vetype>}[0], %0";
537 && REG_P (operands[0])
538 && REGNO (operands[0]) == REGNO (operands[1])"
541 emit_note (NOTE_INSN_DELETED);
544 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
547 ;; Extract an element from the Advanced SIMD portion of the register.
548 ;; We don't just reuse the aarch64-simd.md pattern because we don't
549 ;; want any change in lane number on big-endian targets.
550 (define_insn "*vec_extract<mode><Vel>_v128"
551 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
553 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
554 (parallel [(match_operand:SI 2 "const_int_operand")])))]
556 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
558 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
559 switch (which_alternative)
562 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
564 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
566 return "st1\\t{%1.<Vetype>}[%2], %0";
571 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
574 ;; Extract an element in the range of DUP. This pattern allows the
575 ;; source and destination to be different.
576 (define_insn "*vec_extract<mode><Vel>_dup"
577 [(set (match_operand:<VEL> 0 "register_operand" "=w")
579 (match_operand:SVE_ALL 1 "register_operand" "w")
580 (parallel [(match_operand:SI 2 "const_int_operand")])))]
582 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
584 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
585 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
589 ;; Extract an element outside the range of DUP. This pattern requires the
590 ;; source and destination to be the same.
591 (define_insn "*vec_extract<mode><Vel>_ext"
592 [(set (match_operand:<VEL> 0 "register_operand" "=w")
594 (match_operand:SVE_ALL 1 "register_operand" "0")
595 (parallel [(match_operand:SI 2 "const_int_operand")])))]
596 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
598 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
599 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
600 return "ext\t%0.b, %0.b, %0.b, #%2";
604 ;; Extract the last active element of operand 1 into operand 0.
605 ;; If no elements are active, extract the last inactive element instead.
606 (define_insn "extract_last_<mode>"
607 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
609 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
610 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
614 lastb\t%<vwcore>0, %1, %2.<Vetype>
615 lastb\t%<Vetype>0, %1, %2.<Vetype>"
618 (define_expand "vec_duplicate<mode>"
620 [(set (match_operand:SVE_ALL 0 "register_operand")
621 (vec_duplicate:SVE_ALL
622 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
623 (clobber (scratch:<VPRED>))])]
626 if (MEM_P (operands[1]))
628 rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
629 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
630 CONST0_RTX (<MODE>mode)));
636 ;; Accept memory operands for the benefit of combine, and also in case
637 ;; the scalar input gets spilled to memory during RA. We want to split
638 ;; the load at the first opportunity in order to allow the PTRUE to be
639 ;; optimized with surrounding code.
640 (define_insn_and_split "*vec_duplicate<mode>_reg"
641 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
642 (vec_duplicate:SVE_ALL
643 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
644 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
647 mov\t%0.<Vetype>, %<vwcore>1
648 mov\t%0.<Vetype>, %<Vetype>1
650 "&& MEM_P (operands[1])"
653 if (GET_CODE (operands[2]) == SCRATCH)
654 operands[2] = gen_reg_rtx (<VPRED>mode);
655 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
656 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
657 CONST0_RTX (<MODE>mode)));
660 [(set_attr "length" "4,4,8")]
663 ;; This is used for vec_duplicate<mode>s from memory, but can also
664 ;; be used by combine to optimize selects of a vec_duplicate<mode>
666 (define_insn "sve_ld1r<mode>"
667 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
669 [(match_operand:<VPRED> 1 "register_operand" "Upl")
670 (vec_duplicate:SVE_ALL
671 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
672 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
675 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
678 ;; Load 128 bits from memory and duplicate to fill a vector. Since there
679 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
680 ;; and simply use vectors of bytes instead.
681 (define_insn "*sve_ld1rq<Vesize>"
682 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
684 [(match_operand:<VPRED> 1 "register_operand" "Upl")
685 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
688 "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
691 ;; Implement a predicate broadcast by shifting the low bit of the scalar
692 ;; input into the top bit and using a WHILELO. An alternative would be to
693 ;; duplicate the input and do a compare with zero.
694 (define_expand "vec_duplicate<mode>"
695 [(set (match_operand:PRED_ALL 0 "register_operand")
696 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
699 rtx tmp = gen_reg_rtx (DImode);
700 rtx op1 = gen_lowpart (DImode, operands[1]);
701 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
702 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
;; Linear integer series generated with INDEX.  Operand 1 is the base and
;; operand 2 is the step.  The three alternatives print the base as an
;; immediate, the step as an immediate, or both operands as registers.
707 (define_insn "vec_series<mode>"
708 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
710 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
711 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
714 index\t%0.<Vetype>, #%1, %<vw>2
715 index\t%0.<Vetype>, %<vw>1, #%2
716 index\t%0.<Vetype>, %<vw>1, %<vw>2"
719 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
720 ;; of an INDEX instruction.
721 (define_insn "*vec_series<mode>_plus"
722 [(set (match_operand:SVE_I 0 "register_operand" "=w")
725 (match_operand:<VEL> 1 "register_operand" "r"))
726 (match_operand:SVE_I 2 "immediate_operand")))]
727 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
729 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
730 return "index\t%0.<Vetype>, %<vw>1, #%2";
734 ;; Unpredicated LD[234].
735 (define_expand "vec_load_lanes<mode><vsingle>"
736 [(set (match_operand:SVE_STRUCT 0 "register_operand")
739 (match_operand:SVE_STRUCT 1 "memory_operand")]
743 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
747 ;; Predicated LD[234].
748 (define_insn "vec_mask_load_lanes<mode><vsingle>"
749 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
751 [(match_operand:<VPRED> 2 "register_operand" "Upl")
752 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
755 "ld<vector_count><Vesize>\t%0, %2/z, %1"
758 ;; Unpredicated ST[234]. This is always a full update, so the dependence
759 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
760 ;; There doesn't seem to be any obvious benefit to treating the all-true
761 ;; case differently though. In particular, it's very unlikely that we'll
762 ;; only find out during RTL that a store_lanes is dead.
763 (define_expand "vec_store_lanes<mode><vsingle>"
764 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
767 (match_operand:SVE_STRUCT 1 "register_operand")
772 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
776 ;; Predicated ST[234].
777 (define_insn "vec_mask_store_lanes<mode><vsingle>"
778 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
780 [(match_operand:<VPRED> 2 "register_operand" "Upl")
781 (match_operand:SVE_STRUCT 1 "register_operand" "w")
785 "st<vector_count><Vesize>\t%1, %2, %0"
788 (define_expand "vec_perm<mode>"
789 [(match_operand:SVE_ALL 0 "register_operand")
790 (match_operand:SVE_ALL 1 "register_operand")
791 (match_operand:SVE_ALL 2 "register_operand")
792 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
793 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
795 aarch64_expand_sve_vec_perm (operands[0], operands[1],
796 operands[2], operands[3]);
801 (define_insn "*aarch64_sve_tbl<mode>"
802 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
804 [(match_operand:SVE_ALL 1 "register_operand" "w")
805 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
808 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
811 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
812 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
813 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
814 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
817 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
820 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
821 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
822 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
823 (match_operand:SVE_ALL 2 "register_operand" "w")]
826 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
829 (define_insn "*aarch64_sve_rev64<mode>"
830 [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
832 [(match_operand:VNx2BI 1 "register_operand" "Upl")
833 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
835 UNSPEC_MERGE_PTRUE))]
837 "rev<Vesize>\t%0.d, %1/m, %2.d"
840 (define_insn "*aarch64_sve_rev32<mode>"
841 [(set (match_operand:SVE_BH 0 "register_operand" "=w")
843 [(match_operand:VNx4BI 1 "register_operand" "Upl")
844 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
846 UNSPEC_MERGE_PTRUE))]
848 "rev<Vesize>\t%0.s, %1/m, %2.s"
851 (define_insn "*aarch64_sve_rev16vnx16qi"
852 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
854 [(match_operand:VNx8BI 1 "register_operand" "Upl")
855 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
857 UNSPEC_MERGE_PTRUE))]
859 "revb\t%0.h, %1/m, %2.h"
862 (define_insn "*aarch64_sve_rev<mode>"
863 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
864 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
867 "rev\t%0.<Vetype>, %1.<Vetype>")
869 (define_insn "*aarch64_sve_dup_lane<mode>"
870 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
871 (vec_duplicate:SVE_ALL
873 (match_operand:SVE_ALL 1 "register_operand" "w")
874 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
876 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
877 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
880 ;; Note that the immediate (third) operand is the lane index not
882 (define_insn "*aarch64_sve_ext<mode>"
883 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
884 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
885 (match_operand:SVE_ALL 2 "register_operand" "w")
886 (match_operand:SI 3 "const_int_operand")]
889 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
891 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
892 return "ext\\t%0.b, %0.b, %2.b, #%3";
;; Unpredicated integer vector addition.  The first three alternatives tie
;; operand 1 to the destination and take operand 2 as a constant: ADD with
;; an immediate, SUB with an immediate, and a form whose text is produced
;; by aarch64_output_sve_inc_dec_immediate.  The last alternative is a
;; plain register-register ADD.
896 (define_insn "add<mode>3"
897 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
899 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
900 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
903 add\t%0.<Vetype>, %0.<Vetype>, #%D2
904 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
905 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
906 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; Unpredicated integer vector subtraction.  The first alternative is a
;; register-register SUB.  The second ties operand 2 to the destination
;; and emits SUBR with operand 1 printed as an immediate.
909 (define_insn "sub<mode>3"
910 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
912 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
913 (match_operand:SVE_I 2 "register_operand" "w, 0")))]
916 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
917 subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
920 ;; Unpredicated multiplication.
921 (define_expand "mul<mode>3"
922 [(set (match_operand:SVE_I 0 "register_operand")
926 (match_operand:SVE_I 1 "register_operand")
927 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
928 UNSPEC_MERGE_PTRUE))]
931 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
935 ;; Multiplication predicated with a PTRUE. We don't actually need the
936 ;; predicate for the first alternative, but using Upa or X isn't likely
937 ;; to gain much and would make the instruction seem less uniform to the
938 ;; register allocator.
939 (define_insn "*mul<mode>3"
940 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
944 (match_operand:SVE_I 2 "register_operand" "%0, 0")
945 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
946 UNSPEC_MERGE_PTRUE))]
949 mul\t%0.<Vetype>, %0.<Vetype>, #%3
950 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
953 (define_insn "*madd<mode>"
954 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
958 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
959 (match_operand:SVE_I 3 "register_operand" "w, w"))]
961 (match_operand:SVE_I 4 "register_operand" "w, 0")))]
964 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
965 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
968 (define_insn "*msub<mode>3"
969 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
971 (match_operand:SVE_I 4 "register_operand" "w, 0")
973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
974 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
975 (match_operand:SVE_I 3 "register_operand" "w, w"))]
976 UNSPEC_MERGE_PTRUE)))]
979 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
980 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
983 ;; Unpredicated NEG, NOT and POPCOUNT.
984 (define_expand "<optab><mode>2"
985 [(set (match_operand:SVE_I 0 "register_operand")
988 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
989 UNSPEC_MERGE_PTRUE))]
992 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
996 ;; NEG, NOT and POPCOUNT predicated with a PTRUE.
997 (define_insn "*<optab><mode>2"
998 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1000 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1001 (SVE_INT_UNARY:SVE_I
1002 (match_operand:SVE_I 2 "register_operand" "w"))]
1003 UNSPEC_MERGE_PTRUE))]
1005 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1008 ;; Vector AND, ORR and XOR.
1009 (define_insn "<optab><mode>3"
1010 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1012 (match_operand:SVE_I 1 "register_operand" "%0, w")
1013 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
1016 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
1017 <logical>\t%0.d, %1.d, %2.d"
1020 ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
1021 ;; by providing this, but we need to use UNSPECs since rtx logical ops
1022 ;; aren't defined for floating-point modes.
1023 (define_insn "*<optab><mode>3"
1024 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1025 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
1026 (match_operand:SVE_F 2 "register_operand" "w")]
1029 "<logicalf_op>\t%0.d, %1.d, %2.d"
1032 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
1033 ;; this pattern even though the NOT instruction itself is predicated.
1034 (define_insn "bic<mode>3"
1035 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1037 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
1038 (match_operand:SVE_I 2 "register_operand" "w")))]
1040 "bic\t%0.d, %2.d, %1.d"
1043 ;; Predicate AND. We can reuse one of the inputs as the GP.
1044 (define_insn "and<mode>3"
1045 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1046 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
1047 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
1049 "and\t%0.b, %1/z, %1.b, %2.b"
1052 ;; Unpredicated predicate ORR and XOR.
1053 (define_expand "<optab><mode>3"
1054 [(set (match_operand:PRED_ALL 0 "register_operand")
1056 (LOGICAL_OR:PRED_ALL
1057 (match_operand:PRED_ALL 1 "register_operand")
1058 (match_operand:PRED_ALL 2 "register_operand"))
1062 operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1066 ;; Predicated predicate ORR and XOR.
1067 (define_insn "pred_<optab><mode>3"
1068 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1071 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1072 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1073 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1075 "<logical>\t%0.b, %1/z, %2.b, %3.b"
1078 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
1079 ;; the GP (which is known to be a PTRUE). Store the result in operand 0
1080 ;; and set the flags in the same way as for PTEST. The (and ...) in the
1081 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
1082 ;; value is structurally equivalent to rhs of the second set.
1083 (define_insn "*<optab><mode>3_cc"
1084 [(set (reg:CC CC_REGNUM)
1086 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
1089 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1090 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1094 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1095 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
1098 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
1101 ;; Unpredicated predicate inverse.
1102 (define_expand "one_cmpl<mode>2"
1103 [(set (match_operand:PRED_ALL 0 "register_operand")
1105 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1109 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1113 ;; Predicated predicate inverse.
1114 (define_insn "*one_cmpl<mode>3"
1115 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1117 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1118 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1120 "not\t%0.b, %1/z, %2.b"
1123 ;; Predicated predicate BIC and ORN.
1124 (define_insn "*<nlogical><mode>3"
1125 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1128 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1129 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1130 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1132 "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
1135 ;; Predicated predicate NAND and NOR.
1136 (define_insn "*<logical_nn><mode>3"
1137 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1140 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1141 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
1142 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1144 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
1147 ;; Unpredicated LSL, LSR and ASR by a vector.
1148 (define_expand "v<optab><mode>3"
1149 [(set (match_operand:SVE_I 0 "register_operand")
1153 (match_operand:SVE_I 1 "register_operand")
1154 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
1155 UNSPEC_MERGE_PTRUE))]
1158 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1162 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
1163 ;; actually need the predicate for the first alternative, but using Upa
1164 ;; or X isn't likely to gain much and would make the instruction seem
1165 ;; less uniform to the register allocator.
1166 (define_insn "*v<optab><mode>3"
1167 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1169 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1171 (match_operand:SVE_I 2 "register_operand" "w, 0")
1172 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
1173 UNSPEC_MERGE_PTRUE))]
1176 <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
1177 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1180 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector shifts above.
1182 (define_expand "<ASHIFT:optab><mode>3"
1183 [(set (match_operand:SVE_I 0 "register_operand")
1184 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1185 (match_operand:<VEL> 2 "general_operand")))]
1189 if (CONST_INT_P (operands[2]))
1191 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
1192 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
1193 amount = force_reg (<MODE>mode, amount);
1197 amount = gen_reg_rtx (<MODE>mode);
1198 emit_insn (gen_vec_duplicate<mode> (amount,
1199 convert_to_mode (<VEL>mode,
1202 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
1207 ;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
1209 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
1210 ;; is a PTRUE even if the optimizers haven't yet been able to propagate
1211 ;; the constant. We would use a separate unspec code for PTESTs involving
1212 ;; GPs that might not be PTRUEs.
1213 (define_insn "ptest_ptrue<mode>"
1214 [(set (reg:CC CC_REGNUM)
1216 (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
1217 (match_operand:PRED_ALL 1 "register_operand" "Upa")]
1224 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
1225 ;; with the comparison being unsigned.
1226 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
1227 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1228 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
1229 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
1231 (clobber (reg:CC CC_REGNUM))]
1233 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
1236 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1237 ;; Handle the case in which both results are useful. The GP operand
1238 ;; to the PTEST isn't needed, so we allow it to be anything.
;; NOTE(review): the split code below uses an unqualified <MODE> even though
;; this pattern iterates over both GPI and PRED_ALL.  Operand 1 has a
;; PRED_ALL mode, so <PRED_ALL:MODE> is presumably what is intended --
;; confirm how the unqualified attribute is resolved.
1239 (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
1240 [(set (reg:CC CC_REGNUM)
1242 (unspec:SI [(match_operand:PRED_ALL 1)
1244 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1245 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1249 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1250 (unspec:PRED_ALL [(match_dup 2)
1254 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1255 ;; Force the compiler to drop the unused predicate operand, so that we
1256 ;; don't have an unnecessary PTRUE.
1257 "&& !CONSTANT_P (operands[1])"
1260 emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
1261 (operands[0], CONSTM1_RTX (<MODE>mode),
1262 operands[2], operands[3]));
1267 ;; Predicated integer comparison.
1268 (define_insn "*vec_cmp<cmp_op>_<mode>"
1269 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1271 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1272 (match_operand:SVE_I 2 "register_operand" "w, w")
1273 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1275 (clobber (reg:CC CC_REGNUM))]
1278 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1279 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1282 ;; Predicated integer comparison in which only the flags result is interesting.
1283 (define_insn "*vec_cmp<cmp_op>_<mode>_ptest"
1284 [(set (reg:CC CC_REGNUM)
1287 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1290 (match_operand:SVE_I 2 "register_operand" "w, w")
1291 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1295 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1298 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1299 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1302 ;; Predicated comparison in which both the flag and predicate results are interesting.
1304 (define_insn "*vec_cmp<cmp_op>_<mode>_cc"
1305 [(set (reg:CC CC_REGNUM)
1308 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1311 (match_operand:SVE_I 2 "register_operand" "w, w")
1312 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1316 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1324 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1325 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1328 ;; Predicated floating-point comparison (excluding FCMUO, which doesn't
1329 ;; allow #0.0 as an operand).
1330 (define_insn "*vec_fcm<cmp_op><mode>"
1331 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1333 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1334 (match_operand:SVE_F 2 "register_operand" "w, w")
1335 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1339 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1340 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1343 ;; Predicated FCMUO.
1344 (define_insn "*vec_fcmuo<mode>"
1345 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1347 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1348 (match_operand:SVE_F 2 "register_operand" "w")
1349 (match_operand:SVE_F 3 "register_operand" "w")]
1352 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1355 ;; vcond_mask operand order: true, false, mask
1356 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1357 ;; SEL operand order: mask, true, false
1358 (define_insn "vcond_mask_<mode><vpred>"
1359 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1361 [(match_operand:<VPRED> 3 "register_operand" "Upa")
1362 (match_operand:SVE_ALL 1 "register_operand" "w")
1363 (match_operand:SVE_ALL 2 "register_operand" "w")]
1366 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1369 ;; Selects between a duplicated immediate and zero.
1370 (define_insn "aarch64_sve_dup<mode>_const"
1371 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1373 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1374 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1375 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1378 "mov\t%0.<Vetype>, %1/z, #%2"
1381 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
1382 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1383 (define_expand "vcond<mode><v_int_equiv>"
1384 [(set (match_operand:SVE_ALL 0 "register_operand")
1385 (if_then_else:SVE_ALL
1386 (match_operator 3 "comparison_operator"
1387 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1388 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1389 (match_operand:SVE_ALL 1 "register_operand")
1390 (match_operand:SVE_ALL 2 "register_operand")))]
1393 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1398 ;; Integer vcondu. Don't enforce an immediate range here, since it
1399 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1400 (define_expand "vcondu<mode><v_int_equiv>"
1401 [(set (match_operand:SVE_ALL 0 "register_operand")
1402 (if_then_else:SVE_ALL
1403 (match_operator 3 "comparison_operator"
1404 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1405 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1406 (match_operand:SVE_ALL 1 "register_operand")
1407 (match_operand:SVE_ALL 2 "register_operand")))]
1410 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1415 ;; Floating-point vcond. All comparisons except FCMUO allow a zero
1416 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
1418 (define_expand "vcond<mode><v_fp_equiv>"
1419 [(set (match_operand:SVE_SD 0 "register_operand")
1420 (if_then_else:SVE_SD
1421 (match_operator 3 "comparison_operator"
1422 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1423 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1424 (match_operand:SVE_SD 1 "register_operand")
1425 (match_operand:SVE_SD 2 "register_operand")))]
1428 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1433 ;; Signed integer comparisons. Don't enforce an immediate range here, since
1434 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
1436 (define_expand "vec_cmp<mode><vpred>"
1438 [(set (match_operand:<VPRED> 0 "register_operand")
1439 (match_operator:<VPRED> 1 "comparison_operator"
1440 [(match_operand:SVE_I 2 "register_operand")
1441 (match_operand:SVE_I 3 "nonmemory_operand")]))
1442 (clobber (reg:CC CC_REGNUM))])]
1445 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1446 operands[2], operands[3]);
1451 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
1452 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
1454 (define_expand "vec_cmpu<mode><vpred>"
1456 [(set (match_operand:<VPRED> 0 "register_operand")
1457 (match_operator:<VPRED> 1 "comparison_operator"
1458 [(match_operand:SVE_I 2 "register_operand")
1459 (match_operand:SVE_I 3 "nonmemory_operand")]))
1460 (clobber (reg:CC CC_REGNUM))])]
1463 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1464 operands[2], operands[3]);
1469 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
1470 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO with zero.
1472 (define_expand "vec_cmp<mode><vpred>"
1473 [(set (match_operand:<VPRED> 0 "register_operand")
1474 (match_operator:<VPRED> 1 "comparison_operator"
1475 [(match_operand:SVE_F 2 "register_operand")
1476 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1479 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1480 operands[2], operands[3], false);
1485 ;; Branch based on predicate equality or inequality.
;; The expander forces an all-ones predicate into a register and checks
;; whether operand 2 is the zero constant.  The gen_pred_xor<mode>3 call
;; (apparently used when operand 2 is not zero) combines operand 1 with
;; the other operand under that all-ones predicate.  The resulting
;; predicate is tested with ptest_ptrue<mode>, and the comparison is then
;; rewritten as one between CC_REGNUM and zero.
1486 (define_expand "cbranch<mode>4"
1489 (match_operator 0 "aarch64_equality_operator"
1490 [(match_operand:PRED_ALL 1 "register_operand")
1491 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1492 (label_ref (match_operand 3 ""))
1496 rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1498 if (operands[2] == CONST0_RTX (<MODE>mode))
1502 pred = gen_reg_rtx (<MODE>mode);
1503 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1506 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1507 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1508 operands[2] = const0_rtx;
1512 ;; Unpredicated integer MIN/MAX.
1513 (define_expand "<su><maxmin><mode>3"
1514 [(set (match_operand:SVE_I 0 "register_operand")
1517 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1518 (match_operand:SVE_I 2 "register_operand"))]
1519 UNSPEC_MERGE_PTRUE))]
1522 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1526 ;; Integer MIN/MAX predicated with a PTRUE.
1527 (define_insn "*<su><maxmin><mode>3"
1528 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1530 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1531 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
1532 (match_operand:SVE_I 3 "register_operand" "w"))]
1533 UNSPEC_MERGE_PTRUE))]
1535 "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1538 ;; Unpredicated floating-point MIN/MAX.
1539 (define_expand "<su><maxmin><mode>3"
1540 [(set (match_operand:SVE_F 0 "register_operand")
1543 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1544 (match_operand:SVE_F 2 "register_operand"))]
1545 UNSPEC_MERGE_PTRUE))]
1548 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1552 ;; Floating-point MIN/MAX predicated with a PTRUE.
1553 (define_insn "*<su><maxmin><mode>3"
1554 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1556 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1557 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
1558 (match_operand:SVE_F 3 "register_operand" "w"))]
1559 UNSPEC_MERGE_PTRUE))]
1561 "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1564 ;; Unpredicated fmin/fmax.
1565 (define_expand "<maxmin_uns><mode>3"
1566 [(set (match_operand:SVE_F 0 "register_operand")
1569 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1570 (match_operand:SVE_F 2 "register_operand")]
1572 UNSPEC_MERGE_PTRUE))]
1575 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1579 ;; fmin/fmax predicated with a PTRUE.
1580 (define_insn "*<maxmin_uns><mode>3"
1581 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1583 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1584 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
1585 (match_operand:SVE_F 3 "register_operand" "w")]
1587 UNSPEC_MERGE_PTRUE))]
1589 "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1592 ;; Predicated integer operations.
1593 (define_insn "cond_<optab><mode>"
1594 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1596 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1597 (match_operand:SVE_I 2 "register_operand" "0")
1598 (match_operand:SVE_I 3 "register_operand" "w")]
1601 "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1604 ;; Set operand 0 to the last active element in operand 3, or to tied
1605 ;; operand 1 if no elements are active.
1606 (define_insn "fold_extract_last_<mode>"
1607 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
1609 [(match_operand:<VEL> 1 "register_operand" "0, 0")
1610 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
1611 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
1615 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
1616 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
1619 ;; Unpredicated integer add reduction.
1620 (define_expand "reduc_plus_scal_<mode>"
1621 [(set (match_operand:<VEL> 0 "register_operand")
1622 (unspec:<VEL> [(match_dup 2)
1623 (match_operand:SVE_I 1 "register_operand")]
1627 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1631 ;; Predicated integer add reduction. The result is always 64 bits.
1632 (define_insn "*reduc_plus_scal_<mode>"
1633 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1634 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1635 (match_operand:SVE_I 2 "register_operand" "w")]
1638 "uaddv\t%d0, %1, %2.<Vetype>"
1641 ;; Unpredicated floating-point add reduction.
1642 (define_expand "reduc_plus_scal_<mode>"
1643 [(set (match_operand:<VEL> 0 "register_operand")
1644 (unspec:<VEL> [(match_dup 2)
1645 (match_operand:SVE_F 1 "register_operand")]
1649 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1653 ;; Predicated floating-point add reduction.
1654 (define_insn "*reduc_plus_scal_<mode>"
1655 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1656 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1657 (match_operand:SVE_F 2 "register_operand" "w")]
1660 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
1663 ;; Unpredicated integer MIN/MAX reduction.
1664 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1665 [(set (match_operand:<VEL> 0 "register_operand")
1666 (unspec:<VEL> [(match_dup 2)
1667 (match_operand:SVE_I 1 "register_operand")]
1671 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1675 ;; Predicated integer MIN/MAX reduction.
1676 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1677 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1678 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1679 (match_operand:SVE_I 2 "register_operand" "w")]
1682 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
1685 ;; Unpredicated floating-point MIN/MAX reduction.
1686 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1687 [(set (match_operand:<VEL> 0 "register_operand")
1688 (unspec:<VEL> [(match_dup 2)
1689 (match_operand:SVE_F 1 "register_operand")]
1693 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1697 ;; Predicated floating-point MIN/MAX reduction.
1698 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1699 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1700 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1701 (match_operand:SVE_F 2 "register_operand" "w")]
1704 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
;; Unpredicated integer <optab> reduction: reduces vector operand 1 to a
;; <VEL> result, supplying an all-ones predicate as operand 2.
1707 (define_expand "reduc_<optab>_scal_<mode>"
1708 [(set (match_operand:<VEL> 0 "register_operand")
1709 (unspec:<VEL> [(match_dup 2)
1710 (match_operand:SVE_I 1 "register_operand")]
1714 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
;; Predicated integer <optab> reduction of vector operand 2 to a <VEL>
;; result, with operand 1 as the governing predicate.  The mnemonic comes
;; from <bit_reduc_op>.
1718 (define_insn "*reduc_<optab>_scal_<mode>"
1719 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1720 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1721 (match_operand:SVE_I 2 "register_operand" "w")]
1724 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
1727 ;; Unpredicated in-order FP reductions.
1728 (define_expand "fold_left_plus_<mode>"
1729 [(set (match_operand:<VEL> 0 "register_operand")
1730 (unspec:<VEL> [(match_dup 3)
1731 (match_operand:<VEL> 1 "register_operand")
1732 (match_operand:SVE_F 2 "register_operand")]
1736 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1740 ;; In-order FP reductions predicated with PTRUE.
1741 (define_insn "*fold_left_plus_<mode>"
1742 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1743 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1744 (match_operand:<VEL> 2 "register_operand" "0")
1745 (match_operand:SVE_F 3 "register_operand" "w")]
1748 "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>"
1751 ;; Predicated form of the above in-order reduction.
1752 (define_insn "*pred_fold_left_plus_<mode>"
1753 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1755 [(match_operand:<VEL> 1 "register_operand" "0")
1757 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1758 (match_operand:SVE_F 3 "register_operand" "w")
1759 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
1763 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
1766 ;; Unpredicated floating-point addition.
1767 (define_expand "add<mode>3"
1768 [(set (match_operand:SVE_F 0 "register_operand")
1772 (match_operand:SVE_F 1 "register_operand")
1773 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
1774 UNSPEC_MERGE_PTRUE))]
1777 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1781 ;; Floating-point addition predicated with a PTRUE.
1782 (define_insn "*add<mode>3"
1783 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
1785 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1787 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
1788 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
1789 UNSPEC_MERGE_PTRUE))]
1792 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1793 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1794 fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1797 ;; Unpredicated floating-point subtraction.
1798 (define_expand "sub<mode>3"
1799 [(set (match_operand:SVE_F 0 "register_operand")
1803 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
1804 (match_operand:SVE_F 2 "register_operand"))]
1805 UNSPEC_MERGE_PTRUE))]
1808 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1812 ;; Floating-point subtraction predicated with a PTRUE.
1813 (define_insn "*sub<mode>3"
1814 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
1816 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
1818 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
1819 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
1820 UNSPEC_MERGE_PTRUE))]
1822 && (register_operand (operands[2], <MODE>mode)
1823 || register_operand (operands[3], <MODE>mode))"
1825 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1826 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1827 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
1828 fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1831 ;; Unpredicated floating-point multiplication.
1832 (define_expand "mul<mode>3"
1833 [(set (match_operand:SVE_F 0 "register_operand")
1837 (match_operand:SVE_F 1 "register_operand")
1838 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
1839 UNSPEC_MERGE_PTRUE))]
1842 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1846 ;; Floating-point multiplication predicated with a PTRUE.
1847 (define_insn "*mul<mode>3"
1848 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1850 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1852 (match_operand:SVE_F 2 "register_operand" "%0, w")
1853 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
1854 UNSPEC_MERGE_PTRUE))]
1857 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1858 fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1861 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
1862 (define_expand "fma<mode>4"
1863 [(set (match_operand:SVE_F 0 "register_operand")
1866 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1867 (match_operand:SVE_F 2 "register_operand")
1868 (match_operand:SVE_F 3 "register_operand"))]
1869 UNSPEC_MERGE_PTRUE))]
1872 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1876 ;; fma predicated with a PTRUE.
1877 (define_insn "*fma<mode>4"
1878 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1880 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1881 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1882 (match_operand:SVE_F 4 "register_operand" "w, w")
1883 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1884 UNSPEC_MERGE_PTRUE))]
1887 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1888 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1891 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
1892 (define_expand "fnma<mode>4"
1893 [(set (match_operand:SVE_F 0 "register_operand")
1896 (fma:SVE_F (neg:SVE_F
1897 (match_operand:SVE_F 1 "register_operand"))
1898 (match_operand:SVE_F 2 "register_operand")
1899 (match_operand:SVE_F 3 "register_operand"))]
1900 UNSPEC_MERGE_PTRUE))]
1903 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1907 ;; fnma predicated with a PTRUE.
1908 (define_insn "*fnma<mode>4"
1909 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1911 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1912 (fma:SVE_F (neg:SVE_F
1913 (match_operand:SVE_F 3 "register_operand" "%0, w"))
1914 (match_operand:SVE_F 4 "register_operand" "w, w")
1915 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1916 UNSPEC_MERGE_PTRUE))]
1919 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1920 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1923 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
1924 (define_expand "fms<mode>4"
1925 [(set (match_operand:SVE_F 0 "register_operand")
1928 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1929 (match_operand:SVE_F 2 "register_operand")
1931 (match_operand:SVE_F 3 "register_operand")))]
1932 UNSPEC_MERGE_PTRUE))]
1935 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1939 ;; fms predicated with a PTRUE.
1940 (define_insn "*fms<mode>4"
1941 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1943 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1944 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1945 (match_operand:SVE_F 4 "register_operand" "w, w")
1947 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
1948 UNSPEC_MERGE_PTRUE))]
1951 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1952 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1955 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
1956 (define_expand "fnms<mode>4"
1957 [(set (match_operand:SVE_F 0 "register_operand")
1960 (fma:SVE_F (neg:SVE_F
1961 (match_operand:SVE_F 1 "register_operand"))
1962 (match_operand:SVE_F 2 "register_operand")
1964 (match_operand:SVE_F 3 "register_operand")))]
1965 UNSPEC_MERGE_PTRUE))]
1968 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1972 ;; fnms predicated with a PTRUE.
1973 (define_insn "*fnms<mode>4"
1974 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1976 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1977 (fma:SVE_F (neg:SVE_F
1978 (match_operand:SVE_F 3 "register_operand" "%0, w"))
1979 (match_operand:SVE_F 4 "register_operand" "w, w")
1981 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
1982 UNSPEC_MERGE_PTRUE))]
1985 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1986 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1989 ;; Unpredicated floating-point division.
1990 (define_expand "div<mode>3"
1991 [(set (match_operand:SVE_F 0 "register_operand")
1994 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
1995 (match_operand:SVE_F 2 "register_operand"))]
1996 UNSPEC_MERGE_PTRUE))]
1999 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2003 ;; Floating-point division predicated with a PTRUE.
2004 (define_insn "*div<mode>3"
2005 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2007 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2008 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
2009 (match_operand:SVE_F 3 "register_operand" "w, 0"))]
2010 UNSPEC_MERGE_PTRUE))]
2013 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2014 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2017 ;; Unpredicated FNEG, FABS and FSQRT.
2018 (define_expand "<optab><mode>2"
2019 [(set (match_operand:SVE_F 0 "register_operand")
2022 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2023 UNSPEC_MERGE_PTRUE))]
2026 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2030 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
2031 (define_insn "*<optab><mode>2"
2032 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2034 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2035 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2036 UNSPEC_MERGE_PTRUE))]
2038 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2041 ;; Unpredicated FRINTy.
2042 (define_expand "<frint_pattern><mode>2"
2043 [(set (match_operand:SVE_F 0 "register_operand")
2046 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2048 UNSPEC_MERGE_PTRUE))]
2051 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2055 ;; FRINTy predicated with a PTRUE.
2056 (define_insn "*<frint_pattern><mode>2"
2057 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2059 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2060 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2062 UNSPEC_MERGE_PTRUE))]
2064 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2067 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2068 ;; SF to SI or DF to DI).
2069 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2070 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2071 (unspec:<V_INT_EQUIV>
2073 (FIXUORS:<V_INT_EQUIV>
2074 (match_operand:SVE_F 1 "register_operand"))]
2075 UNSPEC_MERGE_PTRUE))]
2078 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2082 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
2083 (define_insn "*<fix_trunc_optab>v16hsf<mode>2"
2084 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2086 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2088 (match_operand:VNx8HF 2 "register_operand" "w"))]
2089 UNSPEC_MERGE_PTRUE))]
2091 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
2094 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
;; Operand 2 is a VNx4SF vector (printed with the ".s" suffix) and the
;; destination is any SVE_SDI mode.  Operand 1 is the <VPRED> predicate of
;; the iterated (destination) mode.
2095 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2096 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2098 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2100 (match_operand:VNx4SF 2 "register_operand" "w"))]
2101 UNSPEC_MERGE_PTRUE))]
2103 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2106 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; Operand 2 is a VNx2DF vector (printed with the ".d" suffix) and the
;; destination is any SVE_SDI mode.  Unlike the SF and HF variants, the
;; predicate mode is fixed at VNx2BI whatever the destination mode is.
2107 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2108 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2110 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2112 (match_operand:VNx2DF 2 "register_operand" "w"))]
2113 UNSPEC_MERGE_PTRUE))]
2115 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2118 ;; Unpredicated conversion of integers to floats of the same size
2119 ;; (HI to HF, SI to SF or DI to DF).
;; Expander whose operand 1 has the integer mode <V_INT_EQUIV> and whose
;; operand 0 is the SVE_F result.  The preparation statement puts the
;; all-ones <VPRED> constant (CONSTM1_RTX) into a register as operands[2].
2120 (define_expand "<optab><v_int_equiv><mode>2"
2121 [(set (match_operand:SVE_F 0 "register_operand")
2125 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2126 UNSPEC_MERGE_PTRUE))]
2129 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2133 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
;; Operand 2 is any SVE_HSDI vector and the VNx8HF result is printed with
;; the ".h" suffix.  Operand 1 is the <VPRED> predicate of the iterated
;; (source) mode.
2135 (define_insn "*<optab><mode>vnx8hf2"
2136 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2138 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2140 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2141 UNSPEC_MERGE_PTRUE))]
2143 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2146 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
;; Operand 2 is any SVE_SDI vector and the VNx4SF result is printed with
;; the ".s" suffix.  Operand 1 is the <VPRED> predicate of the iterated
;; (source) mode.
2147 (define_insn "*<optab><mode>vnx4sf2"
2148 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2150 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2152 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2153 UNSPEC_MERGE_PTRUE))]
2155 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2158 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; Operand 2 is any SVE_SDI vector and the VNx2DF result is printed with
;; the ".d" suffix.  The predicate mode is fixed at VNx2BI whatever the
;; source mode is.
2159 (define_insn "*<optab><mode>vnx2df2"
2160 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2162 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2164 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2165 UNSPEC_MERGE_PTRUE))]
2167 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2170 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs.
;; Operand 2 has the wide mode <VWIDE> and the result has the narrower
;; SVE_HSF mode.  Operand 1 is the <VWIDE_PRED> predicate.  FCVT is emitted
;; with the narrow <Vetype> suffix on the destination and the wide
;; <Vewtype> suffix on the source.
2172 (define_insn "*trunc<Vwide><mode>2"
2173 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2175 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2177 [(match_operand:<VWIDE> 2 "register_operand" "w")]
2178 UNSPEC_FLOAT_CONVERT)]
2179 UNSPEC_MERGE_PTRUE))]
2181 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2184 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs.
;; Operand 2 has the narrow SVE_HSF mode and the result has the wide mode
;; <VWIDE>.  Operand 1 is the <VWIDE_PRED> predicate.  FCVT is emitted with
;; the wide <Vewtype> suffix on the destination and the narrow <Vetype>
;; suffix on the source.
2186 (define_insn "*extend<mode><Vwide>2"
2187 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2189 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2191 [(match_operand:SVE_HSF 2 "register_operand" "w")]
2192 UNSPEC_FLOAT_CONVERT)]
2193 UNSPEC_MERGE_PTRUE))]
2195 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2198 ;; PUNPKHI and PUNPKLO.
;; Predicate unpack: operand 1 is a PRED_BHS predicate and the result has
;; the predicate mode <VWIDE>; both use constraint "Upa".  The template
;; always prints ".h" for the destination and ".b" for the source,
;; whichever predicate mode is being unpacked.
2199 (define_insn "vec_unpack<su>_<perm_hilo>_<mode>"
2200 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2201 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
2204 "punpk<perm_hilo>\t%0.h, %1.b"
2207 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
;; Integer vector unpack: operand 1 is an SVE_BHSI vector and the result
;; has the mode <VWIDE>.  <su> selects the signed or unsigned mnemonic and
;; <perm_hilo> the half; the destination is printed with the wide
;; <Vewtype> suffix and the source with <Vetype>.
2208 (define_insn "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2210 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2213 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2216 ;; Used by the vec_unpacks_<perm_hilo>_<mode> expander to unpack the bit
2217 ;; representation of a VNx4SF or VNx8HF without conversion.  The choice
2218 ;; between signed and unsigned isn't significant.
;; Source and destination are both declared with the same SVE_HSF mode;
;; only the printed suffixes differ (<Vewtype> for the destination,
;; <Vetype> for the source).  The unsigned UUNPK form is the one emitted.
2219 (define_insn "*vec_unpacku_<perm_hilo>_<mode>_no_convert"
2220 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2221 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand" "w")]
2224 "uunpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2227 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2228 ;; First unpack the source without conversion, then float-convert the
;; unpacked source.
;; operands[2] is a fresh <MODE> temporary that is fed to the
;; UNSPEC_FLOAT_CONVERT step as (match_dup 2).  operands[3] is the all-ones
;; <VWIDE_PRED> constant (CONSTM1_RTX) forced into a register and used as
;; the predicate of the UNSPEC_MERGE_PTRUE wrapper.
2230 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2232 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2234 (set (match_operand:<VWIDE> 0 "register_operand")
2235 (unspec:<VWIDE> [(match_dup 3)
2236 (unspec:<VWIDE> [(match_dup 2)] UNSPEC_FLOAT_CONVERT)]
2237 UNSPEC_MERGE_PTRUE))]
2240 operands[2] = gen_reg_rtx (<MODE>mode);
2241 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2245 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
2246 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2247 ;; unpacked VNx4SI to VNx2DF.
;; operands[2] is a fresh VNx2DI temporary.  operands[3] is the all-ones
;; VNx2BI constant (CONSTM1_RTX) forced into a register.  operands[4] is a
;; VNx4SI subreg of operands[2] at offset 0, which is what the FLOATUORS
;; conversion reads.
2248 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2250 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2252 (set (match_operand:VNx2DF 0 "register_operand")
2253 (unspec:VNx2DF [(match_dup 3)
2254 (FLOATUORS:VNx2DF (match_dup 4))]
2255 UNSPEC_MERGE_PTRUE))]
2258 operands[2] = gen_reg_rtx (VNx2DImode);
2259 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2260 operands[4] = gen_rtx_SUBREG (VNx4SImode, operands[2], 0);
2264 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
2265 ;; the high part of each wide element.
;; Operands 1 and 2 are <VWIDE> predicates and the result is a PRED_BHS
;; predicate; all three use constraint "Upa".  Every operand is printed
;; with the narrow <Vetype> suffix.
2266 (define_insn "vec_pack_trunc_<Vwide>"
2267 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2269 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2270 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2273 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2276 ;; Integer pack.  Use UZP1 on the narrower type, which discards
2277 ;; the high part of each wide element.
;; Operands 1 and 2 are <VWIDE> vectors and the result is an SVE_BHSI
;; vector; all three use constraint "w".  Every operand is printed with
;; the narrow <Vetype> suffix.
2278 (define_insn "vec_pack_trunc_<Vwide>"
2279 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2281 [(match_operand:<VWIDE> 1 "register_operand" "w")
2282 (match_operand:<VWIDE> 2 "register_operand" "w")]
2285 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2288 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2289 ;; the results into a single vector.
;; Operands 1 and 2 are each narrowed with UNSPEC_FLOAT_CONVERT under
;; UNSPEC_MERGE_PTRUE.  operands[3] is the all-ones <VWIDE_PRED> constant
;; (CONSTM1_RTX) forced into a register.  operands[4] and operands[5] are
;; fresh <MODE> temporaries that the final UNSPEC_UZP1 combines into
;; operand 0.
2290 (define_expand "vec_pack_trunc_<Vwide>"
2294 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2295 UNSPEC_FLOAT_CONVERT)]
2296 UNSPEC_MERGE_PTRUE))
2300 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2301 UNSPEC_FLOAT_CONVERT)]
2302 UNSPEC_MERGE_PTRUE))
2303 (set (match_operand:SVE_HSF 0 "register_operand")
2304 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2307 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2308 operands[4] = gen_reg_rtx (<MODE>mode);
2309 operands[5] = gen_reg_rtx (<MODE>mode);
2313 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Operands 1 and 2 are VNx2DF vectors, each converted with FIXUORS to
;; VNx4SI under UNSPEC_MERGE_PTRUE.  operands[3] is the all-ones VNx2BI
;; constant (CONSTM1_RTX) forced into a register.  operands[4] and
;; operands[5] are fresh VNx4SI temporaries that the final UNSPEC_UZP1
;; combines into operand 0.
2314 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2318 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2319 UNSPEC_MERGE_PTRUE))
2323 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2324 UNSPEC_MERGE_PTRUE))
2325 (set (match_operand:VNx4SI 0 "register_operand")
2326 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2329 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2330 operands[4] = gen_reg_rtx (VNx4SImode);
2331 operands[5] = gen_reg_rtx (VNx4SImode);
2335 ;; Predicated floating-point operations.
;; Operand 1 is the <VPRED> predicate (constraint "Upl").  Operand 2 is
;; tied to the destination by the "0" constraint, so the template prints
;; %0 as the first source and operand 3 as the second.  The merging ("/m")
;; form of the instruction named by <sve_fp_op> is emitted.
2336 (define_insn "cond_<optab><mode>"
2337 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2339 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2340 (match_operand:SVE_F 2 "register_operand" "0")
2341 (match_operand:SVE_F 3 "register_operand" "w")]
2344 "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2347 ;; Shift an SVE vector left and insert a scalar into element 0.
;; Operand 1 is tied to the destination ("0") in both alternatives, so INSR
;; updates the vector in place.  The scalar (operand 2, mode <VEL>) has
;; two alternatives: constraint "rZ", printed as %<vwcore>2, and
;; constraint "w", printed as %<Vetype>2.
2348 (define_insn "vec_shl_insert_<mode>"
2349 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2351 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2352 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2356 insr\t%0.<Vetype>, %<vwcore>2
2357 insr\t%0.<Vetype>, %<Vetype>2"