1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Note on the handling of big-endian SVE
22 ;; --------------------------------------
24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
25 ;; same way as movdi or movti would: the first byte of memory goes
26 ;; into the most significant byte of the register and the last byte
27 ;; of memory goes into the least significant byte of the register.
28 ;; This is the most natural ordering for Advanced SIMD and matches
29 ;; the ABI layout for 64-bit and 128-bit vector types.
31 ;; As a result, the order of bytes within the register is what GCC
32 ;; expects for a big-endian target, and subreg offsets therefore work
33 ;; as expected, with the first element in memory having subreg offset 0
34 ;; and the last element in memory having the subreg offset associated
35 ;; with a big-endian lowpart. However, this ordering also means that
36 ;; GCC's lane numbering does not match the architecture's numbering:
37 ;; GCC always treats the element at the lowest address in memory
38 ;; (subreg offset 0) as element 0, while the architecture treats
39 ;; the least significant end of the register as element 0.
41 ;; The situation for SVE is different. We want the layout of the
42 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
43 ;; logically, a mov<mode> load must be indistinguishable from a
44 ;; maskload<mode> whose mask is all true. We therefore need the
45 ;; register layout to match LD1 rather than LDR. The ABI layout of
46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
48 ;; As a result, the architecture lane numbering matches GCC's lane
49 ;; numbering, with element 0 always being the first in memory. However:
52 ;; - Applying a subreg offset to a register does not give the element
53 ;; that GCC expects: the first element in memory has the subreg offset
54 ;; associated with a big-endian lowpart while the last element in memory
55 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
57 ;; - We cannot use LDR and STR for spill slots that might be accessed
58 ;; via subregs, since although the elements have the order GCC expects,
59 ;; the order of the bytes within the elements is different. We instead
60 ;; access spill slots via LD1 and ST1, using secondary reloads to
61 ;; reserve a predicate register.
65 (define_expand "mov<mode>"
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
67 (match_operand:SVE_ALL 1 "general_operand"))]
70 /* Use the predicated load and store patterns where possible.
71 This is required for big-endian targets (see the comment at the
72 head of the file) and increases the addressing choices for
74 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
75 && can_create_pseudo_p ())
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
81 if (CONSTANT_P (operands[1]))
83 aarch64_expand_mov_immediate (operands[0], operands[1],
84 gen_vec_duplicate<mode>);
88 /* Optimize subregs on big-endian targets: we can use REV[BHW]
89 instead of going through memory. */
91 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
96 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
97 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
98 ;; for details. We use a special predicate for operand 2 to reduce
99 ;; the number of patterns.
100 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
101 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
103 [(match_operand:VNx16BI 1 "register_operand" "Upl")
104 (match_operand 2 "aarch64_any_register_operand" "w")]
106 "TARGET_SVE && BYTES_BIG_ENDIAN"
108 "&& reload_completed"
111 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
116 ;; Unpredicated moves (little-endian). Only allow memory operations
117 ;; during and after RA; before RA we want the predicated load and
118 ;; store patterns to be used instead.
119 (define_insn "*aarch64_sve_mov<mode>_le"
120 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
121 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
124 && ((lra_in_progress || reload_completed)
125 || (register_operand (operands[0], <MODE>mode)
126 && nonmemory_operand (operands[1], <MODE>mode)))"
131 * return aarch64_output_sve_mov_immediate (operands[1]);"
134 ;; Unpredicated moves (big-endian). Memory accesses require secondary reloads.
136 (define_insn "*aarch64_sve_mov<mode>_be"
137 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
138 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
139 "TARGET_SVE && BYTES_BIG_ENDIAN"
142 * return aarch64_output_sve_mov_immediate (operands[1]);"
145 ;; Handle big-endian memory reloads. We use byte PTRUE for all modes
146 ;; to try to encourage reuse.
147 (define_expand "aarch64_sve_reload_be"
149 [(set (match_operand 0)
151 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
152 "TARGET_SVE && BYTES_BIG_ENDIAN"
154 /* Create a PTRUE. */
155 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
157 /* Refer to the PTRUE in the appropriate mode for this move. */
158 machine_mode mode = GET_MODE (operands[0]);
159 machine_mode pred_mode
160 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
161 rtx pred = gen_lowpart (pred_mode, operands[2]);
163 /* Emit a predicated load or store. */
164 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
169 ;; A predicated load or store for which the predicate is known to be
170 ;; all-true. Note that this pattern is generated directly by
171 ;; aarch64_emit_sve_pred_move, so changes to this pattern will
172 ;; need changes there as well.
173 (define_insn "*pred_mov<mode>"
174 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
176 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
177 (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
178 UNSPEC_MERGE_PTRUE))]
180 && (register_operand (operands[0], <MODE>mode)
181 || register_operand (operands[2], <MODE>mode))"
183 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
184 st1<Vesize>\t%2.<Vetype>, %1, %0"
187 (define_expand "movmisalign<mode>"
188 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
189 (match_operand:SVE_ALL 1 "general_operand"))]
192 /* Equivalent to a normal move for our purposes. */
193 emit_move_insn (operands[0], operands[1]);
;; Predicated contiguous load. Operand 1 is the memory source and operand 2
;; the governing predicate; the output template is LD1 with zeroing (/z)
;; predication.
198 (define_insn "maskload<mode><vpred>"
199 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
201 [(match_operand:<VPRED> 2 "register_operand" "Upl")
202 (match_operand:SVE_ALL 1 "memory_operand" "m")]
205 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
;; Predicated contiguous store. Operand 1 is the register source and
;; operand 2 the governing predicate; the output template is ST1.
;; The memory destination uses "+m", i.e. it is treated as both read and
;; written (presumably because inactive elements keep their old contents;
;; confirm against the full pattern).
208 (define_insn "maskstore<mode><vpred>"
209 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
210 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
211 (match_operand:SVE_ALL 1 "register_operand" "w")
215 "st1<Vesize>\t%1.<Vetype>, %2, %0"
218 ;; Unpredicated gather loads.
219 (define_expand "gather_load<mode>"
220 [(set (match_operand:SVE_SD 0 "register_operand")
223 (match_operand:DI 1 "aarch64_reg_or_zero")
224 (match_operand:<V_INT_EQUIV> 2 "register_operand")
225 (match_operand:DI 3 "const_int_operand")
226 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
231 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
235 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
236 ;; unsigned extension and false for signed extension.
237 (define_insn "mask_gather_load<mode>"
238 [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
240 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
241 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
242 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
243 (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
244 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
249 ld1w\t%0.s, %5/z, [%2.s]
250 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
251 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
252 ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
253 ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
256 ;; Predicated gather loads for 64-bit elements. The value of operand 3
257 ;; doesn't matter in this case.
258 (define_insn "mask_gather_load<mode>"
259 [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
261 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
262 (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
263 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
264 (match_operand:DI 3 "const_int_operand")
265 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
270 ld1d\t%0.d, %5/z, [%2.d]
271 ld1d\t%0.d, %5/z, [%1, %2.d]
272 ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
275 ;; Unpredicated scatter store.
276 (define_expand "scatter_store<mode>"
277 [(set (mem:BLK (scratch))
280 (match_operand:DI 0 "aarch64_reg_or_zero")
281 (match_operand:<V_INT_EQUIV> 1 "register_operand")
282 (match_operand:DI 2 "const_int_operand")
283 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
284 (match_operand:SVE_SD 4 "register_operand")]
285 UNSPEC_ST1_SCATTER))]
288 operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
292 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
293 ;; unsigned extension and false for signed extension.
294 (define_insn "mask_scatter_store<mode>"
295 [(set (mem:BLK (scratch))
297 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
298 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
299 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
300 (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
301 (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
302 (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
303 UNSPEC_ST1_SCATTER))]
306 st1w\t%4.s, %5, [%1.s]
307 st1w\t%4.s, %5, [%0, %1.s, sxtw]
308 st1w\t%4.s, %5, [%0, %1.s, uxtw]
309 st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
310 st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
313 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
314 ;; doesn't matter in this case.
315 (define_insn "mask_scatter_store<mode>"
316 [(set (mem:BLK (scratch))
318 [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
319 (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
320 (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
321 (match_operand:DI 2 "const_int_operand")
322 (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
323 (match_operand:SVE_D 4 "register_operand" "w, w, w")]
324 UNSPEC_ST1_SCATTER))]
327 st1d\t%4.d, %5, [%1.d]
328 st1d\t%4.d, %5, [%0, %1.d]
329 st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
332 ;; SVE structure moves.
333 (define_expand "mov<mode>"
334 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
335 (match_operand:SVE_STRUCT 1 "general_operand"))]
338 /* Big-endian loads and stores need to be done via LD1 and ST1;
339 see the comment at the head of the file for details. */
340 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
343 gcc_assert (can_create_pseudo_p ());
344 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
348 if (CONSTANT_P (operands[1]))
350 aarch64_expand_mov_immediate (operands[0], operands[1]);
356 ;; Unpredicated structure moves (little-endian).
357 (define_insn "*aarch64_sve_mov<mode>_le"
358 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
359 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
360 "TARGET_SVE && !BYTES_BIG_ENDIAN"
362 [(set_attr "length" "<insn_length>")]
365 ;; Unpredicated structure moves (big-endian). Memory accesses require
366 ;; secondary reloads.
;; Only register and constant (Dn) sources are accepted here. The pattern
;; is named "_be" to match its BYTES_BIG_ENDIAN condition and so that it
;; can be told apart from the little-endian structure move in RTL dumps.
367 (define_insn "*aarch64_sve_mov<mode>_be"
368 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
369 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
370 "TARGET_SVE && BYTES_BIG_ENDIAN"
372 [(set_attr "length" "<insn_length>")]
375 ;; Split unpredicated structure moves into pieces. This is the same
376 ;; for both big-endian and little-endian code, although it only needs
377 ;; to handle memory operands for little-endian code.
379 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
380 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
381 "TARGET_SVE && reload_completed"
384 rtx dest = operands[0];
385 rtx src = operands[1];
386 if (REG_P (dest) && REG_P (src))
387 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
389 for (unsigned int i = 0; i < <vector_count>; ++i)
391 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
392 i * BYTES_PER_SVE_VECTOR);
393 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
394 i * BYTES_PER_SVE_VECTOR);
395 emit_insn (gen_rtx_SET (subdest, subsrc));
401 ;; Predicated structure moves. This works for both endiannesses but in
402 ;; practice is only useful for big-endian.
403 (define_insn_and_split "pred_mov<mode>"
404 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
407 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
408 UNSPEC_MERGE_PTRUE))]
410 && (register_operand (operands[0], <MODE>mode)
411 || register_operand (operands[2], <MODE>mode))"
413 "&& reload_completed"
416 for (unsigned int i = 0; i < <vector_count>; ++i)
418 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
420 i * BYTES_PER_SVE_VECTOR);
421 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
423 i * BYTES_PER_SVE_VECTOR);
424 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
428 [(set_attr "length" "<insn_length>")]
;; Predicate moves. When the destination is a MEM the source is first
;; forced into a register, so that a move never has two non-register
;; operands.
431 (define_expand "mov<mode>"
432 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
433 (match_operand:PRED_ALL 1 "general_operand"))]
436 if (GET_CODE (operands[0]) == MEM)
437 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Predicate move instruction. The five alternatives are, in order:
;; register <- register, memory <- register, register <- memory,
;; register <- Dz constant and register <- Dm constant (presumably the
;; all-false and all-true predicate constants; confirm against the
;; constraint definitions). At least one operand must be a register.
;; The last alternative is printed by aarch64_output_ptrue.
441 (define_insn "*aarch64_sve_mov<mode>"
442 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
443 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
445 && (register_operand (operands[0], <MODE>mode)
446 || register_operand (operands[1], <MODE>mode))"
452 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
455 ;; Handle extractions from a predicate by converting to an integer vector
456 ;; and extracting from there.
457 (define_expand "vec_extract<vpred><Vel>"
458 [(match_operand:<VEL> 0 "register_operand")
459 (match_operand:<VPRED> 1 "register_operand")
460 (match_operand:SI 2 "nonmemory_operand")
461 ;; Dummy operand to which we can attach the iterator.
462 (reg:SVE_I V0_REGNUM)]
465 rtx tmp = gen_reg_rtx (<MODE>mode);
466 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
467 CONST1_RTX (<MODE>mode),
468 CONST0_RTX (<MODE>mode)));
469 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
474 (define_expand "vec_extract<mode><Vel>"
475 [(set (match_operand:<VEL> 0 "register_operand")
477 (match_operand:SVE_ALL 1 "register_operand")
478 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
482 if (poly_int_rtx_p (operands[2], &val)
483 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
485 /* The last element can be extracted with a LASTB and a false
487 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
488 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
491 if (!CONST_INT_P (operands[2]))
493 /* Create an index with operands[2] as the base and -1 as the step.
494 It will then be zero for the element we care about. */
495 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
496 index = force_reg (<VEL_INT>mode, index);
497 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
498 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
500 /* Get a predicate that is true for only that element. */
501 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
502 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
503 rtx sel = gen_reg_rtx (<VPRED>mode);
504 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
506 /* Select the element using LASTB. */
507 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
513 ;; Extract element zero. This is a special case because we want to force
514 ;; the registers to be the same for the second alternative, and then
515 ;; split the instruction into nothing after RA.
516 (define_insn_and_split "*vec_extract<mode><Vel>_0"
517 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
519 (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
520 (parallel [(const_int 0)])))]
523 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
524 switch (which_alternative)
527 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
531 return "st1\\t{%1.<Vetype>}[0], %0";
537 && REG_P (operands[0])
538 && REGNO (operands[0]) == REGNO (operands[1])"
541 emit_note (NOTE_INSN_DELETED);
544 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
547 ;; Extract an element from the Advanced SIMD portion of the register.
548 ;; We don't just reuse the aarch64-simd.md pattern because we don't
549 ;; want any change in lane number on big-endian targets.
550 (define_insn "*vec_extract<mode><Vel>_v128"
551 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
553 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
554 (parallel [(match_operand:SI 2 "const_int_operand")])))]
556 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
558 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
559 switch (which_alternative)
562 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
564 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
566 return "st1\\t{%1.<Vetype>}[%2], %0";
571 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
574 ;; Extract an element in the range of DUP. This pattern allows the
575 ;; source and destination to be different.
576 (define_insn "*vec_extract<mode><Vel>_dup"
577 [(set (match_operand:<VEL> 0 "register_operand" "=w")
579 (match_operand:SVE_ALL 1 "register_operand" "w")
580 (parallel [(match_operand:SI 2 "const_int_operand")])))]
582 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
584 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
585 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
589 ;; Extract an element outside the range of DUP. This pattern requires the
590 ;; source and destination to be the same.
591 (define_insn "*vec_extract<mode><Vel>_ext"
592 [(set (match_operand:<VEL> 0 "register_operand" "=w")
594 (match_operand:SVE_ALL 1 "register_operand" "0")
595 (parallel [(match_operand:SI 2 "const_int_operand")])))]
596 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
598 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
599 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
600 return "ext\t%0.b, %0.b, %0.b, #%2";
604 ;; Extract the last active element of operand 1 into operand 0.
605 ;; If no elements are active, extract the last inactive element instead.
606 (define_insn "extract_last_<mode>"
607 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
609 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
610 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
614 lastb\t%<vwcore>0, %1, %2.<Vetype>
615 lastb\t%<Vetype>0, %1, %2.<Vetype>"
618 (define_expand "vec_duplicate<mode>"
620 [(set (match_operand:SVE_ALL 0 "register_operand")
621 (vec_duplicate:SVE_ALL
622 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
623 (clobber (scratch:<VPRED>))])]
626 if (MEM_P (operands[1]))
628 rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
629 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
630 CONST0_RTX (<MODE>mode)));
636 ;; Accept memory operands for the benefit of combine, and also in case
637 ;; the scalar input gets spilled to memory during RA. We want to split
638 ;; the load at the first opportunity in order to allow the PTRUE to be
639 ;; optimized with surrounding code.
640 (define_insn_and_split "*vec_duplicate<mode>_reg"
641 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
642 (vec_duplicate:SVE_ALL
643 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
644 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
647 mov\t%0.<Vetype>, %<vwcore>1
648 mov\t%0.<Vetype>, %<Vetype>1
650 "&& MEM_P (operands[1])"
653 if (GET_CODE (operands[2]) == SCRATCH)
654 operands[2] = gen_reg_rtx (<VPRED>mode);
655 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
656 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
657 CONST0_RTX (<MODE>mode)));
660 [(set_attr "length" "4,4,8")]
663 ;; This is used for vec_duplicate<mode>s from memory, but can also
664 ;; be used by combine to optimize selects of a vec_duplicate<mode>
666 (define_insn "sve_ld1r<mode>"
667 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
669 [(match_operand:<VPRED> 1 "register_operand" "Upl")
670 (vec_duplicate:SVE_ALL
671 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
672 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
675 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
678 ;; Load 128 bits from memory and duplicate to fill a vector. Since there
679 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
680 ;; and simply use vectors of bytes instead.
681 (define_insn "*sve_ld1rq<Vesize>"
682 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
684 [(match_operand:<VPRED> 1 "register_operand" "Upl")
685 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
688 "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
691 ;; Implement a predicate broadcast by shifting the low bit of the scalar
692 ;; input into the top bit and using a WHILELO. An alternative would be to
693 ;; duplicate the input and do a compare with zero.
694 (define_expand "vec_duplicate<mode>"
695 [(set (match_operand:PRED_ALL 0 "register_operand")
696 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
699 rtx tmp = gen_reg_rtx (DImode);
700 rtx op1 = gen_lowpart (DImode, operands[1]);
701 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
702 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
;; Linear series implemented with INDEX. Operand 1 is the base and
;; operand 2 the step. Each may be an immediate (Usi) or a general
;; register, but the alternatives do not allow both to be immediates.
707 (define_insn "vec_series<mode>"
708 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
710 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
711 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
714 index\t%0.<Vetype>, #%1, %<vw>2
715 index\t%0.<Vetype>, %<vw>1, #%2
716 index\t%0.<Vetype>, %<vw>1, %<vw>2"
719 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
720 ;; of an INDEX instruction.
721 (define_insn "*vec_series<mode>_plus"
722 [(set (match_operand:SVE_I 0 "register_operand" "=w")
725 (match_operand:<VEL> 1 "register_operand" "r"))
726 (match_operand:SVE_I 2 "immediate_operand")))]
727 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
729 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
730 return "index\t%0.<Vetype>, %<vw>1, #%2";
734 ;; Unpredicated LD[234].
735 (define_expand "vec_load_lanes<mode><vsingle>"
736 [(set (match_operand:SVE_STRUCT 0 "register_operand")
739 (match_operand:SVE_STRUCT 1 "memory_operand")]
743 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
747 ;; Predicated LD[234].
748 (define_insn "vec_mask_load_lanes<mode><vsingle>"
749 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
751 [(match_operand:<VPRED> 2 "register_operand" "Upl")
752 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
755 "ld<vector_count><Vesize>\t%0, %2/z, %1"
758 ;; Unpredicated ST[234]. This is always a full update, so the dependence
759 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
760 ;; There doesn't seem to be any obvious benefit to treating the all-true
761 ;; case differently though. In particular, it's very unlikely that we'll
762 ;; only find out during RTL that a store_lanes is dead.
763 (define_expand "vec_store_lanes<mode><vsingle>"
764 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
767 (match_operand:SVE_STRUCT 1 "register_operand")
772 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
776 ;; Predicated ST[234].
777 (define_insn "vec_mask_store_lanes<mode><vsingle>"
778 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
780 [(match_operand:<VPRED> 2 "register_operand" "Upl")
781 (match_operand:SVE_STRUCT 1 "register_operand" "w")
785 "st<vector_count><Vesize>\t%1, %2, %0"
788 (define_expand "vec_perm<mode>"
789 [(match_operand:SVE_ALL 0 "register_operand")
790 (match_operand:SVE_ALL 1 "register_operand")
791 (match_operand:SVE_ALL 2 "register_operand")
792 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
793 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
795 aarch64_expand_sve_vec_perm (operands[0], operands[1],
796 operands[2], operands[3]);
;; Table lookup (TBL). Operand 1 is the data vector and operand 2 is an
;; integer vector of the equivalent mode supplying the element indices.
801 (define_insn "*aarch64_sve_tbl<mode>"
802 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
804 [(match_operand:SVE_ALL 1 "register_operand" "w")
805 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
808 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
811 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
812 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
813 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
814 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
817 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
820 (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
821 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
822 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
823 (match_operand:SVE_ALL 2 "register_operand" "w")]
826 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
829 (define_insn "*aarch64_sve_rev64<mode>"
830 [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
832 [(match_operand:VNx2BI 1 "register_operand" "Upl")
833 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
835 UNSPEC_MERGE_PTRUE))]
837 "rev<Vesize>\t%0.d, %1/m, %2.d"
840 (define_insn "*aarch64_sve_rev32<mode>"
841 [(set (match_operand:SVE_BH 0 "register_operand" "=w")
843 [(match_operand:VNx4BI 1 "register_operand" "Upl")
844 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
846 UNSPEC_MERGE_PTRUE))]
848 "rev<Vesize>\t%0.s, %1/m, %2.s"
851 (define_insn "*aarch64_sve_rev16vnx16qi"
852 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
854 [(match_operand:VNx8BI 1 "register_operand" "Upl")
855 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
857 UNSPEC_MERGE_PTRUE))]
859 "revb\t%0.h, %1/m, %2.h"
862 (define_insn "*aarch64_sve_rev<mode>"
863 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
864 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
867 "rev\t%0.<Vetype>, %1.<Vetype>")
869 (define_insn "*aarch64_sve_dup_lane<mode>"
870 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
871 (vec_duplicate:SVE_ALL
873 (match_operand:SVE_ALL 1 "register_operand" "w")
874 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
876 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
877 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
880 ;; Note that the immediate (third) operand is the lane index, not the byte index.
882 (define_insn "*aarch64_sve_ext<mode>"
883 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
884 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
885 (match_operand:SVE_ALL 2 "register_operand" "w")
886 (match_operand:SI 3 "const_int_operand")]
889 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
891 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
892 return "ext\\t%0.b, %0.b, %2.b, #%3";
;; Unpredicated integer addition. The alternatives are, in order:
;; ADD of an immediate (vsa), SUB of an immediate printed with %N (vsn,
;; presumably the negated constant), an increment/decrement-style
;; immediate (vsi) printed by aarch64_output_sve_inc_dec_immediate, and a
;; register-register ADD. The immediate forms tie the destination to
;; operand 1.
896 (define_insn "add<mode>3"
897 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
899 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
900 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
903 add\t%0.<Vetype>, %0.<Vetype>, #%D2
904 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
905 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
906 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; Unpredicated integer subtraction. The first alternative is a
;; register-register SUB. The second handles an immediate operand 1 (vsa)
;; with SUBR, tying the destination to operand 2.
909 (define_insn "sub<mode>3"
910 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
912 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
913 (match_operand:SVE_I 2 "register_operand" "w, 0")))]
916 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
917 subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
920 ;; Unpredicated multiplication.
921 (define_expand "mul<mode>3"
922 [(set (match_operand:SVE_I 0 "register_operand")
926 (match_operand:SVE_I 1 "register_operand")
927 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
928 UNSPEC_MERGE_PTRUE))]
931 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
935 ;; Multiplication predicated with a PTRUE. We don't actually need the
936 ;; predicate for the first alternative, but using Upa or X isn't likely
937 ;; to gain much and would make the instruction seem less uniform to the
938 ;; register allocator.
939 (define_insn "*mul<mode>3"
940 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
944 (match_operand:SVE_I 2 "register_operand" "%0, 0")
945 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
946 UNSPEC_MERGE_PTRUE))]
949 mul\t%0.<Vetype>, %0.<Vetype>, #%3
950 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
;; Predicated multiply-accumulate: the product of operands 2 and 3 is
;; combined with operand 4 (the combining operator is not visible in this
;; excerpt; the MAD/MLA mnemonics suggest addition).
;; The first alternative ties the destination to a multiplicand
;; (operand 2) and uses MAD. The second ties it to operand 4 and uses MLA.
953 (define_insn "*madd<mode>"
954 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
958 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
959 (match_operand:SVE_I 3 "register_operand" "w, w"))]
961 (match_operand:SVE_I 4 "register_operand" "w, 0")))]
964 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
965 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
;; Predicated multiply-subtract: operand 4 is combined with the product of
;; operands 2 and 3 (the combining operator is not visible in this excerpt;
;; the MSB/MLS mnemonics suggest operand 4 minus the product).
;; The first alternative ties the destination to a multiplicand
;; (operand 2) and uses MSB. The second ties it to operand 4 and uses MLS.
968 (define_insn "*msub<mode>3"
969 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
971 (match_operand:SVE_I 4 "register_operand" "w, 0")
973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
974 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
975 (match_operand:SVE_I 3 "register_operand" "w, w"))]
976 UNSPEC_MERGE_PTRUE)))]
979 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
980 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
983 ;; Unpredicated highpart multiplication.
984 (define_expand "<su>mul<mode>3_highpart"
985 [(set (match_operand:SVE_I 0 "register_operand")
988 (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
989 (match_operand:SVE_I 2 "register_operand")]
991 UNSPEC_MERGE_PTRUE))]
994 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
998 ;; Predicated highpart multiplication.
;; Operand 1 is the governing predicate.  Operand 2 is marked commutative
;; and tied to the destination, so the instruction overwrites it in place
;; with merging predication (%1/m).
999 (define_insn "*<su>mul<mode>3_highpart"
1000 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1002 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1003 (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
1004 (match_operand:SVE_I 3 "register_operand" "w")]
1006 UNSPEC_MERGE_PTRUE))]
1008 "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1011 ;; Unpredicated division.
;; Only defined for the SVE_SDI modes.  Both inputs must be registers.
;; The preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1012 (define_expand "<optab><mode>3"
1013 [(set (match_operand:SVE_SDI 0 "register_operand")
1016 (SVE_INT_BINARY_SD:SVE_SDI
1017 (match_operand:SVE_SDI 1 "register_operand")
1018 (match_operand:SVE_SDI 2 "register_operand"))]
1019 UNSPEC_MERGE_PTRUE))]
1022 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1026 ;; Division predicated with a PTRUE.
;; Operand 1 is the governing predicate.  In the first alternative the
;; dividend (operand 2) is tied to the destination and the normal form of
;; the instruction is used.  In the second alternative the divisor
;; (operand 3) is tied to the destination, so the reversed form
;; (<sve_int_op>r) is emitted with the dividend as the second source.
;;
;; Operand 3 uses register_operand: both alternatives only accept a
;; register (constraints w and 0), both templates print the operand as a
;; vector register, and the expander above only ever supplies registers.
;; A predicate that also accepted immediates would admit operands that no
;; alternative can use directly.
1027 (define_insn "*<optab><mode>3"
1028 [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
1030 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1031 (SVE_INT_BINARY_SD:SVE_SDI
1032 (match_operand:SVE_SDI 2 "register_operand" "0, w")
1033 (match_operand:SVE_SDI 3 "register_operand" "w, 0"))]
1034 UNSPEC_MERGE_PTRUE))]
1037 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1038 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1041 ;; Unpredicated NEG, NOT and POPCOUNT.
;; The preparation statement creates operand 2: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1042 (define_expand "<optab><mode>2"
1043 [(set (match_operand:SVE_I 0 "register_operand")
1046 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
1047 UNSPEC_MERGE_PTRUE))]
1050 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1054 ;; NEG, NOT and POPCOUNT predicated with a PTRUE.
;; Operand 1 is the governing predicate (merging, %1/m) and operand 2 is
;; the input.  The destination is not tied to the input.
1055 (define_insn "*<optab><mode>2"
1056 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1058 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1059 (SVE_INT_UNARY:SVE_I
1060 (match_operand:SVE_I 2 "register_operand" "w"))]
1061 UNSPEC_MERGE_PTRUE))]
1063 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1066 ;; Vector AND, ORR and XOR.
;; These patterns take no governing predicate.  In the first alternative
;; operand 2 is a logical immediate (constraint vsl), printed with the C
;; operand modifier, and the destination is tied to operand 1.  The second
;; alternative is the three-register form, which is always emitted on .d
;; elements: a bitwise operation gives the same result whatever the
;; element size.
1067 (define_insn "<optab><mode>3"
1068 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1070 (match_operand:SVE_I 1 "register_operand" "%0, w")
1071 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
1074 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
1075 <logical>\t%0.d, %1.d, %2.d"
1078 ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
1079 ;; by providing this, but we need to use UNSPECs since rtx logical ops
1080 ;; aren't defined for floating-point modes.
;; Only the register-register form is provided, emitted on .d elements.
1081 (define_insn "*<optab><mode>3"
1082 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1083 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
1084 (match_operand:SVE_F 2 "register_operand" "w")]
1087 "<logicalf_op>\t%0.d, %1.d, %2.d"
1090 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
1091 ;; this pattern even though the NOT instruction itself is predicated.
;; Operand 1 is the input that is inverted in the rtl; it is printed as
;; the last source of the BIC, after operand 2.
1092 (define_insn "bic<mode>3"
1093 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1095 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
1096 (match_operand:SVE_I 2 "register_operand" "w")))]
1098 "bic\t%0.d, %2.d, %1.d"
1101 ;; Predicate AND. We can reuse one of the inputs as the GP.
;; Operand 1 is printed twice: once as the zeroing governing predicate
;; (%1/z) and once as the first source.  Lanes that are false in operand 1
;; would be false in the AND anyway, so zeroing them does not change the
;; result.
1102 (define_insn "and<mode>3"
1103 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1104 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
1105 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
1107 "and\t%0.b, %1/z, %1.b, %2.b"
1110 ;; Unpredicated predicate ORR and XOR.
;; The preparation statement creates operand 3: CONSTM1_RTX (all bits
;; set) in the predicate mode itself, forced into a register.
1111 (define_expand "<optab><mode>3"
1112 [(set (match_operand:PRED_ALL 0 "register_operand")
1114 (LOGICAL_OR:PRED_ALL
1115 (match_operand:PRED_ALL 1 "register_operand")
1116 (match_operand:PRED_ALL 2 "register_operand"))
1120 operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1124 ;; Predicated predicate ORR and XOR.
;; Operand 1 is the governing predicate (zeroing, %1/z); operands 2 and 3
;; are the inputs.  This is a named pattern, and the cbranch expander
;; further down calls its XOR instance through gen_pred_xor<mode>3.
1125 (define_insn "pred_<optab><mode>3"
1126 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1129 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1130 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1131 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1133 "<logical>\t%0.b, %1/z, %2.b, %3.b"
1136 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
1137 ;; the GP (which is known to be a PTRUE). Store the result in operand 0
1138 ;; and set the flags in the same way as for PTEST. The (and ...) in the
1139 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
1140 ;; value is structurally equivalent to rhs of the second set.
;; The pattern is a parallel of two sets: the first writes CC_REGNUM and
;; the second writes the predicate result, reusing operands 2 and 3 through
;; match_dup.  The template uses the s-suffixed form of the instruction.
1141 (define_insn "*<optab><mode>3_cc"
1142 [(set (reg:CC CC_REGNUM)
1144 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
1147 (match_operand:PRED_ALL 2 "register_operand" "Upa")
1148 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1152 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1153 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
1156 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
1159 ;; Unpredicated predicate inverse.
;; The preparation statement creates operand 2: CONSTM1_RTX (all bits
;; set) in the predicate mode itself, forced into a register.
1160 (define_expand "one_cmpl<mode>2"
1161 [(set (match_operand:PRED_ALL 0 "register_operand")
1163 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1167 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1171 ;; Predicated predicate inverse.
;; Operand 1 is the governing predicate (zeroing, %1/z) and operand 2 is
;; the predicate being inverted.
1172 (define_insn "*one_cmpl<mode>3"
1173 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1175 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1176 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1178 "not\t%0.b, %1/z, %2.b"
1181 ;; Predicated predicate BIC and ORN.
;; Operand 1 is the governing predicate (zeroing).  Operand 2 is the input
;; that is inverted in the rtl; it is printed as the last source, after
;; operand 3.
1182 (define_insn "*<nlogical><mode>3"
1183 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1186 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1187 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1188 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1190 "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
1193 ;; Predicated predicate NAND and NOR.
;; Operand 1 is the governing predicate (zeroing).  Both inputs (operands
;; 2 and 3) appear inverted in the rtl and are printed in their natural
;; order.
1194 (define_insn "*<logical_nn><mode>3"
1195 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1198 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1199 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
1200 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1202 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
1205 ;; Unpredicated LSL, LSR and ASR by a vector.
;; Operand 2 may be anything accepted by aarch64_sve_<lr>shift_operand.
;; The preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1206 (define_expand "v<optab><mode>3"
1207 [(set (match_operand:SVE_I 0 "register_operand")
1211 (match_operand:SVE_I 1 "register_operand")
1212 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
1213 UNSPEC_MERGE_PTRUE))]
1216 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1220 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
1221 ;; actually need the predicate for the first alternative, but using Upa
1222 ;; or X isn't likely to gain much and would make the instruction seem
1223 ;; less uniform to the register allocator.
;; First alternative: shift by an immediate (constraint D<lr>); the
;; destination need not match the input.  Second alternative: shift by a
;; register, with the shifted value (operand 2) tied to the destination
;; and merging predication (%1/m).
1224 (define_insn "*v<optab><mode>3"
1225 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1227 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1229 (match_operand:SVE_I 2 "register_operand" "w, 0")
1230 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
1231 UNSPEC_MERGE_PTRUE))]
1234 <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
1235 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1238 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
;;
;; Operand 2 is a scalar shift amount of mode <VEL>.  A CONST_INT amount
;; is duplicated into a constant vector, which is forced into a register
;; only if that vector is not itself acceptable to the vector shift
;; pattern.  The predicate must therefore be tested on the duplicated
;; vector (amount), which has mode <MODE>, and not on the scalar
;; operands[2].  Any other amount is converted to <VEL>mode and broadcast
;; into a fresh register with vec_duplicate.  In both cases the vector
;; amount is passed to gen_v<optab><mode>3.
1240 (define_expand "<ASHIFT:optab><mode>3"
1241 [(set (match_operand:SVE_I 0 "register_operand")
1242 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1243 (match_operand:<VEL> 2 "general_operand")))]
1247 if (CONST_INT_P (operands[2]))
1249 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
1250 if (!aarch64_sve_<lr>shift_operand (amount, <MODE>mode))
1251 amount = force_reg (<MODE>mode, amount);
1255 amount = gen_reg_rtx (<MODE>mode);
1256 emit_insn (gen_vec_duplicate<mode> (amount,
1257 convert_to_mode (<VEL>mode,
1260 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
1265 ;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
;;
1267 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
1268 ;; is a PTRUE even if the optimizers haven't yet been able to propagate
1269 ;; the constant. We would use a separate unspec code for PTESTs involving
1270 ;; GPs that might not be PTRUEs.
;;
;; The only output is CC_REGNUM.  This is a named pattern; the cbranch
;; expander further down emits it through gen_ptest_ptrue<mode>.
1271 (define_insn "ptest_ptrue<mode>"
1272 [(set (reg:CC CC_REGNUM)
1274 (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
1275 (match_operand:PRED_ALL 1 "register_operand" "Upa")]
1282 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
1283 ;; with the comparison being unsigned.
;; Each scalar input may be a general register or zero (constraint rZ).
;; The pattern also clobbers CC_REGNUM.
1284 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
1285 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1286 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
1287 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
1289 (clobber (reg:CC CC_REGNUM))]
1291 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
1294 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1295 ;; Handle the case in which both results are useful. The GP operand
1296 ;; to the PTEST isn't needed, so we allow it to be anything.
;; Operand 1 (the unused GP) has neither a predicate nor constraints.
;; Operands 2 and 3 are the scalar inputs and operand 0 is the predicate
;; result.  The split re-emits this same pattern with operand 1 replaced
;; by CONSTM1_RTX; the !CONSTANT_P test stops the split from applying
;; again once that has been done.
;; NOTE(review): the unqualified <MODE> in the split code presumably
;; resolves to the PRED_ALL iterator rather than GPI -- confirm.
1297 (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
1298 [(set (reg:CC CC_REGNUM)
1300 (unspec:SI [(match_operand:PRED_ALL 1)
1302 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1303 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1307 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1308 (unspec:PRED_ALL [(match_dup 2)
1312 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1313 ;; Force the compiler to drop the unused predicate operand, so that we
1314 ;; don't have an unnecessary PTRUE.
1315 "&& !CONSTANT_P (operands[1])"
1318 emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
1319 (operands[0], CONSTM1_RTX (<MODE>mode),
1320 operands[2], operands[3]));
1325 ;; Integer comparisons predicated with a PTRUE.
;; The result is a predicate of mode <VPRED>.  The first alternative
;; compares against an immediate (constraint <sve_imm_con>) and the second
;; against a register.  The pattern also clobbers CC_REGNUM.
1326 (define_insn "*cmp<cmp_op><mode>"
1327 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1329 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1330 (SVE_INT_CMP:<VPRED>
1331 (match_operand:SVE_I 2 "register_operand" "w, w")
1332 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1333 UNSPEC_MERGE_PTRUE))
1334 (clobber (reg:CC CC_REGNUM))]
1337 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1338 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1341 ;; Integer comparisons predicated with a PTRUE in which only the flags result
;; is interesting.
;; The rtl sets CC_REGNUM only; the predicate that the instruction must
;; still write is modelled as a clobbered scratch (operand 0).  The
;; templates are the same as for the plain comparison pattern.
1343 (define_insn "*cmp<cmp_op><mode>_ptest"
1344 [(set (reg:CC CC_REGNUM)
1347 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1350 (SVE_INT_CMP:<VPRED>
1351 (match_operand:SVE_I 2 "register_operand" "w, w")
1352 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1353 UNSPEC_MERGE_PTRUE)]
1356 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1359 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1360 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1363 ;; Integer comparisons predicated with a PTRUE in which both the flag and
1364 ;; predicate results are interesting.
;; The pattern is a parallel of two sets: the first writes CC_REGNUM from
;; the comparison and the second writes the predicate result (operand 0).
;; The templates are the same as for the plain comparison pattern.
1365 (define_insn "*cmp<cmp_op><mode>_cc"
1366 [(set (reg:CC CC_REGNUM)
1369 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1372 (SVE_INT_CMP:<VPRED>
1373 (match_operand:SVE_I 2 "register_operand" "w, w")
1374 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1375 UNSPEC_MERGE_PTRUE)]
1378 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1381 (SVE_INT_CMP:<VPRED>
1384 UNSPEC_MERGE_PTRUE))]
1387 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1388 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1391 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
1392 ;; comparison with an AND. Split the instruction into its preferred form
1393 ;; (below) at the earliest opportunity, in order to get rid of the
1394 ;; redundant operand 1.
;; Operand 1 (the PTRUE) has neither a predicate nor constraints.
;; Operand 4 is the predicate that the comparison result is ANDed with.
;; Both the original pattern and its replacement clobber CC_REGNUM.
1395 (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
1396 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1399 [(match_operand:<VPRED> 1)
1400 (SVE_INT_CMP:<VPRED>
1401 (match_operand:SVE_I 2 "register_operand" "w, w")
1402 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1404 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
1405 (clobber (reg:CC CC_REGNUM))]
1412 (SVE_INT_CMP:<VPRED>
1416 (clobber (reg:CC CC_REGNUM))])]
1419 ;; Predicated integer comparisons.
;; Here the predicate (operand 1) appears after the comparison in the rtl
;; instead of inside a PTRUE unspec.  The first alternative compares
;; against an immediate and the second against a register.  The pattern
;; also clobbers CC_REGNUM.
1420 (define_insn "*pred_cmp<cmp_op><mode>"
1421 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1423 (SVE_INT_CMP:<VPRED>
1424 (match_operand:SVE_I 2 "register_operand" "w, w")
1425 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
1426 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
1427 (clobber (reg:CC CC_REGNUM))]
1430 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1431 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1434 ;; Floating-point comparisons predicated with a PTRUE.
;; The first alternative compares against zero (constraint Dz) and prints
;; the literal 0.0; the second compares against a register.  Unlike the
;; integer comparison patterns, this one has no CC_REGNUM clobber.
1435 (define_insn "*fcm<cmp_op><mode>"
1436 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1438 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1440 (match_operand:SVE_F 2 "register_operand" "w, w")
1441 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1442 UNSPEC_MERGE_PTRUE))]
1445 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1446 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; Floating-point unordered comparison (FCMUO) predicated with a PTRUE.
;; Both inputs must be registers; there is no compare-with-zero
;; alternative here.
1449 (define_insn "*fcmuo<mode>"
1450 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1452 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1454 (match_operand:SVE_F 2 "register_operand" "w")
1455 (match_operand:SVE_F 3 "register_operand" "w"))]
1456 UNSPEC_MERGE_PTRUE))]
1458 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1461 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
1462 ;; with another predicate P. This does not have the same trapping behavior
1463 ;; as predicating the comparison itself on P, but it's a legitimate fold,
1464 ;; since we can drop any potentially-trapping operations whose results
;; aren't needed.
;;
1467 ;; Split the instruction into its preferred form (below) at the earliest
1468 ;; opportunity, in order to get rid of the redundant operand 1.
;; Operand 1 (the PTRUE) has neither a predicate nor constraints.
;; Operand 4 is the predicate P.
1469 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
1470 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1473 [(match_operand:<VPRED> 1)
1475 (match_operand:SVE_F 2 "register_operand" "w, w")
1476 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1478 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
;; FCMUO counterpart of the PTRUE-plus-AND combine pattern.  Operand 1 has
;; neither a predicate nor constraints; operand 4 is the predicate that
;; is combined with the comparison result.  Both inputs must be registers.
1490 (define_insn_and_split "*fcmuo<mode>_and_combine"
1491 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1494 [(match_operand:<VPRED> 1)
1496 (match_operand:SVE_F 2 "register_operand" "w")
1497 (match_operand:SVE_F 3 "register_operand" "w"))]
1499 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
1511 ;; Unpredicated floating-point comparisons, with the results ANDed
1512 ;; with another predicate. This is a valid fold for the same reasons
;; as above.
;; The other predicate (operand 1) is used directly as the zeroing
;; governing predicate of the comparison.  The first alternative compares
;; against zero and the second against a register.
1514 (define_insn "*fcm<cmp_op><mode>_and"
1515 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1518 (match_operand:SVE_F 2 "register_operand" "w, w")
1519 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
1520 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
1523 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1524 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; FCMUO counterpart of the comparison-plus-AND pattern.  Operand 1 is
;; used directly as the zeroing governing predicate of the comparison.
;; Both inputs must be registers.
1527 (define_insn "*fcmuo<mode>_and"
1528 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1531 (match_operand:SVE_F 2 "register_operand" "w")
1532 (match_operand:SVE_F 3 "register_operand" "w"))
1533 (match_operand:<VPRED> 1 "register_operand" "Upl")))]
1535 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1538 ;; Predicated floating-point comparisons. We don't need a version
1539 ;; of this for unordered comparisons.
;; The predicate (operand 1) and both inputs are listed together as
;; elements of one vector in the rtl.  The first alternative compares
;; against zero and the second against a register.
1540 (define_insn "*pred_fcm<cmp_op><mode>"
1541 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1543 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1544 (match_operand:SVE_F 2 "register_operand" "w, w")
1545 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1549 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1550 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1553 ;; vcond_mask operand order: true, false, mask
1554 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1555 ;; SEL operand order: mask, true, false
;; Operand numbers follow the vcond_mask order, so the mask is operand 3
;; even though it is listed first in the rtl and printed first in the
;; template.
1556 (define_insn "vcond_mask_<mode><vpred>"
1557 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1559 [(match_operand:<VPRED> 3 "register_operand" "Upa")
1560 (match_operand:SVE_ALL 1 "register_operand" "w")
1561 (match_operand:SVE_ALL 2 "register_operand" "w")]
1564 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1567 ;; Selects between a duplicated immediate and zero.
;; Operand 1 is the selecting predicate, operand 2 the immediate and
;; operand 3 must be zero (aarch64_simd_imm_zero).  Operands 2 and 3 carry
;; no constraints.  The zero is never printed: the zeroing form of the
;; predicated MOV (%1/z) provides it.
1568 (define_insn "aarch64_sve_dup<mode>_const"
1569 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1571 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1572 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1573 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1576 "mov\t%0.<Vetype>, %1/z, #%2"
1579 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
1580 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Operands 1 and 2 are the values to select between (mode from SVE_ALL);
;; operands 4 and 5 are compared in the equivalent integer mode
;; <V_INT_EQUIV>.  All the work is delegated to aarch64_expand_sve_vcond,
;; which receives the data mode, the comparison mode and the operand
;; array.
1581 (define_expand "vcond<mode><v_int_equiv>"
1582 [(set (match_operand:SVE_ALL 0 "register_operand")
1583 (if_then_else:SVE_ALL
1584 (match_operator 3 "comparison_operator"
1585 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1586 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1587 (match_operand:SVE_ALL 1 "register_operand")
1588 (match_operand:SVE_ALL 2 "register_operand")))]
1591 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1596 ;; Integer vcondu. Don't enforce an immediate range here, since it
1597 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; The operand layout and the delegation to aarch64_expand_sve_vcond are
;; the same as for the vcond expander; only the pattern name differs.
1598 (define_expand "vcondu<mode><v_int_equiv>"
1599 [(set (match_operand:SVE_ALL 0 "register_operand")
1600 (if_then_else:SVE_ALL
1601 (match_operator 3 "comparison_operator"
1602 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1603 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1604 (match_operand:SVE_ALL 1 "register_operand")
1605 (match_operand:SVE_ALL 2 "register_operand")))]
1608 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1613 ;; Floating-point vcond. All comparisons except FCMUO allow a zero
1614 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; with a zero operand.
;; The selected values have an SVE_SD mode and the compared operands use
;; the equivalent floating-point mode <V_FP_EQUIV>.  Operand 5 may be a
;; register or zero.
1616 (define_expand "vcond<mode><v_fp_equiv>"
1617 [(set (match_operand:SVE_SD 0 "register_operand")
1618 (if_then_else:SVE_SD
1619 (match_operator 3 "comparison_operator"
1620 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1621 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1622 (match_operand:SVE_SD 1 "register_operand")
1623 (match_operand:SVE_SD 2 "register_operand")))]
1626 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1631 ;; Signed integer comparisons. Don't enforce an immediate range here, since
1632 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
;; The result is a predicate of mode <VPRED>.  The pattern is a parallel
;; that includes a CC_REGNUM clobber.  The helper receives the
;; destination, the rtx code of comparison operator 1 and the two inputs.
1634 (define_expand "vec_cmp<mode><vpred>"
1636 [(set (match_operand:<VPRED> 0 "register_operand")
1637 (match_operator:<VPRED> 1 "comparison_operator"
1638 [(match_operand:SVE_I 2 "register_operand")
1639 (match_operand:SVE_I 3 "nonmemory_operand")]))
1640 (clobber (reg:CC CC_REGNUM))])]
1643 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1644 operands[2], operands[3]);
1649 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
1650 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
;; The operand layout and helper call are the same as for the signed
;; expander; the comparison code in operator 1 is what tells the helper
;; which comparison to emit.
1652 (define_expand "vec_cmpu<mode><vpred>"
1654 [(set (match_operand:<VPRED> 0 "register_operand")
1655 (match_operator:<VPRED> 1 "comparison_operator"
1656 [(match_operand:SVE_I 2 "register_operand")
1657 (match_operand:SVE_I 3 "nonmemory_operand")]))
1658 (clobber (reg:CC CC_REGNUM))])]
1661 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1662 operands[2], operands[3]);
1667 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
1668 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with a zero operand.
;; Unlike the integer expanders, this pattern has no CC_REGNUM clobber.
;; The last argument to the helper is always false here; its meaning is
;; defined by the helper, which is not part of this file.
1670 (define_expand "vec_cmp<mode><vpred>"
1671 [(set (match_operand:<VPRED> 0 "register_operand")
1672 (match_operator:<VPRED> 1 "comparison_operator"
1673 [(match_operand:SVE_F 2 "register_operand")
1674 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1677 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1678 operands[2], operands[3], false);
1683 ;; Branch based on predicate equality or inequality.
;; Operand 2 may be a register or zero.  The expander forces an all-true
;; predicate into a register.  When operand 2 is not the zero constant, it
;; allocates a fresh predicate register and fills it with a pred_xor that
;; involves operand 1, using the all-true predicate as the GP.  The
;; resulting predicate is then tested with ptest_ptrue, and operands 1 and
;; 2 are rewritten so that the branch compares CC_REGNUM against zero.
1684 (define_expand "cbranch<mode>4"
1687 (match_operator 0 "aarch64_equality_operator"
1688 [(match_operand:PRED_ALL 1 "register_operand")
1689 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1690 (label_ref (match_operand 3 ""))
1694 rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1696 if (operands[2] == CONST0_RTX (<MODE>mode))
1700 pred = gen_reg_rtx (<MODE>mode);
1701 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1704 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1705 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1706 operands[2] = const0_rtx;
1710 ;; Unpredicated integer MIN/MAX.
;; The preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1711 (define_expand "<su><maxmin><mode>3"
1712 [(set (match_operand:SVE_I 0 "register_operand")
1715 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1716 (match_operand:SVE_I 2 "register_operand"))]
1717 UNSPEC_MERGE_PTRUE))]
1720 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1724 ;; Integer MIN/MAX predicated with a PTRUE.
;; Operand 1 is the governing predicate.  Operand 2 is marked commutative
;; and tied to the destination, so the instruction updates it in place
;; with merging predication (%1/m).
1725 (define_insn "*<su><maxmin><mode>3"
1726 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1728 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1729 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
1730 (match_operand:SVE_I 3 "register_operand" "w"))]
1731 UNSPEC_MERGE_PTRUE))]
1733 "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1736 ;; Unpredicated floating-point MIN/MAX.
;; The preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1737 (define_expand "<su><maxmin><mode>3"
1738 [(set (match_operand:SVE_F 0 "register_operand")
1741 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1742 (match_operand:SVE_F 2 "register_operand"))]
1743 UNSPEC_MERGE_PTRUE))]
1746 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1750 ;; Floating-point MIN/MAX predicated with a PTRUE.
;; The rtl codes from FMAXMIN are emitted as the nm-suffixed instructions
;; (the mnemonic is f<maxmin>nm).  Operand 2 is marked commutative and
;; tied to the destination.
1751 (define_insn "*<su><maxmin><mode>3"
1752 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1754 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1755 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
1756 (match_operand:SVE_F 3 "register_operand" "w"))]
1757 UNSPEC_MERGE_PTRUE))]
1759 "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1762 ;; Unpredicated fmin/fmax.
;; These operations are represented as an unspec on operands 1 and 2
;; rather than as an rtl min/max code.  The preparation statement creates
;; operand 3: the <VPRED> constant CONSTM1_RTX (all bits set), forced into
;; a register.
1763 (define_expand "<maxmin_uns><mode>3"
1764 [(set (match_operand:SVE_F 0 "register_operand")
1767 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1768 (match_operand:SVE_F 2 "register_operand")]
1770 UNSPEC_MERGE_PTRUE))]
1773 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1777 ;; fmin/fmax predicated with a PTRUE.
;; Operand 1 is the governing predicate.  Operand 2 is marked commutative
;; and tied to the destination.  The mnemonic comes from <maxmin_uns_op>.
1778 (define_insn "*<maxmin_uns><mode>3"
1779 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1781 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1782 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
1783 (match_operand:SVE_F 3 "register_operand" "w")]
1785 UNSPEC_MERGE_PTRUE))]
1787 "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1790 ;; Predicated integer operations with select.
;; Operand 1 is the predicate, operands 2 and 3 are the inputs to the
;; binary operation and operand 4 is a further vector input.  The
;; preparation code passes the operand array, the operand count (5) and a
;; flag saying whether <CODE> is commutative (RTX_COMM_ARITH) to
;; aarch64_sve_prepare_conditional_op.
1791 (define_expand "cond_<optab><mode>"
1792 [(set (match_operand:SVE_I 0 "register_operand")
1794 [(match_operand:<VPRED> 1 "register_operand")
1795 (SVE_INT_BINARY:SVE_I
1796 (match_operand:SVE_I 2 "register_operand")
1797 (match_operand:SVE_I 3 "register_operand"))
1798 (match_operand:SVE_I 4 "register_operand")]
1802 bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
1803 aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
;; Predicated integer operations with select, for the SVE_INT_BINARY_SD
;; codes.  These are only defined for the SVE_SDI modes.  The operand
;; layout and preparation code match the SVE_INT_BINARY expander.
1806 (define_expand "cond_<optab><mode>"
1807 [(set (match_operand:SVE_SDI 0 "register_operand")
1809 [(match_operand:<VPRED> 1 "register_operand")
1810 (SVE_INT_BINARY_SD:SVE_SDI
1811 (match_operand:SVE_SDI 2 "register_operand")
1812 (match_operand:SVE_SDI 3 "register_operand"))
1813 (match_operand:SVE_SDI 4 "register_operand")]
1817 bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
1818 aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
1821 ;; Predicated integer operations.
;; Operand 1 is the governing predicate (merging, %1/m).  The first input
;; (operand 2) is tied to the destination and operand 3 is the second
;; source.
1822 (define_insn "*cond_<optab><mode>"
1823 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1825 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1826 (SVE_INT_BINARY:SVE_I
1827 (match_operand:SVE_I 2 "register_operand" "0")
1828 (match_operand:SVE_I 3 "register_operand" "w"))
1832 "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
;; Predicated integer operations for the SVE_INT_BINARY_SD codes and
;; SVE_SDI modes.  The first input (operand 2) is tied to the destination
;; and operand 3 is the second source.
1835 (define_insn "*cond_<optab><mode>"
1836 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
1838 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1839 (SVE_INT_BINARY_SD:SVE_SDI
1840 (match_operand:SVE_SDI 2 "register_operand" "0")
1841 (match_operand:SVE_SDI 3 "register_operand" "w"))
1845 "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1848 ;; Predicated integer operations with the operands reversed.
;; Here the second input (operand 3) is the one tied to the destination,
;; so the r-suffixed form of the instruction is emitted with operand 2 as
;; the other source.  Only the SVE_INT_BINARY_REV codes are covered.
1849 (define_insn "*cond_<optab><mode>"
1850 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1852 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1853 (SVE_INT_BINARY_REV:SVE_I
1854 (match_operand:SVE_I 2 "register_operand" "w")
1855 (match_operand:SVE_I 3 "register_operand" "0"))
1859 "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
;; Reversed-operand form of the predicated SVE_INT_BINARY_SD operations.
;; The second input (operand 3) is tied to the destination, so the
;; r-suffixed form of the instruction is emitted with operand 2 as the
;; other source.
1862 (define_insn "*cond_<optab><mode>"
1863 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
1865 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1866 (SVE_INT_BINARY_SD:SVE_SDI
1867 (match_operand:SVE_SDI 2 "register_operand" "w")
1868 (match_operand:SVE_SDI 3 "register_operand" "0"))
1872 "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1875 ;; Set operand 0 to the last active element in operand 3, or to tied
1876 ;; operand 1 if no elements are active.
;; Operand 2 is the predicate that determines which elements are active.
;; The scalar result has mode <VEL>.  The first alternative places it in a
;; general register (constraint r, printed with <vwcore>) and the second
;; in a w-class register (printed with <vw>).
1877 (define_insn "fold_extract_last_<mode>"
1878 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
1880 [(match_operand:<VEL> 1 "register_operand" "0, 0")
1881 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
1882 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
1886 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
1887 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
1890 ;; Unpredicated integer add reduction.
;; The result is a scalar of mode <VEL>.  The preparation statement
;; creates operand 2: the <VPRED> constant CONSTM1_RTX (all bits set),
;; forced into a register.  It appears first in the unspec via match_dup.
1891 (define_expand "reduc_plus_scal_<mode>"
1892 [(set (match_operand:<VEL> 0 "register_operand")
1893 (unspec:<VEL> [(match_dup 2)
1894 (match_operand:SVE_I 1 "register_operand")]
1898 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1902 ;; Predicated integer add reduction. The result is always 64 bits wide.
;; This is why the destination is printed with the d modifier (%d0)
;; whatever the element size of the input vector.  Operand 1 is the
;; governing predicate.
1903 (define_insn "*reduc_plus_scal_<mode>"
1904 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1905 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1906 (match_operand:SVE_I 2 "register_operand" "w")]
1909 "uaddv\t%d0, %1, %2.<Vetype>"
1912 ;; Unpredicated floating-point add reduction.
;; The preparation statement creates operand 2: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1913 (define_expand "reduc_plus_scal_<mode>"
1914 [(set (match_operand:<VEL> 0 "register_operand")
1915 (unspec:<VEL> [(match_dup 2)
1916 (match_operand:SVE_F 1 "register_operand")]
1920 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1924 ;; Predicated floating-point add reduction.
;; Operand 1 is the governing predicate.  The scalar destination is
;; printed with the element-size prefix given by <Vetype>.
1925 (define_insn "*reduc_plus_scal_<mode>"
1926 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1927 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1928 (match_operand:SVE_F 2 "register_operand" "w")]
1931 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
1934 ;; Unpredicated integer MIN/MAX reduction.
;; The preparation statement creates operand 2: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1935 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1936 [(set (match_operand:<VEL> 0 "register_operand")
1937 (unspec:<VEL> [(match_dup 2)
1938 (match_operand:SVE_I 1 "register_operand")]
1942 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1946 ;; Predicated integer MIN/MAX reduction.
;; Operand 1 is the governing predicate.  The mnemonic is <maxmin_uns_op>
;; with a v suffix, and the scalar destination is printed with the
;; element-size prefix given by <Vetype>.
1947 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1948 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1949 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1950 (match_operand:SVE_I 2 "register_operand" "w")]
1953 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
1956 ;; Unpredicated floating-point MIN/MAX reduction.
;; The preparation statement creates operand 2: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1957 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1958 [(set (match_operand:<VEL> 0 "register_operand")
1959 (unspec:<VEL> [(match_dup 2)
1960 (match_operand:SVE_F 1 "register_operand")]
1964 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1968 ;; Predicated floating-point MIN/MAX reduction.
;; Operand 1 is the governing predicate.  The mnemonic is <maxmin_uns_op>
;; with a v suffix, as for the integer reduction.
1969 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1970 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1971 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1972 (match_operand:SVE_F 2 "register_operand" "w")]
1975 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
;; Unpredicated integer reduction named after <optab>.
;; NOTE(review): presumably the bitwise (AND/ORR/XOR) reductions, judging
;; by the <bit_reduc_op> mnemonic of the matching insn -- confirm against
;; the iterator definitions.
;; The preparation statement creates operand 2: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1978 (define_expand "reduc_<optab>_scal_<mode>"
1979 [(set (match_operand:<VEL> 0 "register_operand")
1980 (unspec:<VEL> [(match_dup 2)
1981 (match_operand:SVE_I 1 "register_operand")]
1985 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
;; Predicated form of the integer reduction named after <optab>.
;; Operand 1 is the governing predicate and operand 2 the vector being
;; reduced.  The mnemonic comes from <bit_reduc_op>.
1989 (define_insn "*reduc_<optab>_scal_<mode>"
1990 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1991 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1992 (match_operand:SVE_I 2 "register_operand" "w")]
1995 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
1998 ;; Unpredicated in-order FP reductions.
;; Operand 1 is a scalar of mode <VEL> and operand 2 is the vector.  The
;; preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
1999 (define_expand "fold_left_plus_<mode>"
2000 [(set (match_operand:<VEL> 0 "register_operand")
2001 (unspec:<VEL> [(match_dup 3)
2002 (match_operand:<VEL> 1 "register_operand")
2003 (match_operand:SVE_F 2 "register_operand")]
2007 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2011 ;; In-order FP reductions predicated with PTRUE.
;; Operand 1 is the governing predicate.  The scalar input (operand 2) is
;; tied to the destination, so FADDA updates it in place with the
;; elements of operand 3.
2012 (define_insn "*fold_left_plus_<mode>"
2013 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2014 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2015 (match_operand:<VEL> 2 "register_operand" "0")
2016 (match_operand:SVE_F 3 "register_operand" "w")]
2019 "fadda\t%<Vetype>0, %1, %<Vetype>0, %3.<Vetype>"
2022 ;; Predicated form of the above in-order reduction.
;; The scalar input (operand 1) is tied to the destination.  Operand 2 is
;; the predicate and operand 3 the vector; both are printed in the
;; template.  Operand 4 must be zero (aarch64_simd_imm_zero) and is never
;; printed.
;; NOTE(review): operand 4 presumably stands for the value of the
;; inactive elements in the rtl -- confirm.
2023 (define_insn "*pred_fold_left_plus_<mode>"
2024 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2026 [(match_operand:<VEL> 1 "register_operand" "0")
2028 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2029 (match_operand:SVE_F 3 "register_operand" "w")
2030 (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
2034 "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
2037 ;; Unpredicated floating-point addition.
;; Operand 2 may be anything accepted by
;; aarch64_sve_float_arith_with_sub_operand.  The preparation statement
;; creates operand 3: the <VPRED> constant CONSTM1_RTX (all bits set),
;; forced into a register.
2038 (define_expand "add<mode>3"
2039 [(set (match_operand:SVE_F 0 "register_operand")
2043 (match_operand:SVE_F 1 "register_operand")
2044 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
2045 UNSPEC_MERGE_PTRUE))]
2048 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2052 ;; Floating-point addition predicated with a PTRUE.
;; Alternative 1: add an immediate (constraint vsA) with predicated FADD;
;; operand 2 is tied to the destination.
;; Alternative 2: the immediate matches vsN, so FSUB is emitted with the
;; operand printed through the N modifier; operand 2 is tied to the
;; destination.
;; Alternative 3: register plus register, using the FADD form that takes
;; no predicate.
2053 (define_insn "*add<mode>3"
2054 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2056 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2058 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2059 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2060 UNSPEC_MERGE_PTRUE))]
2063 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2064 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2065 fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
2068 ;; Unpredicated floating-point subtraction.
;; The first input (operand 1) may be anything accepted by
;; aarch64_sve_float_arith_operand; the second must be a register.  The
;; preparation statement creates operand 3: the <VPRED> constant
;; CONSTM1_RTX (all bits set), forced into a register.
2069 (define_expand "sub<mode>3"
2070 [(set (match_operand:SVE_F 0 "register_operand")
2074 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2075 (match_operand:SVE_F 2 "register_operand"))]
2076 UNSPEC_MERGE_PTRUE))]
2079 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2083 ;; Floating-point subtraction predicated with a PTRUE.
;; The insn condition requires at least one of operands 2 and 3 to be a
;; register.
;; Alternative 1: register minus immediate (vsA), predicated FSUB.
;; Alternative 2: the immediate matches vsN, so FADD is emitted with the
;; operand printed through the N modifier.
;; Alternative 3: immediate (vsA) minus register; the register operand 3
;; is tied to the destination and FSUBR is used.
;; Alternative 4: register minus register, using the FSUB form that takes
;; no predicate.
2084 (define_insn "*sub<mode>3"
2085 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2087 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2089 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2090 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2091 UNSPEC_MERGE_PTRUE))]
2093 && (register_operand (operands[2], <MODE>mode)
2094 || register_operand (operands[3], <MODE>mode))"
2096 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2097 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2098 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2099 fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
2102 ;; Unpredicated floating-point multiplication.
;; Operand 2 may be anything accepted by aarch64_sve_float_mul_operand.
;; The preparation code supplies the predicate as operands[3]: the
;; CONSTM1_RTX constant of mode <VPRED>, forced into a register.
2103 (define_expand "mul<mode>3"
2104 [(set (match_operand:SVE_F 0 "register_operand")
2108 (match_operand:SVE_F 1 "register_operand")
2109 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2110 UNSPEC_MERGE_PTRUE))]
2113 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2117 ;; Floating-point multiplication predicated with a PTRUE.
;; Alternative 1 ties operand 2 to the destination and takes operand 3 as
;; a "vsM" immediate (predicated FMUL).  Alternative 2 is the all-register
;; FMUL, whose template does not reference the predicate operand.  The "%"
;; on operand 2 marks operands 2 and 3 as commutative.
2118 (define_insn "*mul<mode>3"
2119 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2121 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2123 (match_operand:SVE_F 2 "register_operand" "%0, w")
2124 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2125 UNSPEC_MERGE_PTRUE))]
2128 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2129 fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
2132 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
;; All three inputs must be registers.  The preparation code supplies the
;; predicate as operands[4]: the CONSTM1_RTX constant of mode <VPRED>,
;; forced into a register.
2133 (define_expand "fma<mode>4"
2134 [(set (match_operand:SVE_F 0 "register_operand")
2137 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2138 (match_operand:SVE_F 2 "register_operand")
2139 (match_operand:SVE_F 3 "register_operand"))]
2140 UNSPEC_MERGE_PTRUE))]
2143 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2147 ;; fma predicated with a PTRUE.
;; Operand numbering differs from the expander: here the multiplicands are
;; operands 3 and 4 and the addend is operand 2.  Alternative 1 ties
;; multiplicand 3 to the destination and emits FMAD; alternative 2 ties the
;; addend (operand 2) to the destination and emits FMLA.
2148 (define_insn "*fma<mode>4"
2149 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2151 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2152 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
2153 (match_operand:SVE_F 4 "register_operand" "w, w")
2154 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
2155 UNSPEC_MERGE_PTRUE))]
2158 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2159 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2162 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
;; The negation is applied to operand 1 inside the fma.  The preparation
;; code supplies the predicate as operands[4]: the CONSTM1_RTX constant of
;; mode <VPRED>, forced into a register.
2163 (define_expand "fnma<mode>4"
2164 [(set (match_operand:SVE_F 0 "register_operand")
2167 (fma:SVE_F (neg:SVE_F
2168 (match_operand:SVE_F 1 "register_operand"))
2169 (match_operand:SVE_F 2 "register_operand")
2170 (match_operand:SVE_F 3 "register_operand"))]
2171 UNSPEC_MERGE_PTRUE))]
2174 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2178 ;; fnma predicated with a PTRUE.
;; The negated multiplicand is operand 3, the other multiplicand is
;; operand 4 and the addend is operand 2.  Alternative 1 ties operand 3 to
;; the destination and emits FMSB; alternative 2 ties the addend
;; (operand 2) to the destination and emits FMLS.
2179 (define_insn "*fnma<mode>4"
2180 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2182 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2183 (fma:SVE_F (neg:SVE_F
2184 (match_operand:SVE_F 3 "register_operand" "%0, w"))
2185 (match_operand:SVE_F 4 "register_operand" "w, w")
2186 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
2187 UNSPEC_MERGE_PTRUE))]
2190 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2191 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2194 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
;; All three inputs must be registers.  The preparation code supplies the
;; predicate as operands[4]: the CONSTM1_RTX constant of mode <VPRED>,
;; forced into a register.
2195 (define_expand "fms<mode>4"
2196 [(set (match_operand:SVE_F 0 "register_operand")
2199 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2200 (match_operand:SVE_F 2 "register_operand")
2202 (match_operand:SVE_F 3 "register_operand")))]
2203 UNSPEC_MERGE_PTRUE))]
2206 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2210 ;; fms predicated with a PTRUE.
;; The multiplicands are operands 3 and 4; operand 2 is the third fma
;; input.  Alternative 1 ties multiplicand 3 to the destination and emits
;; FNMSB; alternative 2 ties operand 2 to the destination and emits FNMLS.
2211 (define_insn "*fms<mode>4"
2212 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2214 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2215 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
2216 (match_operand:SVE_F 4 "register_operand" "w, w")
2218 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
2219 UNSPEC_MERGE_PTRUE))]
2222 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2223 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2226 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
;; The negation of operand 1 is applied inside the fma.  The preparation
;; code supplies the predicate as operands[4]: the CONSTM1_RTX constant of
;; mode <VPRED>, forced into a register.
2227 (define_expand "fnms<mode>4"
2228 [(set (match_operand:SVE_F 0 "register_operand")
2231 (fma:SVE_F (neg:SVE_F
2232 (match_operand:SVE_F 1 "register_operand"))
2233 (match_operand:SVE_F 2 "register_operand")
2235 (match_operand:SVE_F 3 "register_operand")))]
2236 UNSPEC_MERGE_PTRUE))]
2239 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2243 ;; fnms predicated with a PTRUE.
;; The negated multiplicand is operand 3, the other multiplicand is
;; operand 4 and operand 2 is the third fma input.  Alternative 1 ties
;; operand 3 to the destination and emits FNMAD; alternative 2 ties
;; operand 2 to the destination and emits FNMLA.
2244 (define_insn "*fnms<mode>4"
2245 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2247 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2248 (fma:SVE_F (neg:SVE_F
2249 (match_operand:SVE_F 3 "register_operand" "%0, w"))
2250 (match_operand:SVE_F 4 "register_operand" "w, w")
2252 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
2253 UNSPEC_MERGE_PTRUE))]
2256 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2257 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2260 ;; Unpredicated floating-point division.
;; Both inputs must be registers.  The preparation code supplies the
;; predicate as operands[3]: the CONSTM1_RTX constant of mode <VPRED>,
;; forced into a register.
2261 (define_expand "div<mode>3"
2262 [(set (match_operand:SVE_F 0 "register_operand")
2265 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2266 (match_operand:SVE_F 2 "register_operand"))]
2267 UNSPEC_MERGE_PTRUE))]
2270 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2274 ;; Floating-point division predicated with a PTRUE.
;; Alternative 1 ties the dividend (operand 2) to the destination and
;; emits FDIV with operand 3; alternative 2 ties the divisor (operand 3)
;; to the destination and emits FDIVR with operand 2.
2275 (define_insn "*div<mode>3"
2276 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2278 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2279 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
2280 (match_operand:SVE_F 3 "register_operand" "w, 0"))]
2281 UNSPEC_MERGE_PTRUE))]
2284 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2285 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2288 ;; Unpredicated FNEG, FABS and FSQRT.
;; One expander per code in the SVE_FP_UNARY iterator.  The preparation
;; code supplies the predicate as operands[2]: the CONSTM1_RTX constant of
;; mode <VPRED>, forced into a register.
2289 (define_expand "<optab><mode>2"
2290 [(set (match_operand:SVE_F 0 "register_operand")
2293 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2294 UNSPEC_MERGE_PTRUE))]
2297 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2301 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
;; Single alternative: operand 1 is the predicate and operand 2 the
;; source vector.  The mnemonic comes from <sve_fp_op>, and the source is
;; not tied to the destination.
2302 (define_insn "*<optab><mode>2"
2303 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2305 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2306 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2307 UNSPEC_MERGE_PTRUE))]
2309 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2312 ;; Unpredicated FRINTy.
;; The rounding operation itself is an unspec on operand 1, wrapped in
;; UNSPEC_MERGE_PTRUE.  The preparation code supplies the predicate as
;; operands[2]: the CONSTM1_RTX constant of mode <VPRED>, forced into a
;; register.
2313 (define_expand "<frint_pattern><mode>2"
2314 [(set (match_operand:SVE_F 0 "register_operand")
2317 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2319 UNSPEC_MERGE_PTRUE))]
2322 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2326 ;; FRINTy predicated with a PTRUE.
;; Single alternative: operand 1 is the predicate and operand 2 the
;; source vector.  The instruction suffix comes from <frint_suffix>.
2327 (define_insn "*<frint_pattern><mode>2"
2328 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2330 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2331 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2333 UNSPEC_MERGE_PTRUE))]
2335 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2338 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2339 ;; SF to SI or DF to DI).
;; Signedness is selected by the FIXUORS code iterator.  The preparation
;; code supplies the predicate as operands[2]: the CONSTM1_RTX constant of
;; mode <VPRED>, forced into a register.
2340 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2341 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2342 (unspec:<V_INT_EQUIV>
2344 (FIXUORS:<V_INT_EQUIV>
2345 (match_operand:SVE_F 1 "register_operand"))]
2346 UNSPEC_MERGE_PTRUE))]
2349 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2353 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; The source is always VNx8HF (printed with the .h suffix); the
;; destination mode iterates over SVE_HSDI and supplies <Vetype>.
2354 (define_insn "*<fix_trunc_optab>v16hsf<mode>2"
2355 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2357 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2359 (match_operand:VNx8HF 2 "register_operand" "w"))]
2360 UNSPEC_MERGE_PTRUE))]
2362 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
2365 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
;; The source is always VNx4SF (printed with the .s suffix); the
;; destination mode iterates over SVE_SDI and supplies <Vetype>.
2366 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2367 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2369 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2371 (match_operand:VNx4SF 2 "register_operand" "w"))]
2372 UNSPEC_MERGE_PTRUE))]
2374 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2377 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The source is always VNx2DF (printed with the .d suffix).  Unlike the
;; HF and SF variants, the predicate has the fixed mode VNx2BI rather than
;; <VPRED>, so it does not vary with the destination mode.
2378 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2379 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2381 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2383 (match_operand:VNx2DF 2 "register_operand" "w"))]
2384 UNSPEC_MERGE_PTRUE))]
2386 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2389 ;; Unpredicated conversion of integers to floats of the same size
2390 ;; (HI to HF, SI to SF or DI to DF).
;; The source has mode <V_INT_EQUIV> and the result mode SVE_F.  The
;; preparation code supplies the predicate as operands[2]: the CONSTM1_RTX
;; constant of mode <VPRED>, forced into a register.
2391 (define_expand "<optab><v_int_equiv><mode>2"
2392 [(set (match_operand:SVE_F 0 "register_operand")
2396 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2397 UNSPEC_MERGE_PTRUE))]
2400 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
2404 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; with a PTRUE.
;; The destination is always VNx8HF (printed with the .h suffix); the
;; source mode iterates over SVE_HSDI and supplies <Vetype>.
2406 (define_insn "*<optab><mode>vnx8hf2"
2407 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2409 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2411 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2412 UNSPEC_MERGE_PTRUE))]
2414 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2417 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
;; The destination is always VNx4SF (printed with the .s suffix); the
;; source mode iterates over SVE_SDI and supplies <Vetype>.
2418 (define_insn "*<optab><mode>vnx4sf2"
2419 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2421 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2423 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2424 UNSPEC_MERGE_PTRUE))]
2426 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2429 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; This pattern is named (no leading "*") so that a generator function
;; exists; the vec_unpack<su_optab>_float_<perm_hilo>_vnx4si expander in
;; this file calls its vnx4si instance.  The predicate has the fixed mode
;; VNx2BI rather than <VPRED>.
2430 (define_insn "aarch64_sve_<optab><mode>vnx2df2"
2431 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2433 [(match_operand:VNx2BI 1 "register_operand" "Upl")
2435 (match_operand:SVE_SDI 2 "register_operand" "w"))]
2436 UNSPEC_MERGE_PTRUE))]
2438 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2441 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; of HFs.
;; The source has mode <VWIDE> and the predicate mode <VWIDE_PRED>; the
;; conversion itself is expressed as UNSPEC_FLOAT_CONVERT inside
;; UNSPEC_MERGE_PTRUE.
2443 (define_insn "*trunc<Vwide><mode>2"
2444 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2446 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2448 [(match_operand:<VWIDE> 2 "register_operand" "w")]
2449 UNSPEC_FLOAT_CONVERT)]
2450 UNSPEC_MERGE_PTRUE))]
2452 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2455 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; of SFs.
;; The destination has mode <VWIDE> and the predicate mode <VWIDE_PRED>.
;; This pattern is named so that the vec_unpacks_<perm_hilo>_<mode>
;; expander in this file can call its generator.
2457 (define_insn "aarch64_sve_extend<mode><Vwide>2"
2458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2460 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2462 [(match_operand:SVE_HSF 2 "register_operand" "w")]
2463 UNSPEC_FLOAT_CONVERT)]
2464 UNSPEC_MERGE_PTRUE))]
2466 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2469 ;; Unpack the low or high half of a predicate, where "high" refers to
2470 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2471 ;; for little-endian.
;; The preparation code selects the PUNPKHI generator when
;; <hi_lanes_optab> is true and the PUNPKLO generator otherwise, then
;; emits it on operands 0 and 1.
2472 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
2473 [(match_operand:<VWIDE> 0 "register_operand")
2474 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
2478 emit_insn ((<hi_lanes_optab>
2479 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
2480 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
2481 (operands[0], operands[1]));
2486 ;; PUNPKHI and PUNPKLO.
;; Both operands use the "Upa" constraint.  The template prints fixed
;; .h/.b suffixes for every mode in PRED_BHS rather than deriving them
;; from the mode.
2487 (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
2488 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2489 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
2492 "punpk<perm_hilo>\t%0.h, %1.b"
2495 ;; Unpack the low or high half of a vector, where "high" refers to
2496 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2497 ;; for little-endian.
;; The preparation code selects the <su>unpkhi generator when
;; <hi_lanes_optab> is true and the <su>unpklo generator otherwise, then
;; emits it on operands 0 and 1.
2498 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2499 [(match_operand:<VWIDE> 0 "register_operand")
2500 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
2503 emit_insn ((<hi_lanes_optab>
2504 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
2505 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
2506 (operands[0], operands[1]));
2511 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
;; The destination is printed with the wide element suffix <Vewtype> and
;; the source with <Vetype>.
2512 (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
2513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2514 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2517 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2520 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2521 ;; First unpack the source without conversion, then float-convert the
;; unpacked result.
;; The unpack step is a ZIP2 (when <hi_lanes_optab> is true) or ZIP1 of
;; operand 1 with itself into a fresh <MODE> temporary.  The conversion
;; step uses the aarch64_sve_extend<mode><Vwide>2 generator with a
;; <VWIDE_PRED> predicate built from CONSTM1_RTX.
2523 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2524 [(match_operand:<VWIDE> 0 "register_operand")
2525 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2529 /* Use ZIP to do the unpack, since we don't care about the upper halves
2530 and since it has the nice property of not needing any subregs.
2531 If using UUNPK* turns out to be preferable, we could model it as
2532 a ZIP whose first operand is zero. */
2533 rtx temp = gen_reg_rtx (<MODE>mode);
2534 emit_insn ((<hi_lanes_optab>
2535 ? gen_aarch64_sve_zip2<mode>
2536 : gen_aarch64_sve_zip1<mode>)
2537 (temp, operands[1], operands[1]));
2538 rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2539 emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
2545 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
2546 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2547 ;; unpacked VNx4SI to VNx2DF.
;; In the preparation code the unpack step is a ZIP2 (when
;; <hi_lanes_optab> is true) or ZIP1 of operand 1 with itself into a
;; VNx4SI temporary.  The conversion step uses the
;; aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 generator with a VNx2BI
;; predicate built from CONSTM1_RTX.
2548 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2549 [(match_operand:VNx2DF 0 "register_operand")
2551 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2555 /* Use ZIP to do the unpack, since we don't care about the upper halves
2556 and since it has the nice property of not needing any subregs.
2557 If using UUNPK* turns out to be preferable, we could model it as
2558 a ZIP whose first operand is zero. */
2559 rtx temp = gen_reg_rtx (VNx4SImode);
2560 emit_insn ((<hi_lanes_optab>
2561 ? gen_aarch64_sve_zip2vnx4si
2562 : gen_aarch64_sve_zip1vnx4si)
2563 (temp, operands[1], operands[1]));
2564 rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2565 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
2571 ;; Predicate pack. Use UZP1 on the narrower type, which discards
2572 ;; the high part of each wide element.
;; This is the PRED_BHS instance of vec_pack_trunc_<Vwide>.  Both <VWIDE>
;; inputs and the result use the "Upa" constraint, and all three operands
;; are printed with the narrow suffix <Vetype>.
2573 (define_insn "vec_pack_trunc_<Vwide>"
2574 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2576 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2577 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2580 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2583 ;; Integer pack. Use UZP1 on the narrower type, which discards
2584 ;; the high part of each wide element.
;; This is the SVE_BHSI instance of vec_pack_trunc_<Vwide>.  Both <VWIDE>
;; inputs are printed with the narrow suffix <Vetype>, the same as the
;; result.
2585 (define_insn "vec_pack_trunc_<Vwide>"
2586 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2588 [(match_operand:<VWIDE> 1 "register_operand" "w")
2589 (match_operand:<VWIDE> 2 "register_operand" "w")]
2592 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2595 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2596 ;; the results into a single vector.
;; This is the SVE_HSF instance of vec_pack_trunc_<Vwide>.  Operands 1 and
;; 2 are each float-converted under UNSPEC_MERGE_PTRUE, and UNSPEC_UZP1
;; then combines temporaries 4 and 5 into operand 0.  The preparation code
;; creates operands[3] as a <VWIDE_PRED> CONSTM1_RTX predicate and
;; operands[4] and operands[5] as fresh <MODE> registers.
2597 (define_expand "vec_pack_trunc_<Vwide>"
2601 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2602 UNSPEC_FLOAT_CONVERT)]
2603 UNSPEC_MERGE_PTRUE))
2607 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2608 UNSPEC_FLOAT_CONVERT)]
2609 UNSPEC_MERGE_PTRUE))
2610 (set (match_operand:SVE_HSF 0 "register_operand")
2611 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2614 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2615 operands[4] = gen_reg_rtx (<MODE>mode);
2616 operands[5] = gen_reg_rtx (<MODE>mode);
2620 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Operands 1 and 2 are each converted with FIXUORS:VNx4SI under
;; UNSPEC_MERGE_PTRUE, and UNSPEC_UZP1 then combines temporaries 4 and 5
;; into operand 0.  The preparation code creates operands[3] as a VNx2BI
;; CONSTM1_RTX predicate and operands[4] and operands[5] as fresh VNx4SI
;; registers.
2621 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2625 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2626 UNSPEC_MERGE_PTRUE))
2630 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2631 UNSPEC_MERGE_PTRUE))
2632 (set (match_operand:VNx4SI 0 "register_operand")
2633 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2636 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2637 operands[4] = gen_reg_rtx (VNx4SImode);
2638 operands[5] = gen_reg_rtx (VNx4SImode);
2642 ;; Predicated floating-point operations with select.
;; Operand 1 is the <VPRED> predicate, operands 2 and 3 are the inputs of
;; the operation, and operand 4 is a further SVE_F register input.  Operand
;; preparation is delegated to aarch64_sve_prepare_conditional_op, which
;; is called with the operands array, 5 and <commutative>.
2643 (define_expand "cond_<optab><mode>"
2644 [(set (match_operand:SVE_F 0 "register_operand")
2646 [(match_operand:<VPRED> 1 "register_operand")
2649 (match_operand:SVE_F 2 "register_operand")
2650 (match_operand:SVE_F 3 "register_operand")]
2652 (match_operand:SVE_F 4 "register_operand")]
2656 aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
2659 ;; Predicated floating-point operations.
;; Operand 2 is tied to the destination, so the template uses %0 as the
;; first source and operand 3 as the second.
2660 (define_insn "*cond_<optab><mode>"
2661 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2663 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2666 (match_operand:SVE_F 2 "register_operand" "0")
2667 (match_operand:SVE_F 3 "register_operand" "w")]
2672 "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2675 ;; Predicated floating-point operations with the operands reversed.
;; Here operand 3 is tied to the destination instead of operand 2.  The
;; template appends "r" to <sve_fp_op> and passes operand 2 as the
;; remaining source.
2676 (define_insn "*cond_<optab><mode>"
2677 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2679 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2682 (match_operand:SVE_F 2 "register_operand" "w")
2683 (match_operand:SVE_F 3 "register_operand" "0")]
2688 "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2691 ;; Shift an SVE vector left and insert a scalar into element 0.
;; Operand 1 (the vector) is tied to the destination in both alternatives.
;; Alternative 1 takes the scalar under constraint "rZ" and prints it with
;; %<vwcore>2; alternative 2 takes it under constraint "w" and prints it
;; with %<Vetype>2.
2692 (define_insn "vec_shl_insert_<mode>"
2693 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2695 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2696 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2700 insr\t%0.<Vetype>, %<vwcore>2
2701 insr\t%0.<Vetype>, %<Vetype>2"