1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; Note on the handling of big-endian SVE
22 ;; --------------------------------------
24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
25 ;; same way as movdi or movti would: the first byte of memory goes
26 ;; into the most significant byte of the register and the last byte
27 ;; of memory goes into the least significant byte of the register.
28 ;; This is the most natural ordering for Advanced SIMD and matches
29 ;; the ABI layout for 64-bit and 128-bit vector types.
31 ;; As a result, the order of bytes within the register is what GCC
32 ;; expects for a big-endian target, and subreg offsets therefore work
33 ;; as expected, with the first element in memory having subreg offset 0
34 ;; and the last element in memory having the subreg offset associated
35 ;; with a big-endian lowpart. However, this ordering also means that
36 ;; GCC's lane numbering does not match the architecture's numbering:
37 ;; GCC always treats the element at the lowest address in memory
38 ;; (subreg offset 0) as element 0, while the architecture treats
39 ;; the least significant end of the register as element 0.
41 ;; The situation for SVE is different. We want the layout of the
42 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
43 ;; logically, a mov<mode> load must be indistinguishable from a
44 ;; maskload<mode> whose mask is all true. We therefore need the
45 ;; register layout to match LD1 rather than LDR. The ABI layout of
46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
48 ;; As a result, the architecture lane numbering matches GCC's lane
49 ;; numbering, with element 0 always being the first in memory.
52 ;; - Applying a subreg offset to a register does not give the element
53 ;; that GCC expects: the first element in memory has the subreg offset
54 ;; associated with a big-endian lowpart while the last element in memory
55 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
57 ;; - We cannot use LDR and STR for spill slots that might be accessed
58 ;; via subregs, since although the elements have the order GCC expects,
59 ;; the order of the bytes within the elements is different. We instead
60 ;; access spill slots via LD1 and ST1, using secondary reloads to
61 ;; reserve a predicate register.
;; Move expander for single SVE vectors.  Memory moves go through the
;; predicated LD1/ST1 patterns before RA (required for big-endian; see
;; the note at the head of the file); constants go through
;; aarch64_expand_mov_immediate.
;; NOTE(review): several interior lines of the patterns below are
;; missing from this excerpt (conditions, output templates, closing
;; braces) -- verify against the complete upstream aarch64-sve.md.
65 (define_expand "mov<mode>"
66 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
67 (match_operand:SVE_ALL 1 "general_operand"))]
70 /* Use the predicated load and store patterns where possible.
71 This is required for big-endian targets (see the comment at the
72 head of the file) and increases the addressing choices for
74 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
75 && can_create_pseudo_p ())
77 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
81 if (CONSTANT_P (operands[1]))
83 aarch64_expand_mov_immediate (operands[0], operands[1],
84 gen_vec_duplicate<mode>);
90 ;; Unpredicated moves (little-endian). Only allow memory operations
91 ;; during and after RA; before RA we want the predicated load and
92 ;; store patterns to be used instead.
93 (define_insn "*aarch64_sve_mov<mode>_le"
94 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
95 (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
98 && ((lra_in_progress || reload_completed)
99 || (register_operand (operands[0], <MODE>mode)
100 && nonmemory_operand (operands[1], <MODE>mode)))"
105 * return aarch64_output_sve_mov_immediate (operands[1]);"
108 ;; Unpredicated moves (big-endian). Memory accesses require secondary
;; reloads (see aarch64_sve_reload_be below), so only register and
;; immediate operands are accepted here.
110 (define_insn "*aarch64_sve_mov<mode>_be"
111 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
112 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
113 "TARGET_SVE && BYTES_BIG_ENDIAN"
116 * return aarch64_output_sve_mov_immediate (operands[1]);"
119 ;; Handle big-endian memory reloads. We use byte PTRUE for all modes
120 ;; to try to encourage reuse.
121 (define_expand "aarch64_sve_reload_be"
123 [(set (match_operand 0)
125 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
126 "TARGET_SVE && BYTES_BIG_ENDIAN"
128 /* Create a PTRUE. */
129 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
131 /* Refer to the PTRUE in the appropriate mode for this move. */
132 machine_mode mode = GET_MODE (operands[0]);
133 machine_mode pred_mode
134 = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
135 rtx pred = gen_lowpart (pred_mode, operands[2]);
137 /* Emit a predicated load or store. */
138 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
143 ;; A predicated load or store for which the predicate is known to be
144 ;; all-true. Note that this pattern is generated directly by
145 ;; aarch64_emit_sve_pred_move, so changes to this pattern will
146 ;; need changes there as well.
147 (define_insn "*pred_mov<mode>"
148 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
150 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
151 (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
152 UNSPEC_MERGE_PTRUE))]
154 && (register_operand (operands[0], <MODE>mode)
155 || register_operand (operands[2], <MODE>mode))"
157 ld1<Vesize>\t%0.<Vetype>, %1/z, %2
158 st1<Vesize>\t%2.<Vetype>, %1, %0"
;; Misaligned moves: SVE LD1/ST1 have no extra alignment requirement,
;; so this is just a normal move.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
161 (define_expand "movmisalign<mode>"
162 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
163 (match_operand:SVE_ALL 1 "general_operand"))]
166 /* Equivalent to a normal move for our purposes. */
167 emit_move_insn (operands[0], operands[1]);
;; Predicated (masked) load: zeroing LD1, with operand 2 as the
;; governing predicate.
172 (define_insn "maskload<mode><vpred>"
173 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
175 [(match_operand:<VPRED> 2 "register_operand" "Upl")
176 (match_operand:SVE_ALL 1 "memory_operand" "m")]
179 "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
;; Predicated (masked) store: ST1 under the control of predicate
;; operand 2.
182 (define_insn "maskstore<mode><vpred>"
183 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
184 (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
185 (match_operand:SVE_ALL 1 "register_operand" "w")
189 "st1<Vesize>\t%1.<Vetype>, %2, %0"
192 ;; SVE structure moves.
193 (define_expand "mov<mode>"
194 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
195 (match_operand:SVE_STRUCT 1 "general_operand"))]
198 /* Big-endian loads and stores need to be done via LD1 and ST1;
199 see the comment at the head of the file for details. */
200 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
203 gcc_assert (can_create_pseudo_p ());
204 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
208 if (CONSTANT_P (operands[1]))
210 aarch64_expand_mov_immediate (operands[0], operands[1]);
216 ;; Unpredicated structure moves (little-endian).
217 (define_insn "*aarch64_sve_mov<mode>_le"
218 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
219 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
220 "TARGET_SVE && !BYTES_BIG_ENDIAN"
222 [(set_attr "length" "<insn_length>")]
225 ;; Unpredicated structure moves (big-endian). Memory accesses require
226 ;; secondary reloads.
;; NOTE(review): this pattern was named "*aarch64_sve_mov<mode>_le",
;; which duplicated the little-endian pattern's name even though the
;; condition requires BYTES_BIG_ENDIAN; renamed to the "_be" form used
;; by the upstream aarch64-sve.md.
227 (define_insn "*aarch64_sve_mov<mode>_be"
228 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
229 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
230 "TARGET_SVE && BYTES_BIG_ENDIAN"
232 [(set_attr "length" "<insn_length>")]
235 ;; Split unpredicated structure moves into pieces. This is the same
236 ;; for both big-endian and little-endian code, although it only needs
237 ;; to handle memory operands for little-endian code.
;; NOTE(review): the "(define_split" opener and other interior lines
;; are missing from this excerpt -- verify against the upstream file.
239 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
240 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
241 "TARGET_SVE && reload_completed"
244 rtx dest = operands[0];
245 rtx src = operands[1];
246 if (REG_P (dest) && REG_P (src))
247 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
249 for (unsigned int i = 0; i < <vector_count>; ++i)
251 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
252 i * BYTES_PER_SVE_VECTOR);
253 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
254 i * BYTES_PER_SVE_VECTOR);
255 emit_insn (gen_rtx_SET (subdest, subsrc));
261 ;; Predicated structure moves. This works for both endiannesses but in
262 ;; practice is only useful for big-endian.
263 (define_insn_and_split "pred_mov<mode>"
264 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, Utx")
266 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
267 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
268 UNSPEC_MERGE_PTRUE))]
270 && (register_operand (operands[0], <MODE>mode)
271 || register_operand (operands[2], <MODE>mode))"
273 "&& reload_completed"
;; Split into one predicated single-vector move per component vector.
276 for (unsigned int i = 0; i < <vector_count>; ++i)
278 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
280 i * BYTES_PER_SVE_VECTOR);
281 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
283 i * BYTES_PER_SVE_VECTOR);
284 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
288 [(set_attr "length" "<insn_length>")]
;; Move expander for SVE predicate modes.  Memory destinations need
;; the source forced into a register first.
291 (define_expand "mov<mode>"
292 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
293 (match_operand:PRED_ALL 1 "general_operand"))]
296 if (GET_CODE (operands[0]) == MEM)
297 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Predicate moves: register-register, load/store, and the all-zero
;; (Dz) and other immediate (Dm) forms.
301 (define_insn "*aarch64_sve_mov<mode>"
302 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
303 (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
305 && (register_operand (operands[0], <MODE>mode)
306 || register_operand (operands[1], <MODE>mode))"
312 * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
315 ;; Handle extractions from a predicate by converting to an integer vector
316 ;; and extracting from there.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
317 (define_expand "vec_extract<vpred><Vel>"
318 [(match_operand:<VEL> 0 "register_operand")
319 (match_operand:<VPRED> 1 "register_operand")
320 (match_operand:SI 2 "nonmemory_operand")
321 ;; Dummy operand to which we can attach the iterator.
322 (reg:SVE_I V0_REGNUM)]
;; Materialize the predicate as a 0/1 integer vector, then extract.
325 rtx tmp = gen_reg_rtx (<MODE>mode);
326 emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
327 CONST1_RTX (<MODE>mode),
328 CONST0_RTX (<MODE>mode)));
329 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
;; Extract element 2 of vector operand 1 into scalar operand 0.
334 (define_expand "vec_extract<mode><Vel>"
335 [(set (match_operand:<VEL> 0 "register_operand")
337 (match_operand:SVE_ALL 1 "register_operand")
338 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
342 if (poly_int_rtx_p (operands[2], &val)
343 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
345 /* The last element can be extracted with a LASTB and a false
347 rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
348 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
351 if (!CONST_INT_P (operands[2]))
353 /* Create an index with operand[2] as the base and -1 as the step.
354 It will then be zero for the element we care about. */
355 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
356 index = force_reg (<VEL_INT>mode, index);
357 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
358 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
360 /* Get a predicate that is true for only that element. */
361 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
362 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
363 rtx sel = gen_reg_rtx (<VPRED>mode);
364 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
366 /* Select the element using LASTB. */
367 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
373 ;; Extract an element from the Advanced SIMD portion of the register.
374 ;; We don't just reuse the aarch64-simd.md pattern because we don't
375 ;; want any change in lane number on big-endian targets.
376 (define_insn "*vec_extract<mode><Vel>_v128"
377 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
379 (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
380 (parallel [(match_operand:SI 2 "const_int_operand")])))]
382 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 15)"
384 operands[1] = gen_lowpart (<V128>mode, operands[1]);
385 switch (which_alternative)
388 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
390 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
392 return "st1\\t{%1.<Vetype>}[%2], %0";
397 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
400 ;; Extract an element in the range of DUP. This pattern allows the
401 ;; source and destination to be different.
402 (define_insn "*vec_extract<mode><Vel>_dup"
403 [(set (match_operand:<VEL> 0 "register_operand" "=w")
405 (match_operand:SVE_ALL 1 "register_operand" "w")
406 (parallel [(match_operand:SI 2 "const_int_operand")])))]
408 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
;; Reinterpret the scalar destination as a vector so that DUP's
;; register syntax can be used.
410 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
411 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
415 ;; Extract an element outside the range of DUP. This pattern requires the
416 ;; source and destination to be the same.
417 (define_insn "*vec_extract<mode><Vel>_ext"
418 [(set (match_operand:<VEL> 0 "register_operand" "=w")
420 (match_operand:SVE_ALL 1 "register_operand" "0")
421 (parallel [(match_operand:SI 2 "const_int_operand")])))]
422 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
;; Convert the lane index into a byte offset for EXT.
424 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
425 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
426 return "ext\t%0.b, %0.b, %0.b, #%2";
430 ;; Extract the last active element of operand 1 into operand 0.
431 ;; If no elements are active, extract the last inactive element instead.
432 (define_insn "extract_last_<mode>"
433 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
435 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
436 (match_operand:SVE_ALL 2 "register_operand" "w, w")]
440 lastb\t%<vwcore>0, %1, %2.<Vetype>
441 lastb\t%<Vetype>0, %1, %2.<Vetype>"
;; Duplicate a scalar into every element of an SVE vector.  Memory
;; inputs go straight to LD1R; other inputs fall through to the
;; insn_and_split below.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
444 (define_expand "vec_duplicate<mode>"
446 [(set (match_operand:SVE_ALL 0 "register_operand")
447 (vec_duplicate:SVE_ALL
448 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
449 (clobber (scratch:<VPRED>))])]
452 if (MEM_P (operands[1]))
454 rtx ptrue = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
455 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
456 CONST0_RTX (<MODE>mode)));
462 ;; Accept memory operands for the benefit of combine, and also in case
463 ;; the scalar input gets spilled to memory during RA. We want to split
464 ;; the load at the first opportunity in order to allow the PTRUE to be
465 ;; optimized with surrounding code.
466 (define_insn_and_split "*vec_duplicate<mode>_reg"
467 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
468 (vec_duplicate:SVE_ALL
469 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
470 (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
473 mov\t%0.<Vetype>, %<vwcore>1
474 mov\t%0.<Vetype>, %<Vetype>1
476 "&& MEM_P (operands[1])"
479 if (GET_CODE (operands[2]) == SCRATCH)
480 operands[2] = gen_reg_rtx (<VPRED>mode);
481 emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
482 emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
483 CONST0_RTX (<MODE>mode)));
486 [(set_attr "length" "4,4,8")]
489 ;; This is used for vec_duplicate<mode>s from memory, but can also
490 ;; be used by combine to optimize selects of a vec_duplicate<mode>
492 (define_insn "sve_ld1r<mode>"
493 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
495 [(match_operand:<VPRED> 1 "register_operand" "Upl")
496 (vec_duplicate:SVE_ALL
497 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
498 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
501 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
504 ;; Load 128 bits from memory and duplicate to fill a vector. Since there
505 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
506 ;; and simply use vectors of bytes instead.
507 (define_insn "sve_ld1rq"
508 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
510 [(match_operand:VNx16BI 1 "register_operand" "Upl")
511 (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
514 "ld1rqb\t%0.b, %1/z, %2"
517 ;; Implement a predicate broadcast by shifting the low bit of the scalar
518 ;; input into the top bit and using a WHILELO. An alternative would be to
519 ;; duplicate the input and do a compare with zero.
520 (define_expand "vec_duplicate<mode>"
521 [(set (match_operand:PRED_ALL 0 "register_operand")
522 (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
;; tmp = op1 << 63, so WHILELO (0, tmp) yields an all-true predicate
;; when the low bit of op1 is set and all-false when it is clear.
525 rtx tmp = gen_reg_rtx (DImode);
526 rtx op1 = gen_lowpart (DImode, operands[1]);
527 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
528 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
;; Linear series {base, base+step, base+2*step, ...} via the INDEX
;; instruction; immediate and register forms of both operands.
;; NOTE(review): interior lines are missing from this excerpt --
;; verify against the complete upstream file.
533 (define_insn "vec_series<mode>"
534 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
536 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
537 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
540 index\t%0.<Vetype>, #%1, %<vw>2
541 index\t%0.<Vetype>, %<vw>1, #%2
542 index\t%0.<Vetype>, %<vw>1, %<vw>2"
545 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
546 ;; of an INDEX instruction.
547 (define_insn "*vec_series<mode>_plus"
548 [(set (match_operand:SVE_I 0 "register_operand" "=w")
551 (match_operand:<VEL> 1 "register_operand" "r"))
552 (match_operand:SVE_I 2 "immediate_operand")))]
553 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
;; Replace the vector immediate by the scalar step it encodes.
555 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
556 return "index\t%0.<Vetype>, %<vw>1, #%2";
560 ;; Unpredicated LD[234].
;; Expands to the predicated form below with an all-true predicate.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
561 (define_expand "vec_load_lanes<mode><vsingle>"
562 [(set (match_operand:SVE_STRUCT 0 "register_operand")
565 (match_operand:SVE_STRUCT 1 "memory_operand")]
569 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
573 ;; Predicated LD[234].
574 (define_insn "vec_mask_load_lanes<mode><vsingle>"
575 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
577 [(match_operand:<VPRED> 2 "register_operand" "Upl")
578 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
581 "ld<vector_count><Vesize>\t%0, %2/z, %1"
584 ;; Unpredicated ST[234]. This is always a full update, so the dependence
585 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
586 ;; There doesn't seem to be any obvious benefit to treating the all-true
587 ;; case differently though. In particular, it's very unlikely that we'll
588 ;; only find out during RTL that a store_lanes is dead.
589 (define_expand "vec_store_lanes<mode><vsingle>"
590 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
593 (match_operand:SVE_STRUCT 1 "register_operand")
598 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
602 ;; Predicated ST[234].
603 (define_insn "vec_mask_store_lanes<mode><vsingle>"
604 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
606 [(match_operand:<VPRED> 2 "register_operand" "Upl")
607 (match_operand:SVE_STRUCT 1 "register_operand" "w")
611 "st<vector_count><Vesize>\t%1, %2, %0"
;; General permutes; only supported for fixed-length (constant NUNITS)
;; vectors, expanded via aarch64_expand_sve_vec_perm.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
614 (define_expand "vec_perm<mode>"
615 [(match_operand:SVE_ALL 0 "register_operand")
616 (match_operand:SVE_ALL 1 "register_operand")
617 (match_operand:SVE_ALL 2 "register_operand")
618 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
619 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
621 aarch64_expand_sve_vec_perm (operands[0], operands[1],
622 operands[2], operands[3]);
;; Table-driven permute of a single vector.
627 (define_insn "*aarch64_sve_tbl<mode>"
628 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
630 [(match_operand:SVE_ALL 1 "register_operand" "w")
631 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
634 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; ZIP/UZP/TRN-style permutes on predicate modes.
637 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
638 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
639 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
640 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
643 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; ZIP/UZP/TRN-style permutes on vector modes.
646 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
647 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
648 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
649 (match_operand:SVE_ALL 2 "register_operand" "w")]
652 "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; Element reversals within 64-bit, 32-bit and 16-bit containers, a
;; whole-vector reversal, and DUP/EXT lane operations.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
655 (define_insn "*aarch64_sve_rev64<mode>"
656 [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
658 [(match_operand:VNx2BI 1 "register_operand" "Upl")
659 (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
661 UNSPEC_MERGE_PTRUE))]
663 "rev<Vesize>\t%0.d, %1/m, %2.d"
666 (define_insn "*aarch64_sve_rev32<mode>"
667 [(set (match_operand:SVE_BH 0 "register_operand" "=w")
669 [(match_operand:VNx4BI 1 "register_operand" "Upl")
670 (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
672 UNSPEC_MERGE_PTRUE))]
674 "rev<Vesize>\t%0.s, %1/m, %2.s"
677 (define_insn "*aarch64_sve_rev16vnx16qi"
678 [(set (match_operand:VNx16QI 0 "register_operand" "=w")
680 [(match_operand:VNx8BI 1 "register_operand" "Upl")
681 (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
683 UNSPEC_MERGE_PTRUE))]
685 "revb\t%0.h, %1/m, %2.h"
;; Reverse the order of all elements in the vector.
688 (define_insn "*aarch64_sve_rev<mode>"
689 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
690 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
693 "rev\t%0.<Vetype>, %1.<Vetype>")
;; Broadcast one lane of a vector to all lanes.
695 (define_insn "*aarch64_sve_dup_lane<mode>"
696 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
697 (vec_duplicate:SVE_ALL
699 (match_operand:SVE_ALL 1 "register_operand" "w")
700 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
702 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
703 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
706 ;; Note that the immediate (third) operand is the lane index not
;; the byte index; it is scaled to bytes in the output code below.
708 (define_insn "*aarch64_sve_ext<mode>"
709 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
710 (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
711 (match_operand:SVE_ALL 2 "register_operand" "w")
712 (match_operand:SI 3 "const_int_operand")]
715 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
717 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
718 return "ext\\t%0.b, %0.b, %2.b, #%3";
;; Integer addition: immediate add/sub, INC/DEC-style immediates, and
;; the register-register form.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
722 (define_insn "add<mode>3"
723 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
725 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
726 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
729 add\t%0.<Vetype>, %0.<Vetype>, #%D2
730 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
731 * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
732 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
;; Integer subtraction, including the reversed-subtract (SUBR)
;; immediate form.
735 (define_insn "sub<mode>3"
736 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
738 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
739 (match_operand:SVE_I 2 "register_operand" "w, 0")))]
742 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
743 subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
746 ;; Unpredicated multiplication.
747 (define_expand "mul<mode>3"
748 [(set (match_operand:SVE_I 0 "register_operand")
752 (match_operand:SVE_I 1 "register_operand")
753 (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
754 UNSPEC_MERGE_PTRUE))]
757 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
761 ;; Multiplication predicated with a PTRUE. We don't actually need the
762 ;; predicate for the first alternative, but using Upa or X isn't likely
763 ;; to gain much and would make the instruction seem less uniform to the
764 ;; register allocator.
765 (define_insn "*mul<mode>3"
766 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
768 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
770 (match_operand:SVE_I 2 "register_operand" "%0, 0")
771 (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
772 UNSPEC_MERGE_PTRUE))]
775 mul\t%0.<Vetype>, %0.<Vetype>, #%3
776 mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
;; Fused multiply-add: MAD (accumulator = multiplicand register) and
;; MLA (accumulator = destination) forms.
779 (define_insn "*madd<mode>"
780 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
783 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
784 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
785 (match_operand:SVE_I 3 "register_operand" "w, w"))]
787 (match_operand:SVE_I 4 "register_operand" "w, 0")))]
790 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
791 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
;; Fused multiply-subtract: MSB and MLS forms.
794 (define_insn "*msub<mode>3"
795 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
797 (match_operand:SVE_I 4 "register_operand" "w, 0")
799 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
800 (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
801 (match_operand:SVE_I 3 "register_operand" "w, w"))]
802 UNSPEC_MERGE_PTRUE)))]
805 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
806 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
809 ;; Unpredicated NEG, NOT and POPCOUNT.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
810 (define_expand "<optab><mode>2"
811 [(set (match_operand:SVE_I 0 "register_operand")
814 (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
815 UNSPEC_MERGE_PTRUE))]
818 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
822 ;; NEG, NOT and POPCOUNT predicated with a PTRUE.
823 (define_insn "*<optab><mode>2"
824 [(set (match_operand:SVE_I 0 "register_operand" "=w")
826 [(match_operand:<VPRED> 1 "register_operand" "Upl")
828 (match_operand:SVE_I 2 "register_operand" "w"))]
829 UNSPEC_MERGE_PTRUE))]
831 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
834 ;; Vector AND, ORR and XOR.
835 (define_insn "<optab><mode>3"
836 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
838 (match_operand:SVE_I 1 "register_operand" "%0, w")
839 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
842 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
843 <logical>\t%0.d, %1.d, %2.d"
846 ;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
847 ;; by providing this, but we need to use UNSPECs since rtx logical ops
848 ;; aren't defined for floating-point modes.
849 (define_insn "*<optab><mode>3"
850 [(set (match_operand:SVE_F 0 "register_operand" "=w")
851 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
852 (match_operand:SVE_F 2 "register_operand" "w")]
855 "<logicalf_op>\t%0.d, %1.d, %2.d"
858 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
859 ;; this pattern even though the NOT instruction itself is predicated.
860 (define_insn "bic<mode>3"
861 [(set (match_operand:SVE_I 0 "register_operand" "=w")
863 (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
864 (match_operand:SVE_I 2 "register_operand" "w")))]
866 "bic\t%0.d, %2.d, %1.d"
869 ;; Predicate AND. We can reuse one of the inputs as the GP.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
870 (define_insn "and<mode>3"
871 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
872 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
873 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
875 "and\t%0.b, %1/z, %1.b, %2.b"
878 ;; Unpredicated predicate ORR and XOR.
879 (define_expand "<optab><mode>3"
880 [(set (match_operand:PRED_ALL 0 "register_operand")
883 (match_operand:PRED_ALL 1 "register_operand")
884 (match_operand:PRED_ALL 2 "register_operand"))
888 operands[3] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
892 ;; Predicated predicate ORR and XOR.
893 (define_insn "pred_<optab><mode>3"
894 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
897 (match_operand:PRED_ALL 2 "register_operand" "Upa")
898 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
899 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
901 "<logical>\t%0.b, %1/z, %2.b, %3.b"
904 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
905 ;; the GP (which is known to be a PTRUE). Store the result in operand 0
906 ;; and set the flags in the same way as for PTEST. The (and ...) in the
907 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
908 ;; value is structurally equivalent to rhs of the second set.
909 (define_insn "*<optab><mode>3_cc"
910 [(set (reg:CC CC_REGNUM)
912 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
915 (match_operand:PRED_ALL 2 "register_operand" "Upa")
916 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
920 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
921 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
924 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
927 ;; Unpredicated predicate inverse.
928 (define_expand "one_cmpl<mode>2"
929 [(set (match_operand:PRED_ALL 0 "register_operand")
931 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
935 operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
939 ;; Predicated predicate inverse.
940 (define_insn "*one_cmpl<mode>3"
941 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
943 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
944 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
946 "not\t%0.b, %1/z, %2.b"
949 ;; Predicated predicate BIC and ORN.
950 (define_insn "*<nlogical><mode>3"
951 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
954 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
955 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
956 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
958 "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
961 ;; Predicated predicate NAND and NOR.
962 (define_insn "*<logical_nn><mode>3"
963 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
966 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
967 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
968 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
970 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
973 ;; Unpredicated LSL, LSR and ASR by a vector.
;; NOTE(review): interior lines of the patterns below are missing from
;; this excerpt -- verify against the complete upstream file.
974 (define_expand "v<optab><mode>3"
975 [(set (match_operand:SVE_I 0 "register_operand")
979 (match_operand:SVE_I 1 "register_operand")
980 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
981 UNSPEC_MERGE_PTRUE))]
984 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
988 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
989 ;; actually need the predicate for the first alternative, but using Upa
990 ;; or X isn't likely to gain much and would make the instruction seem
991 ;; less uniform to the register allocator.
992 (define_insn "*v<optab><mode>3"
993 [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
995 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
997 (match_operand:SVE_I 2 "register_operand" "w, 0")
998 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
999 UNSPEC_MERGE_PTRUE))]
1002 <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
1003 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1006 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shift patterns above after broadcasting the shift amount.
1008 (define_expand "<ASHIFT:optab><mode>3"
1009 [(set (match_operand:SVE_I 0 "register_operand")
1010 (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1011 (match_operand:<VEL> 2 "general_operand")))]
1015 if (CONST_INT_P (operands[2]))
1017 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
1018 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
1019 amount = force_reg (<MODE>mode, amount);
1023 amount = gen_reg_rtx (<MODE>mode);
1024 emit_insn (gen_vec_duplicate<mode> (amount,
1025 convert_to_mode (<VEL>mode,
1028 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1033 ;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
1035 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
1036 ;; is a PTRUE even if the optimizers haven't yet been able to propagate
1037 ;; the constant.  We would use a separate unspec code for PTESTs involving
1038 ;; GPs that might not be PTRUEs.
1039 (define_insn "ptest_ptrue<mode>"
1040 [(set (reg:CC CC_REGNUM)
1042 (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
1043 (match_operand:PRED_ALL 1 "register_operand" "Upa")]
1050 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
1051 ;; with the comparison being unsigned.
;; Emits WHILELO; the clobber records that the instruction also sets the
;; condition flags (see the _cc variant below for when those are wanted).
1052 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
1053 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1054 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
1055 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
1057 (clobber (reg:CC CC_REGNUM))]
1059 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1062 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1063 ;; Handle the case in which both results are useful.  The GP operand
1064 ;; to the PTEST isn't needed, so we allow it to be anything.
1065 (define_insn_and_split "while_ult<GPI:mode><PRED_ALL:mode>_cc"
1066 [(set (reg:CC CC_REGNUM)
1068 (unspec:SI [(match_operand:PRED_ALL 1)
1070 [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1071 (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1075 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1076 (unspec:PRED_ALL [(match_dup 2)
1080 "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1081 ;; Force the compiler to drop the unused predicate operand, so that we
1082 ;; don't have an unnecessary PTRUE.
;; The split re-emits the same insn with operand 1 replaced by constant -1
;; (all-true), which is why the split condition requires a non-constant.
1083 "&& !CONSTANT_P (operands[1])"
1086 emit_insn (gen_while_ult<GPI:mode><PRED_ALL:mode>_cc
1087 (operands[0], CONSTM1_RTX (<MODE>mode),
1088 operands[2], operands[3]));
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1093 ;; Predicated integer comparison.
;; Alternative 1 compares against an immediate (<imm_con>), alternative 2
;; against a register.  The compare also sets the flags, hence the clobber.
1094 (define_insn "*vec_cmp<cmp_op>_<mode>"
1095 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1097 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1098 (match_operand:SVE_I 2 "register_operand" "w, w")
1099 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1101 (clobber (reg:CC CC_REGNUM))]
1104 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1105 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1108 ;; Predicated integer comparison in which only the flags result is interesting.
;; The predicate result is dead, so it is only a scratch here (operand 0).
1109 (define_insn "*vec_cmp<cmp_op>_<mode>_ptest"
1110 [(set (reg:CC CC_REGNUM)
1113 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1116 (match_operand:SVE_I 2 "register_operand" "w, w")
1117 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1121 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1124 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1125 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1128 ;; Predicated comparison in which both the flag and predicate results
;; (continuation of this comment is elided in this extract — presumably
;; "are interesting"; confirm against the full file.)
1130 (define_insn "*vec_cmp<cmp_op>_<mode>_cc"
1131 [(set (reg:CC CC_REGNUM)
1134 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1137 (match_operand:SVE_I 2 "register_operand" "w, w")
1138 (match_operand:SVE_I 3 "aarch64_sve_cmp_<imm_con>_operand" "<imm_con>, w")]
1142 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1150 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1151 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1154 ;; Predicated floating-point comparison (excluding FCMUO, which doesn't
1155 ;; allow #0.0 as an operand).
;; Alternative 1 compares against literal #0.0 (Dz), alternative 2
;; against a register.
1156 (define_insn "*vec_fcm<cmp_op><mode>"
1157 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1159 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1160 (match_operand:SVE_F 2 "register_operand" "w, w")
1161 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1165 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1166 fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1169 ;; Predicated FCMUO.
;; Register-only form: FCMUO has no zero-immediate variant (see above).
1170 (define_insn "*vec_fcmuo<mode>"
1171 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1173 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1174 (match_operand:SVE_F 2 "register_operand" "w")
1175 (match_operand:SVE_F 3 "register_operand" "w")]
1178 "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1181 ;; vcond_mask operand order: true, false, mask
1182 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1183 ;; SEL operand order: mask, true, false
1184 (define_insn "vcond_mask_<mode><vpred>"
1185 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1187 [(match_operand:<VPRED> 3 "register_operand" "Upa")
1188 (match_operand:SVE_ALL 1 "register_operand" "w")
1189 (match_operand:SVE_ALL 2 "register_operand" "w")]
1192 "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1195 ;; Selects between a duplicated immediate and zero.
;; Zeroing-move form: active lanes get immediate operand 2, inactive
;; lanes become zero (operand 3 is the zero vector).
1196 (define_insn "aarch64_sve_dup<mode>_const"
1197 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1199 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1200 (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1201 (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1204 "mov\t%0.<Vetype>, %1/z, #%2"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1207 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
1208 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1209 (define_expand "vcond<mode><v_int_equiv>"
1210 [(set (match_operand:SVE_ALL 0 "register_operand")
1211 (if_then_else:SVE_ALL
1212 (match_operator 3 "comparison_operator"
1213 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1214 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1215 (match_operand:SVE_ALL 1 "register_operand")
1216 (match_operand:SVE_ALL 2 "register_operand")))]
1219 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1224 ;; Integer vcondu.  Don't enforce an immediate range here, since it
1225 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Identical shape to vcond above; the unsigned/signed distinction is
;; carried by the comparison code in operand 3.
1226 (define_expand "vcondu<mode><v_int_equiv>"
1227 [(set (match_operand:SVE_ALL 0 "register_operand")
1228 (if_then_else:SVE_ALL
1229 (match_operator 3 "comparison_operator"
1230 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1231 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1232 (match_operand:SVE_ALL 1 "register_operand")
1233 (match_operand:SVE_ALL 2 "register_operand")))]
1236 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1241 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero
1242 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
;; (continuation of this comment is elided in this extract — presumably
;; "with zero"; confirm against the full file.)
1244 (define_expand "vcond<mode><v_fp_equiv>"
1245 [(set (match_operand:SVE_SD 0 "register_operand")
1246 (if_then_else:SVE_SD
1247 (match_operator 3 "comparison_operator"
1248 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1249 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1250 (match_operand:SVE_SD 1 "register_operand")
1251 (match_operand:SVE_SD 2 "register_operand")))]
1254 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1259 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
1260 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; (continuation of this comment is elided in this extract — presumably
;; "instead"; confirm against the full file.)
1262 (define_expand "vec_cmp<mode><vpred>"
1264 [(set (match_operand:<VPRED> 0 "register_operand")
1265 (match_operator:<VPRED> 1 "comparison_operator"
1266 [(match_operand:SVE_I 2 "register_operand")
1267 (match_operand:SVE_I 3 "nonmemory_operand")]))
1268 (clobber (reg:CC CC_REGNUM))])]
1271 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1272 operands[2], operands[3]);
1277 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
1278 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; (continuation elided — see the note on the signed variant above.)
1280 (define_expand "vec_cmpu<mode><vpred>"
1282 [(set (match_operand:<VPRED> 0 "register_operand")
1283 (match_operator:<VPRED> 1 "comparison_operator"
1284 [(match_operand:SVE_I 2 "register_operand")
1285 (match_operand:SVE_I 3 "nonmemory_operand")]))
1286 (clobber (reg:CC CC_REGNUM))])]
1289 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1290 operands[2], operands[3]);
1295 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
1296 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; (continuation elided — presumably "with zero"; confirm.)
;; Note: no CC clobber here, unlike the integer variants above.
1298 (define_expand "vec_cmp<mode><vpred>"
1299 [(set (match_operand:<VPRED> 0 "register_operand")
1300 (match_operator:<VPRED> 1 "comparison_operator"
1301 [(match_operand:SVE_F 2 "register_operand")
1302 (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1305 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1306 operands[2], operands[3], false);
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1311 ;; Branch based on predicate equality or inequality.
;; Implemented as PTEST of (operand1 XOR operand2) against a PTRUE:
;; the predicates are equal iff the XOR is all-false.  When operand 2 is
;; the zero predicate the XOR is skipped (visible in the CONST0 check).
1312 (define_expand "cbranch<mode>4"
1315 (match_operator 0 "aarch64_equality_operator"
1316 [(match_operand:PRED_ALL 1 "register_operand")
1317 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1318 (label_ref (match_operand 3 ""))
1322 rtx ptrue = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
1324 if (operands[2] == CONST0_RTX (<MODE>mode))
1328 pred = gen_reg_rtx (<MODE>mode);
1329 emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1332 emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1333 operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1334 operands[2] = const0_rtx;
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1338 ;; Unpredicated integer MIN/MAX.
;; As with the other unpredicated expanders in this file, operand 3 is an
;; all-true predicate created for the UNSPEC_MERGE_PTRUE wrapper.
1339 (define_expand "<su><maxmin><mode>3"
1340 [(set (match_operand:SVE_I 0 "register_operand")
1343 (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1344 (match_operand:SVE_I 2 "register_operand"))]
1345 UNSPEC_MERGE_PTRUE))]
1348 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1352 ;; Integer MIN/MAX predicated with a PTRUE.
;; "%0" marks operand 2 as commutative with operand 3 and ties it to the
;; destination, matching the destructive SMAX/UMAX/SMIN/UMIN encoding.
1353 (define_insn "*<su><maxmin><mode>3"
1354 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1356 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1357 (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
1358 (match_operand:SVE_I 3 "register_operand" "w"))]
1359 UNSPEC_MERGE_PTRUE))]
1361 "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1364 ;; Unpredicated floating-point MIN/MAX.
1365 (define_expand "<su><maxmin><mode>3"
1366 [(set (match_operand:SVE_F 0 "register_operand")
1369 (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1370 (match_operand:SVE_F 2 "register_operand"))]
1371 UNSPEC_MERGE_PTRUE))]
1374 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode))
1378 ;; Floating-point MIN/MAX predicated with a PTRUE.
;; Emits the FMAXNM/FMINNM (NaN-propagation-resistant) forms, matching
;; the rtl smax/smin semantics for FP.
1379 (define_insn "*<su><maxmin><mode>3"
1380 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1382 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1383 (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
1384 (match_operand:SVE_F 3 "register_operand" "w"))]
1385 UNSPEC_MERGE_PTRUE))]
1387 "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1390 ;; Unpredicated fmin/fmax.
;; These are the IEEE fmin/fmax built-in semantics, expressed with an
;; unspec rather than smin/smax.
1391 (define_expand "<maxmin_uns><mode>3"
1392 [(set (match_operand:SVE_F 0 "register_operand")
1395 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1396 (match_operand:SVE_F 2 "register_operand")]
1398 UNSPEC_MERGE_PTRUE))]
1401 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1405 ;; fmin/fmax predicated with a PTRUE.
1406 (define_insn "*<maxmin_uns><mode>3"
1407 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1409 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1410 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
1411 (match_operand:SVE_F 3 "register_operand" "w")]
1413 UNSPEC_MERGE_PTRUE))]
1415 "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1418 ;; Predicated integer operations.
;; Merging form: operand 2 is tied to the destination, so inactive lanes
;; keep their operand-2 value.
1419 (define_insn "cond_<optab><mode>"
1420 [(set (match_operand:SVE_I 0 "register_operand" "=w")
1422 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1423 (match_operand:SVE_I 2 "register_operand" "0")
1424 (match_operand:SVE_I 3 "register_operand" "w")]
1427 "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1430 ;; Set operand 0 to the last active element in operand 3, or to tied
1431 ;; operand 1 if no elements are active.
;; Two alternatives: general-register destination (%<vwcore>) and
;; FP/SIMD-register destination (%<vw>), both via CLASTB.
1432 (define_insn "fold_extract_last_<mode>"
1433 [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
1435 [(match_operand:<VEL> 1 "register_operand" "0, 0")
1436 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
1437 (match_operand:SVE_ALL 3 "register_operand" "w, w")]
1441 clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
1442 clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1445 ;; Unpredicated integer add reduction.
1446 (define_expand "reduc_plus_scal_<mode>"
1447 [(set (match_operand:<VEL> 0 "register_operand")
1448 (unspec:<VEL> [(match_dup 2)
1449 (match_operand:SVE_I 1 "register_operand")]
1453 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1457 ;; Predicated integer add reduction.  The result is always 64-bits.
;; Hence the %d0 (D-register) destination in the UADDV template.
1458 (define_insn "*reduc_plus_scal_<mode>"
1459 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1460 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1461 (match_operand:SVE_I 2 "register_operand" "w")]
1464 "uaddv\t%d0, %1, %2.<Vetype>"
1467 ;; Unpredicated floating-point add reduction.
1468 (define_expand "reduc_plus_scal_<mode>"
1469 [(set (match_operand:<VEL> 0 "register_operand")
1470 (unspec:<VEL> [(match_dup 2)
1471 (match_operand:SVE_F 1 "register_operand")]
1475 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1479 ;; Predicated floating-point add reduction.
1480 (define_insn "*reduc_plus_scal_<mode>"
1481 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1482 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1483 (match_operand:SVE_F 2 "register_operand" "w")]
1486 "faddv\t%<Vetype>0, %1, %2.<Vetype>"
1489 ;; Unpredicated integer MIN/MAX reduction.
1490 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1491 [(set (match_operand:<VEL> 0 "register_operand")
1492 (unspec:<VEL> [(match_dup 2)
1493 (match_operand:SVE_I 1 "register_operand")]
1497 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1501 ;; Predicated integer MIN/MAX reduction.
1502 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1503 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1504 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1505 (match_operand:SVE_I 2 "register_operand" "w")]
1508 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
1511 ;; Unpredicated floating-point MIN/MAX reduction.
1512 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1513 [(set (match_operand:<VEL> 0 "register_operand")
1514 (unspec:<VEL> [(match_dup 2)
1515 (match_operand:SVE_F 1 "register_operand")]
1519 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1523 ;; Predicated floating-point MIN/MAX reduction.
1524 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
1525 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1526 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1527 (match_operand:SVE_F 2 "register_operand" "w")]
1530 "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
;; Unpredicated integer bitwise reduction — presumably AND/OR/XOR given the
;; <bit_reduc_op> template in the insn below; a section comment appears to be
;; elided here, confirm against the full file.
1533 (define_expand "reduc_<optab>_scal_<mode>"
1534 [(set (match_operand:<VEL> 0 "register_operand")
1535 (unspec:<VEL> [(match_dup 2)
1536 (match_operand:SVE_I 1 "register_operand")]
1540 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
;; Predicated integer bitwise reduction (see note above).
1544 (define_insn "*reduc_<optab>_scal_<mode>"
1545 [(set (match_operand:<VEL> 0 "register_operand" "=w")
1546 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
1547 (match_operand:SVE_I 2 "register_operand" "w")]
1550 "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1553 ;; Unpredicated floating-point addition.
1554 (define_expand "add<mode>3"
1555 [(set (match_operand:SVE_F 0 "register_operand")
1559 (match_operand:SVE_F 1 "register_operand")
1560 (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
1561 UNSPEC_MERGE_PTRUE))]
1564 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1568 ;; Floating-point addition predicated with a PTRUE.
;; Alternatives: add positive immediate (vsA), subtract the negated
;; immediate (vsN, printed with %N3), and the register-register form.
1569 (define_insn "*add<mode>3"
1570 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
1572 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1574 (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
1575 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
1576 UNSPEC_MERGE_PTRUE))]
1579 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1580 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1581 fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1584 ;; Unpredicated floating-point subtraction.
;; Operand 1 may be an immediate here (FSUBR covers the reversed case).
1585 (define_expand "sub<mode>3"
1586 [(set (match_operand:SVE_F 0 "register_operand")
1590 (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
1591 (match_operand:SVE_F 2 "register_operand"))]
1592 UNSPEC_MERGE_PTRUE))]
1595 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1599 ;; Floating-point subtraction predicated with a PTRUE.
;; The insn condition (partially elided here) requires at least one of
;; operands 2 and 3 to be a register, ruling out the imm/imm case.
1600 (define_insn "*sub<mode>3"
1601 [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
1603 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
1605 (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
1606 (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
1607 UNSPEC_MERGE_PTRUE))]
1609 && (register_operand (operands[2], <MODE>mode)
1610 || register_operand (operands[3], <MODE>mode))"
1612 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1613 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
1614 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
1615 fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
1618 ;; Unpredicated floating-point multiplication.
1619 (define_expand "mul<mode>3"
1620 [(set (match_operand:SVE_F 0 "register_operand")
1624 (match_operand:SVE_F 1 "register_operand")
1625 (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
1626 UNSPEC_MERGE_PTRUE))]
1629 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1633 ;; Floating-point multiplication predicated with a PTRUE.
;; Alternative 1 multiplies by an immediate (vsM), alternative 2 is the
;; unpredicated register form.
1634 (define_insn "*mul<mode>3"
1635 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1637 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1639 (match_operand:SVE_F 2 "register_operand" "%0, w")
1640 (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
1641 UNSPEC_MERGE_PTRUE))]
1644 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1645 fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1648 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
1649 (define_expand "fma<mode>4"
1650 [(set (match_operand:SVE_F 0 "register_operand")
1653 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1654 (match_operand:SVE_F 2 "register_operand")
1655 (match_operand:SVE_F 3 "register_operand"))]
1656 UNSPEC_MERGE_PTRUE))]
1659 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1663 ;; fma predicated with a PTRUE.
;; Operand numbering is permuted relative to the expander (multiplicands
;; are 3 and 4, addend is 2) so that the two destructive encodings FMAD
;; (destination tied to a multiplicand) and FMLA (tied to the addend) can
;; be expressed as alternatives of one pattern.
1664 (define_insn "*fma<mode>4"
1665 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1667 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1668 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1669 (match_operand:SVE_F 4 "register_operand" "w, w")
1670 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1671 UNSPEC_MERGE_PTRUE))]
1674 fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1675 fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1678 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
1679 (define_expand "fnma<mode>4"
1680 [(set (match_operand:SVE_F 0 "register_operand")
1683 (fma:SVE_F (neg:SVE_F
1684 (match_operand:SVE_F 1 "register_operand"))
1685 (match_operand:SVE_F 2 "register_operand")
1686 (match_operand:SVE_F 3 "register_operand"))]
1687 UNSPEC_MERGE_PTRUE))]
1690 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1694 ;; fnma predicated with a PTRUE.
;; Same operand permutation as *fma<mode>4 above; FMSB/FMLS are the
;; negated-multiplicand counterparts of FMAD/FMLA.
1695 (define_insn "*fnma<mode>4"
1696 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1698 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1699 (fma:SVE_F (neg:SVE_F
1700 (match_operand:SVE_F 3 "register_operand" "%0, w"))
1701 (match_operand:SVE_F 4 "register_operand" "w, w")
1702 (match_operand:SVE_F 2 "register_operand" "w, 0"))]
1703 UNSPEC_MERGE_PTRUE))]
1706 fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1707 fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1710 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
1711 (define_expand "fms<mode>4"
1712 [(set (match_operand:SVE_F 0 "register_operand")
1715 (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
1716 (match_operand:SVE_F 2 "register_operand")
1718 (match_operand:SVE_F 3 "register_operand")))]
1719 UNSPEC_MERGE_PTRUE))]
1722 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1726 ;; fms predicated with a PTRUE.
1727 (define_insn "*fms<mode>4"
1728 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1730 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1731 (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
1732 (match_operand:SVE_F 4 "register_operand" "w, w")
1734 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
1735 UNSPEC_MERGE_PTRUE))]
1738 fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1739 fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
1742 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
1743 (define_expand "fnms<mode>4"
1744 [(set (match_operand:SVE_F 0 "register_operand")
1747 (fma:SVE_F (neg:SVE_F
1748 (match_operand:SVE_F 1 "register_operand"))
1749 (match_operand:SVE_F 2 "register_operand")
1751 (match_operand:SVE_F 3 "register_operand")))]
1752 UNSPEC_MERGE_PTRUE))]
1755 operands[4] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1759 ;; fnms predicated with a PTRUE.
1760 (define_insn "*fnms<mode>4"
1761 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1763 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1764 (fma:SVE_F (neg:SVE_F
1765 (match_operand:SVE_F 3 "register_operand" "%0, w"))
1766 (match_operand:SVE_F 4 "register_operand" "w, w")
1768 (match_operand:SVE_F 2 "register_operand" "w, 0")))]
1769 UNSPEC_MERGE_PTRUE))]
1772 fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
1773 fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1776 ;; Unpredicated floating-point division.
1777 (define_expand "div<mode>3"
1778 [(set (match_operand:SVE_F 0 "register_operand")
1781 (div:SVE_F (match_operand:SVE_F 1 "register_operand")
1782 (match_operand:SVE_F 2 "register_operand"))]
1783 UNSPEC_MERGE_PTRUE))]
1786 operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1790 ;; Floating-point division predicated with a PTRUE.
;; Two destructive alternatives: FDIV with the dividend tied to the
;; destination, FDIVR (reversed) with the divisor tied instead.
1791 (define_insn "*div<mode>3"
1792 [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
1794 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1795 (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
1796 (match_operand:SVE_F 3 "register_operand" "w, 0"))]
1797 UNSPEC_MERGE_PTRUE))]
1800 fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1801 fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1804 ;; Unpredicated FNEG, FABS and FSQRT.
1805 (define_expand "<optab><mode>2"
1806 [(set (match_operand:SVE_F 0 "register_operand")
1809 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
1810 UNSPEC_MERGE_PTRUE))]
1813 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1817 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
1818 (define_insn "*<optab><mode>2"
1819 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1821 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1822 (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
1823 UNSPEC_MERGE_PTRUE))]
1825 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1828 ;; Unpredicated FRINTy.
;; The rounding-mode suffix comes from <frint_pattern>/<frint_suffix>
;; in the insn below.
1829 (define_expand "<frint_pattern><mode>2"
1830 [(set (match_operand:SVE_F 0 "register_operand")
1833 (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
1835 UNSPEC_MERGE_PTRUE))]
1838 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1842 ;; FRINTy predicated with a PTRUE.
1843 (define_insn "*<frint_pattern><mode>2"
1844 [(set (match_operand:SVE_F 0 "register_operand" "=w")
1846 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1847 (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
1849 UNSPEC_MERGE_PTRUE))]
1851 "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1854 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
1855 ;; SF to SI or DF to DI).
1856 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
1857 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
1858 (unspec:<V_INT_EQUIV>
1860 (FIXUORS:<V_INT_EQUIV>
1861 (match_operand:SVE_F 1 "register_operand"))]
1862 UNSPEC_MERGE_PTRUE))]
1865 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1869 ;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
;; (Comment corrected from "SF": the input is VNx8HF and the template
;; reads %2.h, so the source elements are half-precision floats.)
;; NOTE(review): the name "v16hsf" is inconsistent with the sibling
;; patterns "vnx4sf"/"vnx2df" below — presumably it should be "vnx8hf";
;; confirm against the full file before renaming.
1870 (define_insn "*<fix_trunc_optab>v16hsf<mode>2"
1871 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1873 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1875 (match_operand:VNx8HF 2 "register_operand" "w"))]
1876 UNSPEC_MERGE_PTRUE))]
1878 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
1881 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
1882 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
1883 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
1885 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1887 (match_operand:VNx4SF 2 "register_operand" "w"))]
1888 UNSPEC_MERGE_PTRUE))]
1890 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
1893 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
;; The governing predicate is explicitly VNx2BI (one bit per DF element).
1894 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
1895 [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
1897 [(match_operand:VNx2BI 1 "register_operand" "Upl")
1899 (match_operand:VNx2DF 2 "register_operand" "w"))]
1900 UNSPEC_MERGE_PTRUE))]
1902 "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1905 ;; Unpredicated conversion of integers to floats of the same size
1906 ;; (HI to HF, SI to SF or DI to DF).
1907 (define_expand "<optab><v_int_equiv><mode>2"
1908 [(set (match_operand:SVE_F 0 "register_operand")
1912 (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
1913 UNSPEC_MERGE_PTRUE))]
1916 operands[2] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
1920 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
;; (continuation elided — presumably "with a PTRUE", matching the
;; siblings below; confirm against the full file.)
1922 (define_insn "*<optab><mode>vnx8hf2"
1923 [(set (match_operand:VNx8HF 0 "register_operand" "=w")
1925 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1927 (match_operand:SVE_HSDI 2 "register_operand" "w"))]
1928 UNSPEC_MERGE_PTRUE))]
1930 "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
1933 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
1934 (define_insn "*<optab><mode>vnx4sf2"
1935 [(set (match_operand:VNx4SF 0 "register_operand" "=w")
1937 [(match_operand:<VPRED> 1 "register_operand" "Upl")
1939 (match_operand:SVE_SDI 2 "register_operand" "w"))]
1940 UNSPEC_MERGE_PTRUE))]
1942 "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
1945 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
;; As in the fcvtz case, the predicate mode is explicitly VNx2BI here.
1946 (define_insn "*<optab><mode>vnx2df2"
1947 [(set (match_operand:VNx2DF 0 "register_operand" "=w")
1949 [(match_operand:VNx2BI 1 "register_operand" "Upl")
1951 (match_operand:SVE_SDI 2 "register_operand" "w"))]
1952 UNSPEC_MERGE_PTRUE))]
1954 "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1957 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
;; (continuation elided — presumably "of HFs"; confirm against the full
;; file.)  Narrowing FCVT, predicated on the wide mode's predicate.
1959 (define_insn "*trunc<Vwide><mode>2"
1960 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
1962 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
1964 [(match_operand:<VWIDE> 2 "register_operand" "w")]
1965 UNSPEC_FLOAT_CONVERT)]
1966 UNSPEC_MERGE_PTRUE))]
1968 "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
1971 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
;; (continuation elided — presumably "of SFs"; confirm.)  Widening FCVT.
1973 (define_insn "*extend<mode><Vwide>2"
1974 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1976 [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
1978 [(match_operand:SVE_HSF 2 "register_operand" "w")]
1979 UNSPEC_FLOAT_CONVERT)]
1980 UNSPEC_MERGE_PTRUE))]
1982 "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
1985 ;; PUNPKHI and PUNPKLO.
;; Predicate unpack: widens one half of a B/H/S predicate.  The template
;; always uses .h/.b element sizes, as PUNPK operates on predicate bytes.
1986 (define_insn "vec_unpack<su>_<perm_hilo>_<mode>"
1987 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
1988 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
1991 "punpk<perm_hilo>\t%0.h, %1.b"
1994 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
;; Integer unpack: sign- or zero-extends one half of a B/H/S vector
;; to the next wider element size.
1995 (define_insn "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
1996 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1997 (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2000 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2003 ;; Used by the vec_unpacks_<perm_hilo>_<mode> expander to unpack the bit
2004 ;; representation of a VNx4SF or VNx8HF without conversion.  The choice
2005 ;; between signed and unsigned isn't significant.
2006 (define_insn "*vec_unpacku_<perm_hilo>_<mode>_no_convert"
2007 [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2008 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand" "w")]
2011 "uunpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
2014 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2015 ;; First unpack the source without conversion, then float-convert the
;; (continuation elided — presumably "result"; confirm.)
;; operand 2 is a fresh temporary for the bit-unpacked value and
;; operand 3 an all-true predicate in the wide predicate mode.
2017 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2019 (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2021 (set (match_operand:<VWIDE> 0 "register_operand")
2022 (unspec:<VWIDE> [(match_dup 3)
2023 (unspec:<VWIDE> [(match_dup 2)] UNSPEC_FLOAT_CONVERT)]
2024 UNSPEC_MERGE_PTRUE))]
2027 operands[2] = gen_reg_rtx (<MODE>mode);
2028 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2032 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
2033 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2034 ;; unpacked VNx4SI to VNx2DF.
;; operand 4 is the paradoxical-free subreg view of the VNx2DI temp.
2035 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2037 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2039 (set (match_operand:VNx2DF 0 "register_operand")
2040 (unspec:VNx2DF [(match_dup 3)
2041 (FLOATUORS:VNx2DF (match_dup 4))]
2042 UNSPEC_MERGE_PTRUE))]
2045 operands[2] = gen_reg_rtx (VNx2DImode);
2046 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2047 operands[4] = gen_rtx_SUBREG (VNx4SImode, operands[2], 0);
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
2051 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
2052 ;; the high part of each wide element.
2053 (define_insn "vec_pack_trunc_<Vwide>"
2054 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2056 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2057 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2060 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2063 ;; Integer pack.  Use UZP1 on the narrower type, which discards
2064 ;; the high part of each wide element.
2065 (define_insn "vec_pack_trunc_<Vwide>"
2066 [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2068 [(match_operand:<VWIDE> 1 "register_operand" "w")
2069 (match_operand:<VWIDE> 2 "register_operand" "w")]
2072 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2075 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2076 ;; the results into a single vector.
;; operands 4 and 5 are fresh temporaries for the two narrowed halves,
;; operand 3 an all-true predicate in the wide predicate mode; the final
;; UZP1 interleave-discards them into the destination.
2077 (define_expand "vec_pack_trunc_<Vwide>"
2081 (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2082 UNSPEC_FLOAT_CONVERT)]
2083 UNSPEC_MERGE_PTRUE))
2087 (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2088 UNSPEC_FLOAT_CONVERT)]
2089 UNSPEC_MERGE_PTRUE))
2090 (set (match_operand:SVE_HSF 0 "register_operand")
2091 (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2094 operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
2095 operands[4] = gen_reg_rtx (<MODE>mode);
2096 operands[5] = gen_reg_rtx (<MODE>mode);
2100 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Same structure as the FP pack above, specialized to VNx2DF -> VNx4SI.
2101 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2105 (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2106 UNSPEC_MERGE_PTRUE))
2110 (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2111 UNSPEC_MERGE_PTRUE))
2112 (set (match_operand:VNx4SI 0 "register_operand")
2113 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2116 operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
2117 operands[4] = gen_reg_rtx (VNx4SImode);
2118 operands[5] = gen_reg_rtx (VNx4SImode);
;; NOTE(review): interior lines are elided in this extract (embedded line
;; numbers jump) — verify against the full file before editing.
2122 ;; Predicated floating-point operations.
;; FP counterpart of cond_<optab><mode> for SVE_I: merging form with
;; operand 2 tied to the destination.
2123 (define_insn "cond_<optab><mode>"
2124 [(set (match_operand:SVE_F 0 "register_operand" "=w")
2126 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2127 (match_operand:SVE_F 2 "register_operand" "0")
2128 (match_operand:SVE_F 3 "register_operand" "w")]
2131 "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2134 ;; Shift an SVE vector left and insert a scalar into element 0.
;; INSR with either a general register (rZ, %<vwcore>) or an FP/SIMD
;; register (w, %<Vetype>) as the inserted scalar; the vector input is
;; tied to the destination in both alternatives.
2135 (define_insn "vec_shl_insert_<mode>"
2136 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2138 [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2139 (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2143 insr\t%0.<Vetype>, %<vwcore>2
2144 insr\t%0.<Vetype>, %<Vetype>2"