1 ;; Machine description for AArch64 SME.
2 ;; Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 ;; This file is part of GCC.
6 ;; GCC is free software; you can redistribute it and/or modify it
7 ;; under the terms of the GNU General Public License as published by
8 ;; the Free Software Foundation; either version 3, or (at your option)
11 ;; GCC is distributed in the hope that it will be useful, but
12 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;; General Public License for more details.
16 ;; You should have received a copy of the GNU General Public License
17 ;; along with GCC; see the file COPYING3. If not see
18 ;; <http://www.gnu.org/licenses/>.
20 ;; The file is organised into the following sections (search for the full
23 ;; == State management
24 ;; ---- Test current state
25 ;; ---- PSTATE.SM management
26 ;; ---- PSTATE.ZA management
28 ;; == Loads, stores and moves
29 ;; ---- Single-vector loads
31 ;; ---- Single-vector stores
33 ;; ---- Single-vector moves
34 ;; ---- Multi-vector moves
37 ;; == Binary arithmetic
38 ;; ---- Binary arithmetic on ZA tile
39 ;; ---- Binary arithmetic on ZA slice
40 ;; ---- Binary arithmetic, writing to ZA slice
42 ;; == Ternary arithmetic
43 ;; ---- [INT] Dot product
44 ;; ---- [INT] Ternary widening arithmetic on ZA slice
45 ;; ---- [INT] Sum of outer products
46 ;; ---- [FP] Dot product
47 ;; ---- [FP] Ternary arithmetic on ZA slice
48 ;; ---- [FP] Ternary widening arithmetic on ZA slice
49 ;; ---- [FP] Sum of outer products
54 ;; =========================================================================
55 ;; == State management
56 ;; =========================================================================
58 ;; Many of the instructions in this section are only valid when SME is
59 ;; present. However, they don't have a TARGET_SME condition since
60 ;; (a) they are only emitted under direct control of aarch64 code and
61 ;; (b) they are sometimes used conditionally, particularly in streaming-
64 ;; =========================================================================
66 ;; -------------------------------------------------------------------------
67 ;; ---- Test current state
68 ;; -------------------------------------------------------------------------
70 (define_c_enum "unspec" [
77 ;; A marker instruction to say that the old value of the DWARF VG register
78 ;; has been saved to the stack, for CFI purposes. Operand 0 is the old
79 ;; value of the register and operand 1 is the save slot.
;; Modelled as a set of VG_REGNUM from an UNSPEC_OLD_VG_SAVED of the two
;; operands. Neither operand has a mode or predicate here, and the insn
;; has type "no_insn".
80 (define_insn "aarch64_old_vg_saved"
81 [(set (reg:DI VG_REGNUM)
82 (unspec:DI [(match_operand 0)
83 (match_operand 1)] UNSPEC_OLD_VG_SAVED))]
86 [(set_attr "type" "no_insn")]
89 ;; A marker to indicate places where a call temporarily changes VG.
;; The pattern both reads and writes VG_REGNUM (through UNSPEC_UPDATE_VG),
;; so dataflow sees it as a redefinition of VG. It has type "no_insn".
90 (define_insn "aarch64_update_vg"
91 [(set (reg:DI VG_REGNUM)
92 (unspec:DI [(reg:DI VG_REGNUM)] UNSPEC_UPDATE_VG))]
95 [(set_attr "type" "no_insn")]
;; Call the __arm_sme_state support routine. The result is modelled as a
;; TImode value in R0 produced by a volatile unspec. Besides R0, the
;; pattern clobbers only R16, R17, R18, R30 and the condition flags, and it
;; is marked as a call through the "is_call" attribute.
98 (define_insn "aarch64_get_sme_state"
99 [(set (reg:TI R0_REGNUM)
100 (unspec_volatile:TI [(const_int 0)] UNSPEC_GET_SME_STATE))
101 (clobber (reg:DI R16_REGNUM))
102 (clobber (reg:DI R17_REGNUM))
103 (clobber (reg:DI R18_REGNUM))
104 (clobber (reg:DI R30_REGNUM))
105 (clobber (reg:CC CC_REGNUM))]
107 "bl\t__arm_sme_state"
108 [(set_attr "is_call" "yes")]
;; Set general register operand 0 from a volatile UNSPEC_READ_SVCR.
;; NOTE(review): presumably this reads the SVCR system register, going by
;; the pattern name; the output template is not visible here -- confirm.
111 (define_insn "aarch64_read_svcr"
112 [(set (match_operand:DI 0 "register_operand" "=r")
113 (unspec_volatile:DI [(const_int 0)] UNSPEC_READ_SVCR))]
118 ;; -------------------------------------------------------------------------
119 ;; ---- PSTATE.SM management
120 ;; -------------------------------------------------------------------------
124 ;; -------------------------------------------------------------------------
126 (define_c_enum "unspec" [
131 ;; Turn on streaming mode. This clobbers all SVE state.
133 ;; Depend on VG_REGNUM to ensure that the VG save slot has already been
;; The pattern is a volatile unspec with no result. It uses VG_REGNUM and
;; clobbers eight V4x16QI register groups (starting at V0, V4, ..., V28)
;; plus each of P0-P15 in VNx16BI mode.
135 (define_insn "aarch64_smstart_sm"
136 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTART_SM)
137 (use (reg:DI VG_REGNUM))
138 (clobber (reg:V4x16QI V0_REGNUM))
139 (clobber (reg:V4x16QI V4_REGNUM))
140 (clobber (reg:V4x16QI V8_REGNUM))
141 (clobber (reg:V4x16QI V12_REGNUM))
142 (clobber (reg:V4x16QI V16_REGNUM))
143 (clobber (reg:V4x16QI V20_REGNUM))
144 (clobber (reg:V4x16QI V24_REGNUM))
145 (clobber (reg:V4x16QI V28_REGNUM))
146 (clobber (reg:VNx16BI P0_REGNUM))
147 (clobber (reg:VNx16BI P1_REGNUM))
148 (clobber (reg:VNx16BI P2_REGNUM))
149 (clobber (reg:VNx16BI P3_REGNUM))
150 (clobber (reg:VNx16BI P4_REGNUM))
151 (clobber (reg:VNx16BI P5_REGNUM))
152 (clobber (reg:VNx16BI P6_REGNUM))
153 (clobber (reg:VNx16BI P7_REGNUM))
154 (clobber (reg:VNx16BI P8_REGNUM))
155 (clobber (reg:VNx16BI P9_REGNUM))
156 (clobber (reg:VNx16BI P10_REGNUM))
157 (clobber (reg:VNx16BI P11_REGNUM))
158 (clobber (reg:VNx16BI P12_REGNUM))
159 (clobber (reg:VNx16BI P13_REGNUM))
160 (clobber (reg:VNx16BI P14_REGNUM))
161 (clobber (reg:VNx16BI P15_REGNUM))]
166 ;; Turn off streaming mode. This clobbers all SVE state.
168 ;; Depend on VG_REGNUM to ensure that the VG save slot has already been
;; Same shape as aarch64_smstart_sm, but with UNSPEC_SMSTOP_SM: a volatile
;; unspec with no result that uses VG_REGNUM and clobbers the eight
;; V4x16QI groups starting at V0, V4, ..., V28 and each of P0-P15.
170 (define_insn "aarch64_smstop_sm"
171 [(unspec_volatile [(const_int 0)] UNSPEC_SMSTOP_SM)
172 (use (reg:DI VG_REGNUM))
173 (clobber (reg:V4x16QI V0_REGNUM))
174 (clobber (reg:V4x16QI V4_REGNUM))
175 (clobber (reg:V4x16QI V8_REGNUM))
176 (clobber (reg:V4x16QI V12_REGNUM))
177 (clobber (reg:V4x16QI V16_REGNUM))
178 (clobber (reg:V4x16QI V20_REGNUM))
179 (clobber (reg:V4x16QI V24_REGNUM))
180 (clobber (reg:V4x16QI V28_REGNUM))
181 (clobber (reg:VNx16BI P0_REGNUM))
182 (clobber (reg:VNx16BI P1_REGNUM))
183 (clobber (reg:VNx16BI P2_REGNUM))
184 (clobber (reg:VNx16BI P3_REGNUM))
185 (clobber (reg:VNx16BI P4_REGNUM))
186 (clobber (reg:VNx16BI P5_REGNUM))
187 (clobber (reg:VNx16BI P6_REGNUM))
188 (clobber (reg:VNx16BI P7_REGNUM))
189 (clobber (reg:VNx16BI P8_REGNUM))
190 (clobber (reg:VNx16BI P9_REGNUM))
191 (clobber (reg:VNx16BI P10_REGNUM))
192 (clobber (reg:VNx16BI P11_REGNUM))
193 (clobber (reg:VNx16BI P12_REGNUM))
194 (clobber (reg:VNx16BI P13_REGNUM))
195 (clobber (reg:VNx16BI P14_REGNUM))
196 (clobber (reg:VNx16BI P15_REGNUM))]
201 ;; -------------------------------------------------------------------------
202 ;; ---- PSTATE.ZA management
203 ;; -------------------------------------------------------------------------
207 ;; plus calls to support routines.
208 ;; -------------------------------------------------------------------------
;; Non-volatile unspec codes for the PSTATE.ZA management patterns.
;; NOTE(review): the list as seen here may be partial; several patterns in
;; this section use codes (e.g. UNSPEC_TPIDR2_SAVE, UNSPEC_SMSTOP_ZA) that
;; are not among the entries visible below.
210 (define_c_enum "unspec" [
212 UNSPEC_INITIAL_ZERO_ZA
214 UNSPEC_TPIDR2_RESTORE
217 UNSPEC_SETUP_LOCAL_TPIDR2
219 UNSPEC_START_PRIVATE_ZA_CALL
220 UNSPEC_END_PRIVATE_ZA_CALL
221 UNSPEC_COMMIT_LAZY_SAVE
;; Volatile unspec codes used by aarch64_asm_update_za and
;; aarch64_asm_update_zt0.
224 (define_c_enum "unspecv" [
225 UNSPECV_ASM_UPDATE_ZA
226 UNSPECV_ASM_UPDATE_ZT0
229 ;; Use the ABI-defined routine to commit an uncommitted lazy save.
230 ;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM.
231 ;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming
232 ;; value of the architected TPIDR2_EL0.
;; The call is modelled as defining ZA_FREE_REGNUM from SME_STATE_REGNUM
;; and TPIDR2_SETUP_REGNUM. It clobbers R14-R18, R30 and the condition
;; flags, and is marked as a call through the "is_call" attribute.
233 (define_insn "aarch64_tpidr2_save"
234 [(set (reg:DI ZA_FREE_REGNUM)
235 (unspec:DI [(reg:DI SME_STATE_REGNUM)
236 (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE))
237 (clobber (reg:DI R14_REGNUM))
238 (clobber (reg:DI R15_REGNUM))
239 (clobber (reg:DI R16_REGNUM))
240 (clobber (reg:DI R17_REGNUM))
241 (clobber (reg:DI R18_REGNUM))
242 (clobber (reg:DI R30_REGNUM))
243 (clobber (reg:CC CC_REGNUM))]
245 "bl\t__arm_tpidr2_save"
246 [(set_attr "is_call" "yes")]
249 ;; Set PSTATE.ZA to 1. If ZA was previously dormant or active,
250 ;; it remains in the same state afterwards, with the same contents.
251 ;; Otherwise, it goes from off to on with zeroed contents.
253 ;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved
254 ;; up past this instruction, since that could create an invalid
255 ;; combination of having an active lazy save while ZA is off.
256 ;; Create an anti-dependence by reading the current contents
257 ;; of TPIDR2_SETUP_REGNUM.
259 ;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging
260 ;; to the caller have already been saved. That isn't necessary for this
261 ;; instruction itself, since PSTATE.ZA is already 1 if it contains data.
262 ;; But doing this here means that other uses of ZA can just depend on
263 ;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM.
;; Defines SME_STATE_REGNUM. The two (use ...) expressions provide the
;; TPIDR2_SETUP_REGNUM anti-dependence and the ZA_FREE_REGNUM dependence
;; described in the comment above.
264 (define_insn "aarch64_smstart_za"
265 [(set (reg:DI SME_STATE_REGNUM)
267 (use (reg:DI TPIDR2_SETUP_REGNUM))
268 (use (reg:DI ZA_FREE_REGNUM))]
273 ;; Disable ZA and discard its current contents.
275 ;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
276 ;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past
277 ;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this.
279 ;; We can only turn ZA off once we know that it is free (i.e. doesn't
280 ;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM
283 ;; We only turn ZA off when the current function's ZA state is dead,
284 ;; or perhaps if we're sure that the contents are saved. Either way,
285 ;; we know whether ZA is saved or not.
;; Defines both SME_STATE_REGNUM and ZA_SAVED_REGNUM. The ZA_SAVED_REGNUM
;; value is an UNSPEC_SMSTOP_ZA of TPIDR2_SETUP_REGNUM and ZA_FREE_REGNUM,
;; which supplies the two dependencies described in the comment above.
286 (define_insn "aarch64_smstop_za"
287 [(set (reg:DI SME_STATE_REGNUM)
289 (set (reg:DI ZA_SAVED_REGNUM)
290 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
291 (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))]
296 ;; Zero ZA after committing a lazy save. The sequencing is enforced
297 ;; by reading ZA_FREE_REGNUM.
;; Defines ZA_REGNUM (in DImode here) from SME_STATE_REGNUM and
;; ZA_FREE_REGNUM; the old contents of ZA are not an input.
298 (define_insn "aarch64_initial_zero_za"
299 [(set (reg:DI ZA_REGNUM)
300 (unspec:DI [(reg:DI SME_STATE_REGNUM)
301 (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))]
306 ;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of
307 ;; the current function's TPIDR2 block. Other instructions can then
308 ;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block.
;; Operand 0 is the TPIDR2 block, accessed as a V16QI memory operand.
;; The insn has type "no_insn".
309 (define_insn "aarch64_setup_local_tpidr2"
310 [(set (reg:DI TPIDR2_BLOCK_REGNUM)
311 (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")]
312 UNSPEC_SETUP_LOCAL_TPIDR2))]
315 [(set_attr "type" "no_insn")]
318 ;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save.
;; Writes XZR to TPIDR2_EL0 and is modelled as a definition of
;; TPIDR2_SETUP_REGNUM.
319 (define_insn "aarch64_clear_tpidr2"
320 [(set (reg:DI TPIDR2_SETUP_REGNUM)
323 "msr\ttpidr2_el0, xzr"
326 ;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address
327 ;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the
;; Defines TPIDR2_SETUP_REGNUM from the address in operand 0 and from
;; TPIDR2_BLOCK_REGNUM, so the write depends on whatever last defined
;; TPIDR2_BLOCK_REGNUM.
329 (define_insn "aarch64_write_tpidr2"
330 [(set (reg:DI TPIDR2_SETUP_REGNUM)
331 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
332 (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))]
334 "msr\ttpidr2_el0, %0"
337 ;; Check whether ZA has been saved. The system depends on the value that
338 ;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM.
;; Reads TPIDR2_EL0 into general register operand 0. In addition to
;; TPIDR2_SETUP_REGNUM, the pattern also reads ZA_SAVED_REGNUM.
339 (define_insn "aarch64_read_tpidr2"
340 [(set (match_operand:DI 0 "register_operand" "=r")
341 (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
342 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))]
344 "mrs\t%0, tpidr2_el0"
347 ;; Use the ABI-defined routine to restore lazy-saved ZA contents
348 ;; from the TPIDR2 block pointed to by X0. ZA must already be active.
;; The call defines ZA_SAVED_REGNUM from R0 and redefines SME_STATE_REGNUM
;; from its previous value. It clobbers R14-R18, R30 and the condition
;; flags, and is marked as a call through the "is_call" attribute.
349 (define_insn "aarch64_tpidr2_restore"
350 [(set (reg:DI ZA_SAVED_REGNUM)
351 (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
352 (set (reg:DI SME_STATE_REGNUM)
353 (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
354 (clobber (reg:DI R14_REGNUM))
355 (clobber (reg:DI R15_REGNUM))
356 (clobber (reg:DI R16_REGNUM))
357 (clobber (reg:DI R17_REGNUM))
358 (clobber (reg:DI R18_REGNUM))
359 (clobber (reg:DI R30_REGNUM))
360 (clobber (reg:CC CC_REGNUM))]
362 "bl\t__arm_tpidr2_restore"
363 [(set_attr "is_call" "yes")]
366 ;; Check whether a lazy save set up by aarch64_save_za was committed
367 ;; and restore the saved contents if so.
369 ;; Operand 0 is the address of the current function's TPIDR2 block.
;; The unsplit pattern redefines ZA_SAVED_REGNUM from operand 0,
;; SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM and the old ZA_SAVED_REGNUM, and
;; clobbers R0, R14-R18, R30 and the condition flags.
;;
;; The split condition includes epilogue_completed. The split code reads
;; TPIDR2_EL0 into R16 via aarch64_read_tpidr2, emits a likely-taken
;; conditional jump on that value (gen_aarch64_cbnedi1) to a fresh label,
;; and then calls aarch64_restore_za on operand 0.
;; NOTE(review): the emission of the label itself is not visible in this
;; view -- confirm that it follows the aarch64_restore_za call.
370 (define_insn_and_split "aarch64_restore_za"
371 [(set (reg:DI ZA_SAVED_REGNUM)
372 (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
373 (reg:DI SME_STATE_REGNUM)
374 (reg:DI TPIDR2_SETUP_REGNUM)
375 (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
376 (clobber (reg:DI R0_REGNUM))
377 (clobber (reg:DI R14_REGNUM))
378 (clobber (reg:DI R15_REGNUM))
379 (clobber (reg:DI R16_REGNUM))
380 (clobber (reg:DI R17_REGNUM))
381 (clobber (reg:DI R18_REGNUM))
382 (clobber (reg:DI R30_REGNUM))
383 (clobber (reg:CC CC_REGNUM))]
386 "&& epilogue_completed"
389 auto label = gen_label_rtx ();
390 auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
391 emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
392 auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
393 JUMP_LABEL (jump) = label;
395 aarch64_restore_za (operands[0]);
401 ;; This instruction is emitted after asms that alter ZA, in order to model
402 ;; the effect on dataflow. The asm itself can't have ZA as an input or
403 ;; an output, since there is no associated data type. Instead it retains
404 ;; the original "za" clobber, which on its own would indicate that ZA
407 ;; The operand is a unique identifier.
;; ZA_REGNUM is redefined by a volatile unspec of its old value,
;; SME_STATE_REGNUM and the constant identifier in operand 0.
;; The insn has type "no_insn".
408 (define_insn "aarch64_asm_update_za"
409 [(set (reg:VNx16QI ZA_REGNUM)
410 (unspec_volatile:VNx16QI
411 [(reg:VNx16QI ZA_REGNUM)
412 (reg:DI SME_STATE_REGNUM)
413 (match_operand 0 "const_int_operand")]
414 UNSPECV_ASM_UPDATE_ZA))]
417 [(set_attr "type" "no_insn")]
420 ;; A similar pattern for ZT0.
;; ZT0_REGNUM (V8DI) is redefined by a volatile unspec of its old value,
;; SME_STATE_REGNUM and the constant in operand 0.
;; The insn has type "no_insn".
421 (define_insn "aarch64_asm_update_zt0"
422 [(set (reg:V8DI ZT0_REGNUM)
423 (unspec_volatile:V8DI
424 [(reg:V8DI ZT0_REGNUM)
425 (reg:DI SME_STATE_REGNUM)
426 (match_operand 0 "const_int_operand")]
427 UNSPECV_ASM_UPDATE_ZT0))]
430 [(set_attr "type" "no_insn")]
433 ;; This pseudo-instruction is emitted as part of a call to a private-ZA
434 ;; function from a function with ZA state. It marks a natural place to set
435 ;; up a lazy save, if that turns out to be necessary. The save itself
436 ;; is managed by the mode-switching pass.
;; Modelled as a read-modify-write of LOWERING_REGNUM with type "no_insn".
437 (define_insn "aarch64_start_private_za_call"
438 [(set (reg:DI LOWERING_REGNUM)
439 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))]
442 [(set_attr "type" "no_insn")]
445 ;; This pseudo-instruction is emitted as part of a call to a private-ZA
446 ;; function from a function with ZA state. It marks a natural place to restore
447 ;; the current function's ZA contents from the lazy save buffer, if that
448 ;; turns out to be necessary. The restore itself is managed by the
449 ;; mode-switching pass.
;; Modelled as a read-modify-write of LOWERING_REGNUM with type "no_insn".
450 (define_insn "aarch64_end_private_za_call"
451 [(set (reg:DI LOWERING_REGNUM)
452 (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))]
455 [(set_attr "type" "no_insn")]
458 ;; =========================================================================
459 ;; == Loads, stores and moves
460 ;; =========================================================================
462 ;; -------------------------------------------------------------------------
463 ;; ---- Single-vector loads
464 ;; -------------------------------------------------------------------------
468 ;; -------------------------------------------------------------------------
470 (define_c_enum "unspec" [
;; Predicated LD1 into a ZA tile slice, for streaming SME.
;; Operand 0: constant tile number (printed after "za").
;; Operand 1: 32-bit slice index register (constraint "Ucj").
;; Operand 2: governing predicate, used with zeroing (/z).
;; Operand 3: memory source.
;; The old contents of ZA and SME_STATE_REGNUM are also inputs.
474 (define_insn "@aarch64_sme_<optab><mode>"
475 [(set (reg:SME_ZA_I ZA_REGNUM)
477 [(reg:SME_ZA_I ZA_REGNUM)
478 (reg:DI SME_STATE_REGNUM)
479 (match_operand:DI 0 "const_int_operand")
480 (match_operand:SI 1 "register_operand" "Ucj")
481 (match_operand:<VPRED> 2 "register_operand" "Upl")
482 (match_operand:SME_ZA_I 3 "aarch64_sve_ldff1_operand" "Utf")]
484 "TARGET_STREAMING_SME"
485 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, 0] }, %2/z, %3"
;; As the pattern above, but the slice index is operand 1 plus the
;; constant offset in operand 2. The insn condition requires the offset
;; to be less than 128 / <elem_bits>. The predicate and memory operands
;; are renumbered to 3 and 4.
488 (define_insn "@aarch64_sme_<optab><mode>_plus"
489 [(set (reg:SME_ZA_I ZA_REGNUM)
491 [(reg:SME_ZA_I ZA_REGNUM)
492 (reg:DI SME_STATE_REGNUM)
493 (match_operand:DI 0 "const_int_operand")
494 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
495 (match_operand:SI 2 "const_int_operand"))
496 (match_operand:<VPRED> 3 "register_operand" "Upl")
497 (match_operand:SME_ZA_I 4 "aarch64_sve_ldff1_operand" "Utf")]
499 "TARGET_STREAMING_SME
500 && UINTVAL (operands[2]) < 128 / <elem_bits>"
501 "ld1<Vesize>\t{ za%0<hv>.<Vetype>[%w1, %2] }, %3/z, %4"
;; LDR into the ZA vector selected by the slice index register in operand 0
;; (offset 0), loading from the address held in register operand 1.
504 (define_insn "aarch64_sme_ldr0"
505 [(set (reg:VNx16QI ZA_REGNUM)
507 [(reg:VNx16QI ZA_REGNUM)
508 (reg:DI SME_STATE_REGNUM)
509 (match_operand:SI 0 "register_operand" "Ucj")
510 (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))]
513 "ldr\tza[%w0, 0], [%1, #0, mul vl]"
;; LDR into a ZA vector with a nonzero offset. The template prints
;; operand 1 both as the slice offset and as the "mul vl" address offset,
;; while the RTL address is operand 2 plus operand 3.
;; NOTE(review): presumably aarch64_sme_ldr_vnum_offset_p checks that
;; operand 3 is the byte offset corresponding to operand 1 vectors -- its
;; definition is not visible here; confirm.
516 (define_insn "@aarch64_sme_ldrn<mode>"
517 [(set (reg:VNx16QI ZA_REGNUM)
519 [(reg:VNx16QI ZA_REGNUM)
520 (reg:DI SME_STATE_REGNUM)
521 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
522 (match_operand:SI 1 "const_int_operand"))
524 (plus:P (match_operand:P 2 "register_operand" "rk")
525 (match_operand:P 3 "aarch64_mov_operand")))]
528 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
529 "ldr\tza[%w0, %1], [%2, #%1, mul vl]"
532 ;; -------------------------------------------------------------------------
534 ;; -------------------------------------------------------------------------
537 ;; -------------------------------------------------------------------------
539 (define_c_enum "unspec" [
;; Load ZT0_REGNUM (V8DI) from memory operand 0. The (use ...) makes the
;; load depend on SME_STATE_REGNUM.
543 (define_insn "aarch64_sme_ldr_zt0"
544 [(set (reg:V8DI ZT0_REGNUM)
545 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q"))
546 (use (reg:DI SME_STATE_REGNUM))]
551 ;; This version is used after calls to private-ZA functions. Since ZT0_REGNUM
552 ;; represents the current function's state, it isn't clobbered by private-ZA
553 ;; functions, so we need to make it depend on the ZA reinitialization code.
;; Here SME_STATE_REGNUM is an input of the UNSPEC_RESTORE_ZT0 that
;; defines ZT0_REGNUM, alongside the memory source in operand 0.
554 (define_insn "aarch64_restore_zt0"
555 [(set (reg:V8DI ZT0_REGNUM)
557 [(reg:DI SME_STATE_REGNUM)
558 (match_operand:V8DI 0 "aarch64_sync_memory_operand" "Q")]
559 UNSPEC_RESTORE_ZT0))]
564 ;; -------------------------------------------------------------------------
565 ;; ---- Single-vector stores
566 ;; -------------------------------------------------------------------------
570 ;; -------------------------------------------------------------------------
572 (define_c_enum "unspec" [
;; Predicated ST1 from a ZA tile slice, for streaming SME.
;; Operand 0: memory destination (a read-write "+" operand).
;; Operand 1: constant tile number.
;; Operand 2: 32-bit slice index register (constraint "Ucj").
;; Operand 3: governing predicate.
576 (define_insn "@aarch64_sme_<optab><mode>"
577 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
579 [(reg:SME_ZA_I ZA_REGNUM)
580 (reg:DI SME_STATE_REGNUM)
582 (match_operand:DI 1 "const_int_operand")
583 (match_operand:SI 2 "register_operand" "Ucj")
584 (match_operand:<VPRED> 3 "register_operand" "Upl")]
586 "TARGET_STREAMING_SME"
587 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, 0] }, %3, %0"
;; As the pattern above, but the slice index is operand 2 plus the
;; constant offset in operand 3. The insn condition requires the offset
;; to be less than 128 / <elem_bits>. The predicate becomes operand 4.
590 (define_insn "@aarch64_sme_<optab><mode>_plus"
591 [(set (match_operand:SME_ZA_I 0 "aarch64_sve_ldff1_operand" "+Utf")
593 [(reg:SME_ZA_I ZA_REGNUM)
594 (reg:DI SME_STATE_REGNUM)
596 (match_operand:DI 1 "const_int_operand")
597 (plus:SI (match_operand:SI 2 "register_operand" "Ucj")
598 (match_operand:SI 3 "const_int_operand"))
599 (match_operand:<VPRED> 4 "register_operand" "Upl")]
601 "TARGET_STREAMING_SME
602 && UINTVAL (operands[3]) < 128 / <elem_bits>"
603 "st1<Vesize>\t{ za%1<hv>.<Vetype>[%w2, %3] }, %4, %0"
;; STR of the ZA vector selected by the slice index register in operand 0
;; (offset 0) to the address held in register operand 1. The old contents
;; of the destination memory are also listed as an input via match_dup.
606 (define_insn "aarch64_sme_str0"
607 [(set (mem:VNx16QI (match_operand 1 "pmode_register_operand" "rk"))
609 [(reg:VNx16QI ZA_REGNUM)
610 (reg:DI SME_STATE_REGNUM)
611 (mem:VNx16QI (match_dup 1))
612 (match_operand:SI 0 "register_operand" "Ucj")]
615 "str\tza[%w0, 0], [%1, #0, mul vl]"
;; STR of a ZA vector with a nonzero offset. The template prints
;; operand 1 both as the slice offset and as the "mul vl" address offset,
;; while the RTL address is operand 2 plus operand 3.
;; NOTE(review): the condition reuses aarch64_sme_ldr_vnum_offset_p, whose
;; definition is not visible here; presumably it relates operand 3 to
;; operand 1 -- confirm.
618 (define_insn "@aarch64_sme_strn<mode>"
620 (plus:P (match_operand:P 2 "register_operand" "rk")
621 (match_operand:P 3 "aarch64_mov_operand")))
623 [(reg:VNx16QI ZA_REGNUM)
624 (reg:DI SME_STATE_REGNUM)
625 (mem:VNx16QI (plus:P (match_dup 2) (match_dup 3)))
626 (plus:SI (match_operand:SI 0 "register_operand" "Ucj")
627 (match_operand:SI 1 "const_int_operand"))]
630 && aarch64_sme_ldr_vnum_offset_p (operands[1], operands[3])"
631 "str\tza[%w0, %1], [%2, #%1, mul vl]"
634 ;; -------------------------------------------------------------------------
636 ;; -------------------------------------------------------------------------
639 ;; -------------------------------------------------------------------------
;; Store ZT0_REGNUM (V8DI) to memory operand 0. The (use ...) makes the
;; store depend on SME_STATE_REGNUM.
641 (define_insn "aarch64_sme_str_zt0"
642 [(set (match_operand:V8DI 0 "aarch64_sync_memory_operand" "=Q")
643 (reg:V8DI ZT0_REGNUM))
644 (use (reg:DI SME_STATE_REGNUM))]
649 ;; -------------------------------------------------------------------------
650 ;; ---- Single-vector moves
651 ;; -------------------------------------------------------------------------
654 ;; -------------------------------------------------------------------------
;; Predicated MOVA from a ZA tile slice to vector register operand 0.
;; Operand 1: input tied to operand 0 (constraint "0"); the move is
;;            merging (/m).
;; Operand 2: governing predicate.
;; Operand 3: constant tile number.
;; Operand 4: 32-bit slice index register (constraint "Ucj").
656 (define_insn "@aarch64_sme_<optab><v_int_container><mode>"
657 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
659 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
660 (reg:DI SME_STATE_REGNUM)
661 (match_operand:SVE_FULL 1 "register_operand" "0")
662 (match_operand:<VPRED> 2 "register_operand" "Upl")
663 (match_operand:DI 3 "const_int_operand")
664 (match_operand:SI 4 "register_operand" "Ucj")]
666 "TARGET_STREAMING_SME"
667 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, 0]"
;; As the pattern above, but the slice index is operand 4 plus the
;; constant offset in operand 5. The insn condition requires the offset
;; to be less than 128 / <elem_bits>.
670 (define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
671 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
673 [(reg:<V_INT_CONTAINER> ZA_REGNUM)
674 (reg:DI SME_STATE_REGNUM)
675 (match_operand:SVE_FULL 1 "register_operand" "0")
676 (match_operand:<VPRED> 2 "register_operand" "Upl")
677 (match_operand:DI 3 "const_int_operand")
678 (plus:SI (match_operand:SI 4 "register_operand" "Ucj")
679 (match_operand:SI 5 "const_int_operand"))]
681 "TARGET_STREAMING_SME
682 && UINTVAL (operands[5]) < 128 / <elem_bits>"
683 "mova\t%0.<Vetype>, %2/m, za%3<hv>.<Vetype>[%w4, %5]"
;; 128-bit (.q) form of the ZA-to-vector MOVA. ZA is accessed in the
;; VNx1TI_ONLY mode, the predicate is VNx2BI, and the slice offset is
;; always 0. Operand roles otherwise match the per-element read pattern.
686 (define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
687 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
689 [(reg:VNx1TI_ONLY ZA_REGNUM)
690 (reg:DI SME_STATE_REGNUM)
691 (match_operand:SVE_FULL 1 "register_operand" "0")
692 (match_operand:VNx2BI 2 "register_operand" "Upl")
693 (match_operand:DI 3 "const_int_operand")
694 (match_operand:SI 4 "register_operand" "Ucj")]
696 "TARGET_STREAMING_SME"
697 "mova\t%0.q, %2/m, za%3<hv>.q[%w4, 0]"
;; Predicated MOVA from vector register operand 3 to a ZA tile slice.
;; Operand 0: constant tile number.
;; Operand 1: 32-bit slice index register (constraint "Ucj").
;; Operand 2: governing predicate (merging, /m).
;; Operand 3: source vector.
;; NOTE(review): the old ZA value is read as (reg:SVE_FULL ZA_REGNUM)
;; whereas the destination and the unspec use <V_INT_CONTAINER>; the
;; corresponding read patterns use <V_INT_CONTAINER> for ZA throughout.
;; Confirm whether the mode difference is intentional.
700 (define_insn "@aarch64_sme_<optab><v_int_container><mode>"
701 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
702 (unspec:<V_INT_CONTAINER>
703 [(reg:SVE_FULL ZA_REGNUM)
704 (reg:DI SME_STATE_REGNUM)
705 (match_operand:DI 0 "const_int_operand")
706 (match_operand:SI 1 "register_operand" "Ucj")
707 (match_operand:<VPRED> 2 "register_operand" "Upl")
708 (match_operand:SVE_FULL 3 "register_operand" "w")]
710 "TARGET_STREAMING_SME"
711 "mova\tza%0<hv>.<Vetype>[%w1, 0], %2/m, %3.<Vetype>"
;; As the pattern above, but the slice index is operand 1 plus the
;; constant offset in operand 2. The insn condition requires the offset
;; to be less than 128 / <elem_bits>. The predicate and source vector
;; become operands 3 and 4.
;; NOTE(review): as in the non-plus form, ZA is read in SVE_FULL mode but
;; written in <V_INT_CONTAINER> mode -- confirm this is intentional.
714 (define_insn "*aarch64_sme_<optab><v_int_container><mode>_plus"
715 [(set (reg:<V_INT_CONTAINER> ZA_REGNUM)
716 (unspec:<V_INT_CONTAINER>
717 [(reg:SVE_FULL ZA_REGNUM)
718 (reg:DI SME_STATE_REGNUM)
719 (match_operand:DI 0 "const_int_operand")
720 (plus:SI (match_operand:SI 1 "register_operand" "Ucj")
721 (match_operand:SI 2 "const_int_operand"))
722 (match_operand:<VPRED> 3 "register_operand" "Upl")
723 (match_operand:SVE_FULL 4 "register_operand" "w")]
725 "TARGET_STREAMING_SME
726 && UINTVAL (operands[2]) < 128 / <elem_bits>"
727 "mova\tza%0<hv>.<Vetype>[%w1, %2], %3/m, %4.<Vetype>"
;; 128-bit (.q) form of the vector-to-ZA MOVA. ZA is accessed in the
;; VNx1TI_ONLY mode, the predicate is VNx2BI, and the slice offset is
;; always 0. Operand roles otherwise match the per-element write pattern.
730 (define_insn "@aarch64_sme_<optab><VNx1TI_ONLY:mode><SVE_FULL:mode>"
731 [(set (reg:VNx1TI_ONLY ZA_REGNUM)
733 [(reg:VNx1TI_ONLY ZA_REGNUM)
734 (reg:DI SME_STATE_REGNUM)
735 (match_operand:DI 0 "const_int_operand")
736 (match_operand:SI 1 "register_operand" "Ucj")
737 (match_operand:VNx2BI 2 "register_operand" "Upl")
738 (match_operand:SVE_FULL 3 "register_operand" "w")]
740 "TARGET_STREAMING_SME"
741 "mova\tza%0<hv>.q[%w1, 0], %2/m, %3.q"
744 ;; -------------------------------------------------------------------------
745 ;; ---- Multi-vector moves
746 ;; -------------------------------------------------------------------------
749 ;; -------------------------------------------------------------------------
;; Unpredicated multi-vector MOVA from consecutive ZA tile slices to the
;; aligned register group in operand 0 (SME2, streaming mode).
;; Operand 1: constant tile number.
;; Operand 2: 32-bit slice index register (constraint "Ucj").
;; operands[3] is computed at output time as <vector_count> - 1 and is
;; printed as the end of the "0:%3" slice range.
751 (define_insn "@aarch64_sme_<optab><mode><mode>"
752 [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
754 [(reg:SVE_FULLx24 ZA_REGNUM)
755 (reg:DI SME_STATE_REGNUM)
756 (match_operand:DI 1 "const_int_operand")
757 (match_operand:SI 2 "register_operand" "Ucj")]
759 "TARGET_STREAMING_SME2"
761 operands[3] = GEN_INT (<vector_count> - 1);
762 return "mova\t%0, za%1<hv>.<Vetype>[%w2, 0:%3]";
;; As the pattern above, but with a constant slice offset in operand 3.
;; The insn condition requires the offset to be a multiple of
;; <vector_count> and less than 128 / <elem_bits>. operands[4] is computed
;; at output time as the last slice of the "%3:%4" range.
766 (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
767 [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
769 [(reg:SVE_FULLx24 ZA_REGNUM)
770 (reg:DI SME_STATE_REGNUM)
771 (match_operand:DI 1 "const_int_operand")
773 (match_operand:SI 2 "register_operand" "Ucj")
774 (match_operand:SI 3 "const_int_operand"))]
776 "TARGET_STREAMING_SME2
777 && UINTVAL (operands[3]) % <vector_count> == 0
778 && UINTVAL (operands[3]) < 128 / <elem_bits>"
780 operands[4] = GEN_INT (INTVAL (operands[3]) + <vector_count> - 1);
781 return "mova\t%0, za%1<hv>.<Vetype>[%w2, %3:%4]";
;; Multi-vector MOVA from a ZA vector group (za.d, vgx<vector_count>) to
;; the aligned register group in operand 0. Operand 1 is the 32-bit slice
;; index register (constraint "Uci"); the slice offset is 0.
785 (define_insn "@aarch64_sme_read<mode>"
786 [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
788 [(reg:SVE_DIx24 ZA_REGNUM)
789 (reg:DI SME_STATE_REGNUM)
790 (match_operand:SI 1 "register_operand" "Uci")]
792 "TARGET_STREAMING_SME2"
793 "mova\t%0, za.d[%w1, 0, vgx<vector_count>]"
;; As the pattern above, but with a constant slice offset in operand 2,
;; which must satisfy const_0_to_7_operand.
796 (define_insn "*aarch64_sme_read<mode>_plus"
797 [(set (match_operand:SVE_DIx24 0 "aligned_register_operand" "=Uw<vector_count>")
799 [(reg:SVE_DIx24 ZA_REGNUM)
800 (reg:DI SME_STATE_REGNUM)
801 (plus:SI (match_operand:SI 1 "register_operand" "Uci")
802 (match_operand:SI 2 "const_0_to_7_operand"))]
804 "TARGET_STREAMING_SME2"
805 "mova\t%0, za.d[%w1, %2, vgx<vector_count>]"
;; Unpredicated multi-vector MOVA from the aligned register group in
;; operand 2 to consecutive ZA tile slices (SME2, streaming mode).
;; Operand 0: constant tile number.
;; Operand 1: 32-bit slice index register (constraint "Ucj").
;; operands[3] is computed at output time as <vector_count> - 1 and is
;; printed as the end of the "0:%3" slice range.
808 (define_insn "@aarch64_sme_<optab><mode><mode>"
809 [(set (reg:SVE_FULLx24 ZA_REGNUM)
811 [(reg:SVE_FULLx24 ZA_REGNUM)
812 (reg:DI SME_STATE_REGNUM)
813 (match_operand:DI 0 "const_int_operand")
814 (match_operand:SI 1 "register_operand" "Ucj")
815 (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
817 "TARGET_STREAMING_SME2"
819 operands[3] = GEN_INT (<vector_count> - 1);
820 return "mova\tza%0<hv>.<Vetype>[%w1, 0:%3], %2";
;; As the pattern above, but with a constant slice offset in operand 2.
;; The insn condition requires the offset to be a multiple of
;; <vector_count> and less than 128 / <elem_bits>. The source register
;; group becomes operand 3, and operands[4] is computed at output time as
;; the last slice of the "%2:%4" range.
824 (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
825 [(set (reg:SVE_FULLx24 ZA_REGNUM)
827 [(reg:SVE_FULLx24 ZA_REGNUM)
828 (reg:DI SME_STATE_REGNUM)
829 (match_operand:DI 0 "const_int_operand")
831 (match_operand:SI 1 "register_operand" "Ucj")
832 (match_operand:SI 2 "const_int_operand"))
833 (match_operand:SVE_FULLx24 3 "aligned_register_operand" "Uw<vector_count>")]
835 "TARGET_STREAMING_SME2
836 && UINTVAL (operands[2]) % <vector_count> == 0
837 && UINTVAL (operands[2]) < 128 / <elem_bits>"
839 operands[4] = GEN_INT (INTVAL (operands[2]) + <vector_count> - 1);
840 return "mova\tza%0<hv>.<Vetype>[%w1, %2:%4], %3";
;; Multi-vector MOVA from the aligned register group in operand 1 to a ZA
;; vector group (za.d, vgx<vector_count>). Operand 0 is the 32-bit slice
;; index register (constraint "Uci"); the slice offset is 0.
844 (define_insn "@aarch64_sme_write<mode>"
845 [(set (reg:SVE_DIx24 ZA_REGNUM)
847 [(reg:SVE_DIx24 ZA_REGNUM)
848 (reg:DI SME_STATE_REGNUM)
849 (match_operand:SI 0 "register_operand" "Uci")
850 (match_operand:SVE_DIx24 1 "aligned_register_operand" "Uw<vector_count>")]
852 "TARGET_STREAMING_SME2"
853 "mova\tza.d[%w0, 0, vgx<vector_count>], %1"
;; As the pattern above, but with a constant slice offset in operand 1,
;; which must satisfy const_0_to_7_operand. The source register group
;; becomes operand 2.
856 (define_insn "*aarch64_sme_write<mode>_plus"
857 [(set (reg:SVE_DIx24 ZA_REGNUM)
859 [(reg:SVE_DIx24 ZA_REGNUM)
860 (reg:DI SME_STATE_REGNUM)
861 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
862 (match_operand:SI 1 "const_0_to_7_operand"))
863 (match_operand:SVE_DIx24 2 "aligned_register_operand" "Uw<vector_count>")]
865 "TARGET_STREAMING_SME2"
866 "mova\tza.d[%w0, %1, vgx<vector_count>], %2"
869 ;; -------------------------------------------------------------------------
871 ;; -------------------------------------------------------------------------
874 ;; -------------------------------------------------------------------------
;; Unspec code for the zeroing patterns that follow.
876 (define_c_enum "unspec" [UNSPEC_SME_ZERO])
;; Redefine ZA from its old value, SME_STATE_REGNUM and the constant in
;; operand 0. The assembly text is produced by aarch64_output_sme_zero_za
;; from operand 0.
878 (define_insn "aarch64_sme_zero_za"
879 [(set (reg:VNx16QI ZA_REGNUM)
880 (unspec:VNx16QI [(reg:VNx16QI ZA_REGNUM)
881 (reg:DI SME_STATE_REGNUM)
882 (match_operand:DI 0 "const_int_operand")]
886 return aarch64_output_sme_zero_za (operands[0]);
;; Defines ZT0_REGNUM (V8DI); the (use ...) makes the insn depend on
;; SME_STATE_REGNUM.
;; NOTE(review): presumably this zeroes ZT0, going by the pattern name;
;; the source value and output template are not visible here -- confirm.
890 (define_insn "aarch64_sme_zero_zt0"
891 [(set (reg:V8DI ZT0_REGNUM)
893 (use (reg:DI SME_STATE_REGNUM))]
898 ;; =========================================================================
899 ;; == Binary arithmetic
900 ;; =========================================================================
902 ;; -------------------------------------------------------------------------
903 ;; ---- Binary arithmetic on ZA tile
904 ;; -------------------------------------------------------------------------
908 ;; -------------------------------------------------------------------------
;; Doubly-predicated <optab> operation that updates a whole ZA tile from
;; one source vector, for streaming SME.
;; Operand 0: constant tile number.
;; Operands 1 and 2: the two governing predicates (both merging, /m).
;; Operand 3: source vector.
910 (define_insn "@aarch64_sme_<optab><mode>"
911 [(set (reg:SME_ZA_SDI ZA_REGNUM)
913 [(reg:SME_ZA_SDI ZA_REGNUM)
914 (reg:DI SME_STATE_REGNUM)
915 (match_operand:DI 0 "const_int_operand")
916 (match_operand:<VPRED> 1 "register_operand" "Upl")
917 (match_operand:<VPRED> 2 "register_operand" "Upl")
918 (match_operand:SME_ZA_SDI 3 "register_operand" "w")]
920 "TARGET_STREAMING_SME"
921 "<optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>"
924 ;; -------------------------------------------------------------------------
925 ;; ---- Binary arithmetic on ZA slice
926 ;; -------------------------------------------------------------------------
929 ;; -------------------------------------------------------------------------
;; Integer SME_BINARY_SLICE_SDI operation on a ZA vector group
;; (vgx<vector_count>), for SME2 streaming mode.
;; Operand 0: 32-bit slice index register (constraint "Uci"); offset 0.
;; Operand 1: aligned source register group.
931 (define_insn "@aarch64_sme_<optab><mode>"
932 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
933 (unspec:SME_ZA_SDIx24
934 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
935 (reg:DI SME_STATE_REGNUM)
936 (match_operand:SI 0 "register_operand" "Uci")
937 (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")]
938 SME_BINARY_SLICE_SDI))]
939 "TARGET_STREAMING_SME2"
940 "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
;; As the pattern above, but with a constant slice offset in operand 1,
;; which must satisfy const_0_to_7_operand. The source register group
;; becomes operand 2.
943 (define_insn "*aarch64_sme_<optab><mode>_plus"
944 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
945 (unspec:SME_ZA_SDIx24
946 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
947 (reg:DI SME_STATE_REGNUM)
948 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
949 (match_operand:SI 1 "const_0_to_7_operand"))
950 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
951 SME_BINARY_SLICE_SDI))]
952 "TARGET_STREAMING_SME2"
953 "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
;; SME_BINARY_SLICE_SDF counterpart of the integer ZA-slice pattern, using
;; the SME_ZA_SDFx24 modes.
;; Operand 0: 32-bit slice index register (constraint "Uci"); offset 0.
;; Operand 1: aligned source register group.
956 (define_insn "@aarch64_sme_<optab><mode>"
957 [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
958 (unspec:SME_ZA_SDFx24
959 [(reg:SME_ZA_SDFx24 ZA_REGNUM)
960 (reg:DI SME_STATE_REGNUM)
961 (match_operand:SI 0 "register_operand" "Uci")
962 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
963 SME_BINARY_SLICE_SDF))]
964 "TARGET_STREAMING_SME2"
965 "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
;; As the pattern above, but with a constant slice offset in operand 1,
;; which must satisfy const_0_to_7_operand. The source register group
;; becomes operand 2.
968 (define_insn "*aarch64_sme_<optab><mode>_plus"
969 [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
970 (unspec:SME_ZA_SDFx24
971 [(reg:SME_ZA_SDFx24 ZA_REGNUM)
972 (reg:DI SME_STATE_REGNUM)
973 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
974 (match_operand:SI 1 "const_0_to_7_operand"))
975 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
976 SME_BINARY_SLICE_SDF))]
977 "TARGET_STREAMING_SME2"
978 "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
981 ;; -------------------------------------------------------------------------
982 ;; ---- Binary arithmetic, writing to ZA slice
983 ;; -------------------------------------------------------------------------
987 ;; -------------------------------------------------------------------------
;; SME_BINARY_WRITE_SLICE_SDI operation with two multi-vector sources,
;; for SME2 streaming mode. The mnemonic comes from <sme_int_op>.
;; Operand 0: 32-bit slice index register (constraint "Uci"); offset 0.
;; Operands 1 and 2: aligned source register groups.
989 (define_insn "@aarch64_sme_<optab><mode>"
990 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
991 (unspec:SME_ZA_SDIx24
992 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
993 (reg:DI SME_STATE_REGNUM)
994 (match_operand:SI 0 "register_operand" "Uci")
995 (match_operand:SME_ZA_SDIx24 1 "aligned_register_operand" "Uw<vector_count>")
996 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")]
997 SME_BINARY_WRITE_SLICE_SDI))]
998 "TARGET_STREAMING_SME2"
999 "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
;; As the pattern above, but with a constant slice offset in operand 1,
;; which must satisfy const_0_to_7_operand. The two source register groups
;; become operands 2 and 3.
1002 (define_insn "*aarch64_sme_<optab><mode>_plus"
1003 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1004 (unspec:SME_ZA_SDIx24
1005 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1006 (reg:DI SME_STATE_REGNUM)
1007 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1008 (match_operand:SI 1 "const_0_to_7_operand"))
1009 (match_operand:SME_ZA_SDIx24 2 "aligned_register_operand" "Uw<vector_count>")
1010 (match_operand:SME_ZA_SDIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1011 SME_BINARY_WRITE_SLICE_SDI))]
1012 "TARGET_STREAMING_SME2"
1013 "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
;; "Single" form of SME_BINARY_WRITE_SLICE_SDI, offset 0.  Operand 1 is a
;; vector tuple ("w", no alignment predicate) and operand 2 is one <VSINGLE>
;; vector ("x") that the RTL duplicates to the tuple mode; it is printed with
;; an explicit .<Vetype> suffix.
1016 (define_insn "@aarch64_sme_single_<optab><mode>"
1017 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1018 (unspec:SME_ZA_SDIx24
1019 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1020 (reg:DI SME_STATE_REGNUM)
1021 (match_operand:SI 0 "register_operand" "Uci")
1022 (match_operand:SME_ZA_SDIx24 1 "register_operand" "w")
1023 (vec_duplicate:SME_ZA_SDIx24
1024 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1025 SME_BINARY_WRITE_SLICE_SDI))]
1026 "TARGET_STREAMING_SME2"
1027 "<sme_int_op>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
;; Base-plus-offset form of the "single" SME_BINARY_WRITE_SLICE_SDI pattern.
;; Slice index is (plus operand 0, operand 1), operand 1 in the
;; const_0_to_7_operand range.  Operand 2 is the tuple; operand 3 is the
;; duplicated <VSINGLE> vector.
1030 (define_insn "*aarch64_sme_single_<optab><mode>_plus"
1031 [(set (reg:SME_ZA_SDIx24 ZA_REGNUM)
1032 (unspec:SME_ZA_SDIx24
1033 [(reg:SME_ZA_SDIx24 ZA_REGNUM)
1034 (reg:DI SME_STATE_REGNUM)
1035 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1036 (match_operand:SI 1 "const_0_to_7_operand"))
1037 (match_operand:SME_ZA_SDIx24 2 "register_operand" "w")
1038 (vec_duplicate:SME_ZA_SDIx24
1039 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1040 SME_BINARY_WRITE_SLICE_SDI))]
1041 "TARGET_STREAMING_SME2"
1042 "<sme_int_op>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
1045 ;; =========================================================================
1046 ;; == Ternary arithmetic
1047 ;; =========================================================================
1049 ;; -------------------------------------------------------------------------
1050 ;; ---- [INT] Dot product
1051 ;; -------------------------------------------------------------------------
1057 ;; -------------------------------------------------------------------------
;; Integer dot product of two aligned vector tuples (operands 1 and 2) into
;; ZA, offset-0 form.  The condition filters the iterator cross product: it
;; needs (ZA elements of 32 bits or 16-bit inputs) and (8-bit inputs or
;; <has_16bit_form>), in addition to TARGET_STREAMING_SME2.
1059 (define_insn "@aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1060 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1062 [(reg:SME_ZA_SDI ZA_REGNUM)
1063 (reg:DI SME_STATE_REGNUM)
1064 (match_operand:SI 0 "register_operand" "Uci")
1065 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1066 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1068 "TARGET_STREAMING_SME2
1069 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1070 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1071 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple integer dot product.  The
;; slice index is (plus operand 0, operand 1) with operand 1 accepted by
;; const_0_to_7_operand; the tuples are operands 2 and 3.  Same mode filter
;; in the condition as the offset-0 form.
1074 (define_insn "*aarch64_sme_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1075 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1077 [(reg:SME_ZA_SDI ZA_REGNUM)
1078 (reg:DI SME_STATE_REGNUM)
1079 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1080 (match_operand:SI 1 "const_0_to_7_operand"))
1081 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1082 (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1084 "TARGET_STREAMING_SME2
1085 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1086 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1087 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
;; "Single" integer dot product, offset-0 form: operand 1 is a vector tuple
;; and operand 2 is one <VSINGLE> vector ("x") duplicated to the tuple mode.
;; The single vector is printed with the input element suffix.
1090 (define_insn "@aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1091 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1093 [(reg:SME_ZA_SDI ZA_REGNUM)
1094 (reg:DI SME_STATE_REGNUM)
1095 (match_operand:SI 0 "register_operand" "Uci")
1096 (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
1097 (vec_duplicate:SME_ZA_BHIx24
1098 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1100 "TARGET_STREAMING_SME2
1101 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1102 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1103 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
;; Base-plus-offset form of the "single" integer dot product.  Slice index is
;; (plus operand 0, operand 1), operand 1 limited by const_0_to_7_operand.
;; Operand 2 is the tuple and operand 3 the duplicated <VSINGLE> vector.
1106 (define_insn "*aarch64_sme_single_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1107 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1109 [(reg:SME_ZA_SDI ZA_REGNUM)
1110 (reg:DI SME_STATE_REGNUM)
1111 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1112 (match_operand:SI 1 "const_0_to_7_operand"))
1113 (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
1114 (vec_duplicate:SME_ZA_BHIx24
1115 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1117 "TARGET_STREAMING_SME2
1118 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1119 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1120 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>"
1123 ;; SUDOT is USDOT with the operands swapped.
;; Offset-0 form.  Note the RTL operand order: the duplicated single vector
;; (operand 2) comes before the tuple (operand 1), i.e. swapped relative to
;; the other "single" patterns, while the assembly template still prints the
;; tuple first and the single vector second with a fixed .b suffix.
1124 (define_insn "@aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>"
1125 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1127 [(reg:VNx4SI_ONLY ZA_REGNUM)
1128 (reg:DI SME_STATE_REGNUM)
1129 (match_operand:SI 0 "register_operand" "Uci")
1130 (vec_duplicate:SME_ZA_BIx24
1131 (match_operand:<VSINGLE> 2 "register_operand" "x"))
1132 (match_operand:SME_ZA_BIx24 1 "register_operand" "w")]
1134 "TARGET_STREAMING_SME2"
1135 "sudot\tza.s[%w0, 0, vgx<vector_count>], %1, %2.b"
;; Base-plus-offset form of the single-vector SUDOT pattern.  Slice index is
;; (plus operand 0, operand 1) with operand 1 accepted by
;; const_0_to_7_operand.  As in the offset-0 form, the duplicated single
;; vector (operand 3) precedes the tuple (operand 2) in the RTL, but the
;; template prints the tuple first.
1138 (define_insn "*aarch64_sme_single_sudot<VNx4SI_ONLY:mode><SME_ZA_BIx24:mode>_plus"
1139 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1141 [(reg:VNx4SI_ONLY ZA_REGNUM)
1142 (reg:DI SME_STATE_REGNUM)
1143 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1144 (match_operand:SI 1 "const_0_to_7_operand"))
1145 (vec_duplicate:SME_ZA_BIx24
1146 (match_operand:<VSINGLE> 3 "register_operand" "x"))
1147 (match_operand:SME_ZA_BIx24 2 "register_operand" "w")]
1149 "TARGET_STREAMING_SME2"
1150 "sudot\tza.s[%w0, %1, vgx<vector_count>], %2, %3.b"
;; Indexed ("lane") integer dot product (SME_INT_DOTPROD_LANE), offset-0
;; form.  Operand 1 is the aligned tuple; operand 2 is a single vector ("x")
;; paired with constant lane number operand 3 inside UNSPEC_SVE_LANE_SELECT
;; and printed as %2.<suffix>[%3].
1153 (define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>"
1154 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1156 [(reg:SME_ZA_SDI ZA_REGNUM)
1157 (reg:DI SME_STATE_REGNUM)
1158 (match_operand:SI 0 "register_operand" "Uci")
1159 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1160 (unspec:SME_ZA_BHIx24
1161 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1162 (match_operand:SI 3 "const_int_operand")]
1163 UNSPEC_SVE_LANE_SELECT)]
1164 SME_INT_DOTPROD_LANE))]
1165 "TARGET_STREAMING_SME2
1166 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1167 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1168 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>[%3]"
;; Base-plus-offset form of the indexed integer dot product.  Slice index is
;; (plus operand 0, operand 1), operand 1 limited by const_0_to_7_operand.
;; Operand 2 is the aligned tuple, operand 3 the single vector and operand 4
;; the constant lane number.
1171 (define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDI:mode><SME_ZA_BHIx24:mode>_plus"
1172 [(set (reg:SME_ZA_SDI ZA_REGNUM)
1174 [(reg:SME_ZA_SDI ZA_REGNUM)
1175 (reg:DI SME_STATE_REGNUM)
1176 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1177 (match_operand:SI 1 "const_0_to_7_operand"))
1178 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1179 (unspec:SME_ZA_BHIx24
1180 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1181 (match_operand:SI 4 "const_int_operand")]
1182 UNSPEC_SVE_LANE_SELECT)]
1183 SME_INT_DOTPROD_LANE))]
1184 "TARGET_STREAMING_SME2
1185 && (<SME_ZA_SDI:elem_bits> == 32 || <SME_ZA_BHIx24:elem_bits> == 16)
1186 && (<SME_ZA_BHIx24:elem_bits> == 8 || <has_16bit_form>)"
1187 "<optab>\tza.<SME_ZA_SDI:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>[%4]"
1190 ;; -------------------------------------------------------------------------
1191 ;; ---- [INT] Ternary widening arithmetic on ZA slice
1192 ;; -------------------------------------------------------------------------
1198 ;; -------------------------------------------------------------------------
;; Widening integer ternary operation into za.s with two single vectors
;; (operand 1 "w", operand 2 "x"), offset-0 form.  The mnemonic is
;; <optab><za32_long> and the template addresses the slice range
;; 0:<za32_last_offset>.
1200 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>"
1201 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1203 [(reg:VNx4SI_ONLY ZA_REGNUM)
1204 (reg:DI SME_STATE_REGNUM)
1205 (match_operand:SI 0 "register_operand" "Uci")
1206 (match_operand:SVE_FULL_BHI 1 "register_operand" "w")
1207 (match_operand:SVE_FULL_BHI 2 "register_operand" "x")]
1208 SME_INT_TERNARY_SLICE))]
1209 "TARGET_STREAMING_SME2"
1210 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>], %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>"
;; Base-plus-offset form of the single-vector widening za.s pattern.
;; Operand 1 is the constant start offset (const_<za32_offset_range>_operand).
;; The output code synthesises operands[4] = operand 1 + <za32_last_offset>
;; so the template can print the slice range as %1:%4.
1213 (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_BHI:mode>_plus"
1214 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1216 [(reg:VNx4SI_ONLY ZA_REGNUM)
1217 (reg:DI SME_STATE_REGNUM)
1218 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1219 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1220 (match_operand:SVE_FULL_BHI 2 "register_operand" "w")
1221 (match_operand:SVE_FULL_BHI 3 "register_operand" "x")]
1222 SME_INT_TERNARY_SLICE))]
1223 "TARGET_STREAMING_SME2"
1225 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1226 return "<optab><za32_long>\tza.s[%w0, %1:%4], %2.<SVE_FULL_BHI:Vetype>, %3.<SVE_FULL_BHI:Vetype>";
;; Widening integer ternary operation into za.s with two aligned vector
;; tuples (operands 1 and 2), offset-0 form.  Slice range printed as
;; 0:<za32_last_offset> with a vgx<vector_count> group qualifier.
1230 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
1231 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1233 [(reg:VNx4SI_ONLY ZA_REGNUM)
1234 (reg:DI SME_STATE_REGNUM)
1235 (match_operand:SI 0 "register_operand" "Uci")
1236 (match_operand:SME_ZA_BHIx24 1 "aligned_register_operand" "Uw<vector_count>")
1237 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1238 SME_INT_TERNARY_SLICE))]
1239 "TARGET_STREAMING_SME2"
1240 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple widening za.s pattern.
;; operands[4] is computed as operand 1 + <za32_last_offset> and printed as
;; the end of the slice range (%1:%4); tuples are operands 2 and 3.
1243 (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
1244 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1246 [(reg:VNx4SI_ONLY ZA_REGNUM)
1247 (reg:DI SME_STATE_REGNUM)
1248 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1249 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1250 (match_operand:SME_ZA_BHIx24 2 "aligned_register_operand" "Uw<vector_count>")
1251 (match_operand:SME_ZA_BHIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1252 SME_INT_TERNARY_SLICE))]
1253 "TARGET_STREAMING_SME2"
1255 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1256 return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
;; "Single" widening za.s pattern, offset-0 form: operand 1 is a vector
;; tuple, operand 2 one <SME_ZA_BHIx24:VSINGLE> vector ("x") duplicated to
;; the tuple mode and printed with the input element suffix.
1260 (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>"
1261 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1263 [(reg:VNx4SI_ONLY ZA_REGNUM)
1264 (reg:DI SME_STATE_REGNUM)
1265 (match_operand:SI 0 "register_operand" "Uci")
1266 (match_operand:SME_ZA_BHIx24 1 "register_operand" "w")
1267 (vec_duplicate:SME_ZA_BHIx24
1268 (match_operand:<SME_ZA_BHIx24:VSINGLE> 2 "register_operand" "x"))]
1269 SME_INT_TERNARY_SLICE))]
1270 "TARGET_STREAMING_SME2"
1271 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset>, vgx<vector_count>], %1, %2.<SME_ZA_BHIx24:Vetype>"
;; Base-plus-offset form of the "single" widening za.s pattern.  Operand 1 is
;; the constant start offset; operands[4] = operand 1 + <za32_last_offset> is
;; created in the output code for the %1:%4 range.  Operand 2 is the tuple
;; and operand 3 the duplicated single vector.
1274 (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx24:mode>_plus"
1275 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1277 [(reg:VNx4SI_ONLY ZA_REGNUM)
1278 (reg:DI SME_STATE_REGNUM)
1279 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1280 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1281 (match_operand:SME_ZA_BHIx24 2 "register_operand" "w")
1282 (vec_duplicate:SME_ZA_BHIx24
1283 (match_operand:<SME_ZA_BHIx24:VSINGLE> 3 "register_operand" "x"))]
1284 SME_INT_TERNARY_SLICE))]
1285 "TARGET_STREAMING_SME2"
1287 operands[4] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1288 return "<optab><za32_long>\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.<SME_ZA_BHIx24:Vetype>";
;; Indexed ("lane") widening za.s pattern, offset-0 form, iterating over
;; SME_ZA_BHIx124.  Operand 1 takes its predicate and constraint from
;; <aligned_operand>/<aligned_fpr>; the template appends <vg_modifier> to the
;; slice range and <z_suffix> to operand 1.  Operand 2 is the single vector
;; and operand 3 its constant lane number.
1292 (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>"
1293 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1295 [(reg:VNx4SI_ONLY ZA_REGNUM)
1296 (reg:DI SME_STATE_REGNUM)
1297 (match_operand:SI 0 "register_operand" "Uci")
1298 (match_operand:SME_ZA_BHIx124 1 "<aligned_operand>" "<aligned_fpr>")
1299 (unspec:SME_ZA_BHIx124
1300 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1301 (match_operand:SI 3 "const_int_operand")]
1302 UNSPEC_SVE_LANE_SELECT)]
1303 SME_INT_TERNARY_SLICE))]
1304 "TARGET_STREAMING_SME2"
1305 "<optab><za32_long>\tza.s[%w0, 0:<za32_last_offset><vg_modifier>], %1<z_suffix>, %2.<SME_ZA_BHIx124:Vetype>[%3]"
;; Base-plus-offset form of the indexed widening za.s pattern.  Named with
;; the "_plus" suffix like the other (plus ...) variants in this file so that
;; it is distinguishable from the offset-0 pattern in RTL dumps; "*" names
;; generate no gen_* function, so nothing refers to the name.
;; Operand 1 is the constant start offset; operands[5] = operand 1 +
;; <za32_last_offset> is created in the output code for the %1:%5 range.
;; Operand 2 is the vector (tuple), operand 3 the single vector and operand 4
;; its constant lane number.
1308 (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_BHIx124:mode>_plus"
1309 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1311 [(reg:VNx4SI_ONLY ZA_REGNUM)
1312 (reg:DI SME_STATE_REGNUM)
1313 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1314 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1315 (match_operand:SME_ZA_BHIx124 2 "<aligned_operand>" "<aligned_fpr>")
1316 (unspec:SME_ZA_BHIx124
1317 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1318 (match_operand:SI 4 "const_int_operand")]
1319 UNSPEC_SVE_LANE_SELECT)]
1320 SME_INT_TERNARY_SLICE))]
1321 "TARGET_STREAMING_SME2"
1323 operands[5] = GEN_INT (INTVAL (operands[1]) + <za32_last_offset>);
1324 return "<optab><za32_long>\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.<SME_ZA_BHIx124:Vetype>[%4]";
;; Widening integer ternary operation into za.d from two single .h vectors
;; (operand 1 "w", operand 2 "x"), offset-0 form.  Emits <optab>ll over the
;; fixed slice range 0:3.  Requires TARGET_SME2, TARGET_SME_I16I64 and
;; TARGET_STREAMING_SME together.
1328 (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
1329 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1331 [(reg:VNx2DI_ONLY ZA_REGNUM)
1332 (reg:DI SME_STATE_REGNUM)
1333 (match_operand:SI 0 "register_operand" "Uci")
1334 (match_operand:VNx8HI_ONLY 1 "register_operand" "w")
1335 (match_operand:VNx8HI_ONLY 2 "register_operand" "x")]
1336 SME_INT_TERNARY_SLICE))]
1337 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1338 "<optab>ll\tza.d[%w0, 0:3], %1.h, %2.h"
;; Base-plus-offset form of the single-vector widening za.d pattern.
;; Operand 1 is the constant start offset (const_<za64_offset_range>_operand);
;; operands[4] = operand 1 + 3 is created in the output code so the slice
;; range prints as %1:%4.
1341 (define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>_plus"
1342 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1344 [(reg:VNx2DI_ONLY ZA_REGNUM)
1345 (reg:DI SME_STATE_REGNUM)
1346 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1347 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1348 (match_operand:VNx8HI_ONLY 2 "register_operand" "w")
1349 (match_operand:VNx8HI_ONLY 3 "register_operand" "x")]
1350 SME_INT_TERNARY_SLICE))]
1351 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1353 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1354 return "<optab>ll\tza.d[%w0, %1:%4], %2.h, %3.h";
;; Widening integer ternary operation into za.d with two aligned vector
;; tuples (operands 1 and 2), offset-0 form.  Emits <optab>ll over slice
;; range 0:3 with a vgx<vector_count> qualifier.
1358 (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
1359 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1361 [(reg:VNx2DI_ONLY ZA_REGNUM)
1362 (reg:DI SME_STATE_REGNUM)
1363 (match_operand:SI 0 "register_operand" "Uci")
1364 (match_operand:SME_ZA_HIx24 1 "aligned_register_operand" "Uw<vector_count>")
1365 (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")]
1366 SME_INT_TERNARY_SLICE))]
1367 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1368 "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple widening za.d pattern.
;; operands[4] = operand 1 + 3 supplies the end of the printed slice range
;; (%1:%4); the tuples are operands 2 and 3.
1371 (define_insn "*aarch64_sme_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
1372 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1374 [(reg:VNx2DI_ONLY ZA_REGNUM)
1375 (reg:DI SME_STATE_REGNUM)
1376 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1377 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1378 (match_operand:SME_ZA_HIx24 2 "aligned_register_operand" "Uw<vector_count>")
1379 (match_operand:SME_ZA_HIx24 3 "aligned_register_operand" "Uw<vector_count>")]
1380 SME_INT_TERNARY_SLICE))]
1381 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1383 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1384 return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3";
;; "Single" widening za.d pattern, offset-0 form: operand 1 is a vector
;; tuple, operand 2 one <SME_ZA_HIx24:VSINGLE> vector ("x") duplicated to the
;; tuple mode and printed with a fixed .h suffix.
1388 (define_insn "@aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>"
1389 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1391 [(reg:VNx2DI_ONLY ZA_REGNUM)
1392 (reg:DI SME_STATE_REGNUM)
1393 (match_operand:SI 0 "register_operand" "Uci")
1394 (match_operand:SME_ZA_HIx24 1 "register_operand" "w")
1395 (vec_duplicate:SME_ZA_HIx24
1396 (match_operand:<SME_ZA_HIx24:VSINGLE> 2 "register_operand" "x"))]
1397 SME_INT_TERNARY_SLICE))]
1398 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1399 "<optab>ll\tza.d[%w0, 0:3, vgx<vector_count>], %1, %2.h"
;; Base-plus-offset form of the "single" widening za.d pattern.  Operand 1 is
;; the constant start offset and operands[4] = operand 1 + 3 the printed
;; range end.  Operand 2 is the tuple, operand 3 the duplicated single
;; vector.
1402 (define_insn "*aarch64_sme_single_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx24:mode>_plus"
1403 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1405 [(reg:VNx2DI_ONLY ZA_REGNUM)
1406 (reg:DI SME_STATE_REGNUM)
1407 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1408 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1409 (match_operand:SME_ZA_HIx24 2 "register_operand" "w")
1410 (vec_duplicate:SME_ZA_HIx24
1411 (match_operand:<SME_ZA_HIx24:VSINGLE> 3 "register_operand" "x"))]
1412 SME_INT_TERNARY_SLICE))]
1413 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1415 operands[4] = GEN_INT (INTVAL (operands[1]) + 3);
1416 return "<optab>ll\tza.d[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
;; Indexed ("lane") widening za.d pattern, offset-0 form, iterating over
;; SME_ZA_HIx124.  Operand 1 takes its predicate and constraint from
;; <aligned_operand>/<aligned_fpr>; the template appends <vg_modifier> to the
;; 0:3 slice range and <z_suffix> to operand 1.  Operand 2 is the single
;; vector and operand 3 its constant lane number.
1420 (define_insn "@aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>"
1421 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1423 [(reg:VNx2DI_ONLY ZA_REGNUM)
1424 (reg:DI SME_STATE_REGNUM)
1425 (match_operand:SI 0 "register_operand" "Uci")
1426 (match_operand:SME_ZA_HIx124 1 "<aligned_operand>" "<aligned_fpr>")
1427 (unspec:SME_ZA_HIx124
1428 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1429 (match_operand:SI 3 "const_int_operand")]
1430 UNSPEC_SVE_LANE_SELECT)]
1431 SME_INT_TERNARY_SLICE))]
1432 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1433 "<optab>ll\tza.d[%w0, 0:3<vg_modifier>], %1<z_suffix>, %2.h[%3]"
;; Base-plus-offset form of the indexed widening za.d pattern.  Named with
;; the "_plus" suffix like the other (plus ...) variants in this file so that
;; it is distinguishable from the offset-0 pattern in RTL dumps; "*" names
;; generate no gen_* function, so nothing refers to the name.
;; Operand 1 is the constant start offset (const_<za64_offset_range>_operand);
;; operands[5] = operand 1 + 3 is created in the output code for the %1:%5
;; range.  Operand 2 is the vector (tuple), operand 3 the single vector and
;; operand 4 its constant lane number.
1436 (define_insn "*aarch64_sme_lane_<optab><VNx2DI_ONLY:mode><SME_ZA_HIx124:mode>_plus"
1437 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1439 [(reg:VNx2DI_ONLY ZA_REGNUM)
1440 (reg:DI SME_STATE_REGNUM)
1441 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1442 (match_operand:SI 1 "const_<za64_offset_range>_operand"))
1443 (match_operand:SME_ZA_HIx124 2 "<aligned_operand>" "<aligned_fpr>")
1444 (unspec:SME_ZA_HIx124
1445 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1446 (match_operand:SI 4 "const_int_operand")]
1447 UNSPEC_SVE_LANE_SELECT)]
1448 SME_INT_TERNARY_SLICE))]
1449 "TARGET_SME2 && TARGET_SME_I16I64 && TARGET_STREAMING_SME"
1451 operands[5] = GEN_INT (INTVAL (operands[1]) + 3);
1452 return "<optab>ll\tza.d[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
1456 ;; -------------------------------------------------------------------------
1457 ;; ---- [INT] Sum of outer products
1458 ;; -------------------------------------------------------------------------
1469 ;; -------------------------------------------------------------------------
;; Integer outer-product pattern from .b vectors into a .s ZA tile.
;; Operand 0 is a DI constant printed directly after "za" in the template;
;; operands 1 and 2 are predicate registers ("Upl") printed with /m;
;; operands 3 and 4 are the two source vectors.  Enabled for
;; TARGET_STREAMING_SME.
1471 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
1472 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1474 [(reg:VNx4SI_ONLY ZA_REGNUM)
1475 (reg:DI SME_STATE_REGNUM)
1476 (match_operand:DI 0 "const_int_operand")
1477 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1478 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1479 (match_operand:VNx16QI_ONLY 3 "register_operand" "w")
1480 (match_operand:VNx16QI_ONLY 4 "register_operand" "w")]
1482 "TARGET_STREAMING_SME"
1483 "<optab>\tza%0.s, %1/m, %2/m, %3.b, %4.b"
;; Integer outer-product pattern from .h vectors into a .d ZA tile.  Same
;; operand layout as the .b-to-.s form (constant after "za", two /m
;; predicates, two source vectors) but additionally requires
;; TARGET_SME_I16I64.
1486 (define_insn "@aarch64_sme_<optab><VNx2DI_ONLY:mode><VNx8HI_ONLY:mode>"
1487 [(set (reg:VNx2DI_ONLY ZA_REGNUM)
1489 [(reg:VNx2DI_ONLY ZA_REGNUM)
1490 (reg:DI SME_STATE_REGNUM)
1491 (match_operand:DI 0 "const_int_operand")
1492 (match_operand:<VNx2DI_ONLY:VPRED> 1 "register_operand" "Upl")
1493 (match_operand:<VNx2DI_ONLY:VPRED> 2 "register_operand" "Upl")
1494 (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
1495 (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
1497 "TARGET_STREAMING_SME && TARGET_SME_I16I64"
1498 "<optab>\tza%0.d, %1/m, %2/m, %3.h, %4.h"
;; Integer outer-product pattern from .h vectors into a .s ZA tile.
;; Operand 0 is the constant printed after "za"; operands 1 and 2 are /m
;; predicates; operands 3 and 4 are the source vectors.  Gated on
;; TARGET_STREAMING_SME2.
1501 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx8HI_ONLY:mode>"
1502 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1504 [(reg:VNx4SI_ONLY ZA_REGNUM)
1505 (reg:DI SME_STATE_REGNUM)
1506 (match_operand:DI 0 "const_int_operand")
1507 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1508 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1509 (match_operand:VNx8HI_ONLY 3 "register_operand" "w")
1510 (match_operand:VNx8HI_ONLY 4 "register_operand" "w")]
1512 "TARGET_STREAMING_SME2"
1513 "<optab>\tza%0.s, %1/m, %2/m, %3.h, %4.h"
;; Outer-product pattern whose source vectors and ZA tile are both .s.
;; Operand 0 is the constant printed after "za"; operands 1 and 2 are /m
;; predicates; operands 3 and 4 are the source vectors.  Gated on
;; TARGET_STREAMING_SME2.
1516 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><VNx4SI_ONLY:mode>"
1517 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1519 [(reg:VNx4SI_ONLY ZA_REGNUM)
1520 (reg:DI SME_STATE_REGNUM)
1521 (match_operand:DI 0 "const_int_operand")
1522 (match_operand:<VNx4SI_ONLY:VPRED> 1 "register_operand" "Upl")
1523 (match_operand:<VNx4SI_ONLY:VPRED> 2 "register_operand" "Upl")
1524 (match_operand:VNx4SI_ONLY 3 "register_operand" "w")
1525 (match_operand:VNx4SI_ONLY 4 "register_operand" "w")]
1527 "TARGET_STREAMING_SME2"
1528 "<optab>\tza%0.s, %1/m, %2/m, %3.s, %4.s"
1531 ;; -------------------------------------------------------------------------
1532 ;; ---- [FP] Dot product
1533 ;; -------------------------------------------------------------------------
1537 ;; -------------------------------------------------------------------------
;; Floating-point dot product of two aligned SME_ZA_HFx24 tuples (operands 1
;; and 2) into za.s, offset-0 form.  The mnemonic is <optab> prefixed by the
;; <b> attribute; the slice offset is a literal 0.
1539 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1540 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1542 [(reg:VNx4SI_ONLY ZA_REGNUM)
1543 (reg:DI SME_STATE_REGNUM)
1544 (match_operand:SI 0 "register_operand" "Uci")
1545 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1546 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1548 "TARGET_STREAMING_SME2"
1549 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple FP dot product.  Slice index
;; is (plus operand 0, operand 1), operand 1 limited by
;; const_0_to_7_operand; the tuples are operands 2 and 3.
1552 (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1553 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1555 [(reg:VNx4SI_ONLY ZA_REGNUM)
1556 (reg:DI SME_STATE_REGNUM)
1557 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1558 (match_operand:SI 1 "const_0_to_7_operand"))
1559 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1560 (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1562 "TARGET_STREAMING_SME2"
1563 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3"
;; "Single" FP dot product, offset-0 form: operand 1 is a vector tuple and
;; operand 2 one <VSINGLE> vector ("x") duplicated to the tuple mode and
;; printed with a fixed .h suffix.
1566 (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1567 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1569 [(reg:VNx4SI_ONLY ZA_REGNUM)
1570 (reg:DI SME_STATE_REGNUM)
1571 (match_operand:SI 0 "register_operand" "Uci")
1572 (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
1573 (vec_duplicate:SME_ZA_HFx24
1574 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1576 "TARGET_STREAMING_SME2"
1577 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h"
;; Base-plus-offset form of the "single" FP dot product.  Slice index is
;; (plus operand 0, operand 1), operand 1 limited by const_0_to_7_operand.
;; Operand 2 is the tuple and operand 3 the duplicated single vector.
1580 (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1581 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1583 [(reg:VNx4SI_ONLY ZA_REGNUM)
1584 (reg:DI SME_STATE_REGNUM)
1585 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1586 (match_operand:SI 1 "const_0_to_7_operand"))
1587 (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
1588 (vec_duplicate:SME_ZA_HFx24
1589 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1591 "TARGET_STREAMING_SME2"
1592 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h"
;; Indexed ("lane") FP dot product (SME_FP_DOTPROD_LANE), offset-0 form.
;; Operand 1 is the aligned tuple; operand 2 is a single vector ("x") paired
;; with constant lane number operand 3 in UNSPEC_SVE_LANE_SELECT and printed
;; as %2.h[%3].
1595 (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1596 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1598 [(reg:VNx4SI_ONLY ZA_REGNUM)
1599 (reg:DI SME_STATE_REGNUM)
1600 (match_operand:SI 0 "register_operand" "Uci")
1601 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1602 (unspec:SME_ZA_HFx24
1603 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1604 (match_operand:SI 3 "const_int_operand")]
1605 UNSPEC_SVE_LANE_SELECT)]
1606 SME_FP_DOTPROD_LANE))]
1607 "TARGET_STREAMING_SME2"
1608 "<b><optab>\tza.s[%w0, 0, vgx<vector_count>], %1, %2.h[%3]"
;; Base-plus-offset form of the indexed FP dot product.  Slice index is
;; (plus operand 0, operand 1), operand 1 limited by const_0_to_7_operand.
;; Operand 2 is the aligned tuple, operand 3 the single vector and operand 4
;; the constant lane number.
1611 (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1612 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1614 [(reg:VNx4SI_ONLY ZA_REGNUM)
1615 (reg:DI SME_STATE_REGNUM)
1616 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1617 (match_operand:SI 1 "const_0_to_7_operand"))
1618 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1619 (unspec:SME_ZA_HFx24
1620 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1621 (match_operand:SI 4 "const_int_operand")]
1622 UNSPEC_SVE_LANE_SELECT)]
1623 SME_FP_DOTPROD_LANE))]
1624 "TARGET_STREAMING_SME2"
1625 "<b><optab>\tza.s[%w0, %1, vgx<vector_count>], %2, %3.h[%4]"
1628 ;; -------------------------------------------------------------------------
1629 ;; ---- [FP] Ternary arithmetic on ZA slice
1630 ;; -------------------------------------------------------------------------
1634 ;; -------------------------------------------------------------------------
;; Non-widening FP ternary operation (SME_FP_TERNARY_SLICE) on two aligned
;; tuples (operands 1 and 2), offset-0 form.  The visible part of the
;; condition requires TARGET_STREAMING_SME and that the ZA mode and the
;; vector mode have the same element size, which filters the iterator cross
;; product down to matching pairs.
1636 (define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1637 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1638 (unspec:SME_ZA_SDF_I
1639 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1640 (reg:DI SME_STATE_REGNUM)
1641 (match_operand:SI 0 "register_operand" "Uci")
1642 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
1643 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1644 SME_FP_TERNARY_SLICE))]
1646 && TARGET_STREAMING_SME
1647 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1648 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple FP ternary pattern.  Slice
;; index is (plus operand 0, operand 1), operand 1 limited by
;; const_0_to_7_operand; the tuples are operands 2 and 3.  The condition
;; again requires equal element sizes for the two modes.
1651 (define_insn "*aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1652 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1653 (unspec:SME_ZA_SDF_I
1654 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1655 (reg:DI SME_STATE_REGNUM)
1656 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1657 (match_operand:SI 1 "const_0_to_7_operand"))
1658 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
1659 (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1660 SME_FP_TERNARY_SLICE))]
1662 && TARGET_STREAMING_SME
1663 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1664 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
;; "Single" FP ternary pattern, offset-0 form: operand 1 is a vector tuple
;; and operand 2 one <VSINGLE> vector ("x") duplicated to the tuple mode and
;; printed with the vector element suffix.  Element sizes of the ZA mode and
;; the vector mode must match.
1667 (define_insn "@aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1668 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1669 (unspec:SME_ZA_SDF_I
1670 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1671 (reg:DI SME_STATE_REGNUM)
1672 (match_operand:SI 0 "register_operand" "Uci")
1673 (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
1674 (vec_duplicate:SME_ZA_SDFx24
1675 (match_operand:<VSINGLE> 2 "register_operand" "x"))]
1676 SME_FP_TERNARY_SLICE))]
1678 && TARGET_STREAMING_SME
1679 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1680 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>"
;; Base-plus-offset form of the "single" FP ternary pattern.  Slice index is
;; (plus operand 0, operand 1), operand 1 limited by const_0_to_7_operand.
;; Operand 2 is the tuple and operand 3 the duplicated single vector.
1683 (define_insn "*aarch64_sme_single_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1684 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1685 (unspec:SME_ZA_SDF_I
1686 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1687 (reg:DI SME_STATE_REGNUM)
1688 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1689 (match_operand:SI 1 "const_0_to_7_operand"))
1690 (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
1691 (vec_duplicate:SME_ZA_SDFx24
1692 (match_operand:<VSINGLE> 3 "register_operand" "x"))]
1693 SME_FP_TERNARY_SLICE))]
1695 && TARGET_STREAMING_SME
1696 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1697 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>"
;; Indexed ("lane") FP ternary pattern, offset-0 form.  Operand 1 is the
;; aligned tuple; operand 2 is a single vector ("x") paired with constant
;; lane number operand 3 in UNSPEC_SVE_LANE_SELECT and printed as
;; %2.<suffix>[%3].  Element sizes of the two modes must match.
1700 (define_insn "@aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>"
1701 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1702 (unspec:SME_ZA_SDF_I
1703 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1704 (reg:DI SME_STATE_REGNUM)
1705 (match_operand:SI 0 "register_operand" "Uci")
1706 (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
1707 (unspec:SME_ZA_SDFx24
1708 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1709 (match_operand:SI 3 "const_int_operand")]
1710 UNSPEC_SVE_LANE_SELECT)]
1711 SME_FP_TERNARY_SLICE))]
1713 && TARGET_STREAMING_SME
1714 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1715 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<SME_ZA_SDFx24:Vetype>[%3]"
;; Base-plus-offset form of the indexed FP ternary pattern.  Named with the
;; "_plus" suffix like the other (plus ...) variants in this file so that it
;; is distinguishable from the offset-0 pattern in RTL dumps; "*" names
;; generate no gen_* function, so nothing refers to the name.
;; Slice index is (plus operand 0, operand 1), operand 1 limited by
;; const_0_to_7_operand.  Operand 2 is the aligned tuple, operand 3 the
;; single vector and operand 4 its constant lane number.  Element sizes of
;; the ZA mode and the vector mode must match.
1718 (define_insn "*aarch64_sme_lane_<optab><SME_ZA_SDF_I:mode><SME_ZA_SDFx24:mode>_plus"
1719 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1720 (unspec:SME_ZA_SDF_I
1721 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1722 (reg:DI SME_STATE_REGNUM)
1723 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1724 (match_operand:SI 1 "const_0_to_7_operand"))
1725 (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
1726 (unspec:SME_ZA_SDFx24
1727 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1728 (match_operand:SI 4 "const_int_operand")]
1729 UNSPEC_SVE_LANE_SELECT)]
1730 SME_FP_TERNARY_SLICE))]
1732 && TARGET_STREAMING_SME
1733 && <SME_ZA_SDF_I:elem_bits> == <SME_ZA_SDFx24:elem_bits>"
1734 "<optab>\tza.<SME_ZA_SDF_I:Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<SME_ZA_SDFx24:Vetype>[%4]"
1737 ;; -------------------------------------------------------------------------
1738 ;; ---- [FP] Ternary widening arithmetic on ZA slice
1739 ;; -------------------------------------------------------------------------
1745 ;; -------------------------------------------------------------------------
;; Widening FP ternary operation into za.s from two single .h vectors
;; (operand 1 "w", operand 2 "x"), offset-0 form.  The mnemonic is
;; <b><optab>l and the template addresses the fixed slice range 0:1.
1747 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>"
1748 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1750 [(reg:VNx4SI_ONLY ZA_REGNUM)
1751 (reg:DI SME_STATE_REGNUM)
1752 (match_operand:SI 0 "register_operand" "Uci")
1753 (match_operand:SVE_FULL_HF 1 "register_operand" "w")
1754 (match_operand:SVE_FULL_HF 2 "register_operand" "x")]
1755 SME_FP_TERNARY_SLICE))]
1756 "TARGET_STREAMING_SME2"
1757 "<b><optab>l\tza.s[%w0, 0:1], %1.h, %2.h"
;; Base-plus-offset form of the single-vector widening FP pattern.
;; Operand 1 is the constant start offset (const_<za32_offset_range>_operand);
;; operands[4] = operand 1 + 1 is created in the output code so the slice
;; range prints as %1:%4.
1760 (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SVE_FULL_HF:mode>_plus"
1761 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1763 [(reg:VNx4SI_ONLY ZA_REGNUM)
1764 (reg:DI SME_STATE_REGNUM)
1765 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1766 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1767 (match_operand:SVE_FULL_HF 2 "register_operand" "w")
1768 (match_operand:SVE_FULL_HF 3 "register_operand" "x")]
1769 SME_FP_TERNARY_SLICE))]
1770 "TARGET_STREAMING_SME2"
1772 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1773 return "<b><optab>l\tza.s[%w0, %1:%4], %2.h, %3.h";
;; Widening FP ternary operation into za.s with two aligned SME_ZA_HFx24
;; tuples (operands 1 and 2), offset-0 form.  Emits <b><optab>l over slice
;; range 0:1 with a vgx<vector_count> qualifier.
1777 (define_insn "@aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1778 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1780 [(reg:VNx4SI_ONLY ZA_REGNUM)
1781 (reg:DI SME_STATE_REGNUM)
1782 (match_operand:SI 0 "register_operand" "Uci")
1783 (match_operand:SME_ZA_HFx24 1 "aligned_register_operand" "Uw<vector_count>")
1784 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")]
1785 SME_FP_TERNARY_SLICE))]
1786 "TARGET_STREAMING_SME2"
1787 "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2"
;; Base-plus-offset form of the tuple-by-tuple widening FP pattern.
;; operands[4] = operand 1 + 1 supplies the end of the printed slice range
;; (%1:%4); the tuples are operands 2 and 3.
1790 (define_insn "*aarch64_sme_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1791 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1793 [(reg:VNx4SI_ONLY ZA_REGNUM)
1794 (reg:DI SME_STATE_REGNUM)
1795 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1796 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1797 (match_operand:SME_ZA_HFx24 2 "aligned_register_operand" "Uw<vector_count>")
1798 (match_operand:SME_ZA_HFx24 3 "aligned_register_operand" "Uw<vector_count>")]
1799 SME_FP_TERNARY_SLICE))]
1800 "TARGET_STREAMING_SME2"
1802 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1803 return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3";
;; "Single" widening FP pattern, offset-0 form: operand 1 is a vector tuple,
;; operand 2 one <SME_ZA_HFx24:VSINGLE> vector ("x") duplicated to the tuple
;; mode and printed with a fixed .h suffix.
1807 (define_insn "@aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>"
1808 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1810 [(reg:VNx4SI_ONLY ZA_REGNUM)
1811 (reg:DI SME_STATE_REGNUM)
1812 (match_operand:SI 0 "register_operand" "Uci")
1813 (match_operand:SME_ZA_HFx24 1 "register_operand" "w")
1814 (vec_duplicate:SME_ZA_HFx24
1815 (match_operand:<SME_ZA_HFx24:VSINGLE> 2 "register_operand" "x"))]
1816 SME_FP_TERNARY_SLICE))]
1817 "TARGET_STREAMING_SME2"
1818 "<b><optab>l\tza.s[%w0, 0:1, vgx<vector_count>], %1, %2.h"
;; Base-plus-offset form of the "single" widening FP pattern.  Operand 1 is
;; the constant start offset and operands[4] = operand 1 + 1 the printed
;; range end.  Operand 2 is the tuple, operand 3 the duplicated single
;; vector.
1821 (define_insn "*aarch64_sme_single_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plus"
1822 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1824 [(reg:VNx4SI_ONLY ZA_REGNUM)
1825 (reg:DI SME_STATE_REGNUM)
1826 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1827 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1828 (match_operand:SME_ZA_HFx24 2 "register_operand" "w")
1829 (vec_duplicate:SME_ZA_HFx24
1830 (match_operand:<SME_ZA_HFx24:VSINGLE> 3 "register_operand" "x"))]
1831 SME_FP_TERNARY_SLICE))]
1832 "TARGET_STREAMING_SME2"
1834 operands[4] = GEN_INT (INTVAL (operands[1]) + 1);
1835 return "<b><optab>l\tza.s[%w0, %1:%4, vgx<vector_count>], %2, %3.h";
;; Lane-indexed form of the ternary-slice pattern.  Operand 1 has mode
;; SME_ZA_HFx124, with its predicate and constraint chosen per mode through
;; <aligned_operand> and <aligned_fpr>.  Operand 2 is a <VSINGLE> register
;; and operand 3 is the constant lane number; the two are combined by
;; UNSPEC_SVE_LANE_SELECT and printed as "%2.h[%3]".  The slice index is
;; operand 0 on its own, so the offset range is the fixed "0:1".
1839 (define_insn "@aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
1840 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1842 [(reg:VNx4SI_ONLY ZA_REGNUM)
1843 (reg:DI SME_STATE_REGNUM)
1844 (match_operand:SI 0 "register_operand" "Uci")
1845 (match_operand:SME_ZA_HFx124 1 "<aligned_operand>" "<aligned_fpr>")
1846 (unspec:SME_ZA_HFx124
1847 [(match_operand:<VSINGLE> 2 "register_operand" "x")
1848 (match_operand:SI 3 "const_int_operand")]
1849 UNSPEC_SVE_LANE_SELECT)]
1850 SME_FP_TERNARY_SLICE))]
1851 "TARGET_STREAMING_SME2"
1852 "<b><optab>l\tza.s[%w0, 0:1<vg_modifier>], %1<z_suffix>, %2.h[%3]"
;; Offset form of the lane-indexed ternary-slice pattern: the ZA slice
;; index is register operand 0 plus constant operand 1.  Operand 2 is the
;; SME_ZA_HFx124 input, operand 3 the <VSINGLE> register and operand 4 the
;; constant lane number.  The output code creates operand 5 as
;; operand 1 + 1 so that the template can print the slice offsets as the
;; range "%1:%5".
;; NOTE(review): the other offset forms in this group carry a "_plus"
;; suffix in their names; this one does not -- confirm whether that is
;; intentional.
1855 (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
1856 [(set (reg:VNx4SI_ONLY ZA_REGNUM)
1858 [(reg:VNx4SI_ONLY ZA_REGNUM)
1859 (reg:DI SME_STATE_REGNUM)
1860 (plus:SI (match_operand:SI 0 "register_operand" "Uci")
1861 (match_operand:SI 1 "const_<za32_offset_range>_operand"))
1862 (match_operand:SME_ZA_HFx124 2 "<aligned_operand>" "<aligned_fpr>")
1863 (unspec:SME_ZA_HFx124
1864 [(match_operand:<VSINGLE> 3 "register_operand" "x")
1865 (match_operand:SI 4 "const_int_operand")]
1866 UNSPEC_SVE_LANE_SELECT)]
1867 SME_FP_TERNARY_SLICE))]
1868 "TARGET_STREAMING_SME2"
1870 operands[5] = GEN_INT (INTVAL (operands[1]) + 1);
1871 return "<b><optab>l\tza.s[%w0, %1:%5<vg_modifier>], %2<z_suffix>, %3.h[%4]";
1875 ;; -------------------------------------------------------------------------
1876 ;; ---- [FP] Sum of outer products
1877 ;; -------------------------------------------------------------------------
1883 ;; -------------------------------------------------------------------------
;; Floating-point outer-product pattern that reads and writes ZA.
;; Operand 0 is a constant that the template prints as the tile number
;; ("za%0").  Operands 1 and 2 have the ZA mode's <VPRED> mode and are
;; printed with a "/m" suffix.  Operands 3 and 4 are the two SME_MOP_HSDF
;; vector inputs.
;;
;; The condition restricts the mode combinations: besides requiring
;; TARGET_STREAMING_SME, it holds only when "ZA elements are 32 bits" and
;; "input elements are at most 32 bits" are either both true or both false.
1885 (define_insn "@aarch64_sme_<optab><SME_ZA_SDF_I:mode><SME_MOP_HSDF:mode>"
1886 [(set (reg:SME_ZA_SDF_I ZA_REGNUM)
1887 (unspec:SME_ZA_SDF_I
1888 [(reg:SME_ZA_SDF_I ZA_REGNUM)
1889 (reg:DI SME_STATE_REGNUM)
1890 (match_operand:DI 0 "const_int_operand")
1891 (match_operand:<SME_ZA_SDF_I:VPRED> 1 "register_operand" "Upl")
1892 (match_operand:<SME_ZA_SDF_I:VPRED> 2 "register_operand" "Upl")
1893 (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
1894 (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
1896 "TARGET_STREAMING_SME
1897 && (<SME_ZA_SDF_I:elem_bits> == 32) == (<SME_MOP_HSDF:elem_bits> <= 32)"
1898 "<b><optab>\tza%0.<SME_ZA_SDF_I:Vetype>, %1/m, %2/m, %3.<SME_MOP_HSDF:Vetype>, %4.<SME_MOP_HSDF:Vetype>"
1901 ;; =========================================================================
1903 ;; =========================================================================
1905 ;; -------------------------------------------------------------------------
1906 ;; ---- Table lookup
1907 ;; -------------------------------------------------------------------------
1911 ;; -------------------------------------------------------------------------
;; Extend the "unspec" enum.
;; NOTE(review): presumably this adds the unspec code used by the luti
;; patterns that follow -- confirm against the entry list.
1913 (define_c_enum "unspec" [
;; Table lookup into a single SVE_FULL_BHS destination register
;; (operand 0).  The pattern reads ZT0 and the SME state register,
;; takes a VNx16QI register as operand 1 and a constant as operand 2,
;; and records LUTI_BITS as a constant input.  The template prints
;; operand 2 as an index on operand 1 ("%1[%2]") and the destination
;; with its element-type suffix.
1917 (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
1918 [(set (match_operand:SVE_FULL_BHS 0 "register_operand" "=w")
1919 (unspec:SVE_FULL_BHS
1920 [(reg:V8DI ZT0_REGNUM)
1921 (reg:DI SME_STATE_REGNUM)
1922 (match_operand:VNx16QI 1 "register_operand" "w")
1923 (match_operand:DI 2 "const_int_operand")
1924 (const_int LUTI_BITS)]
1926 "TARGET_STREAMING_SME2"
1927 "luti<LUTI_BITS>\t%0.<Vetype>, zt0, %1[%2]"
;; Table lookup into an aligned SVE_BHSx24 destination (operand 0).
;; The inputs are the same as for the single-register form: ZT0, the SME
;; state register, a VNx16QI register (operand 1), a constant printed as
;; an index on operand 1 (operand 2), and LUTI_BITS as a constant.
;;
;; The condition rejects one combination: LUTI_BITS == 4 together with a
;; vector count of 4 and 8-bit elements.
;; NOTE(review): presumably no instruction form exists for that
;; combination -- confirm against the architecture reference.
1930 (define_insn "@aarch64_sme_lut<LUTI_BITS><mode>"
1931 [(set (match_operand:SVE_BHSx24 0 "aligned_register_operand" "=Uw<vector_count>")
1933 [(reg:V8DI ZT0_REGNUM)
1934 (reg:DI SME_STATE_REGNUM)
1935 (match_operand:VNx16QI 1 "register_operand" "w")
1936 (match_operand:DI 2 "const_int_operand")
1937 (const_int LUTI_BITS)]
1939 "TARGET_STREAMING_SME2
1940 && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
1941 "luti<LUTI_BITS>\t%0, zt0, %1[%2]"