Daily bump.
[official-gcc.git] / gcc / config / aarch64 / aarch64-sve-builtins-sme.cc
blobb66b35ae60b7a4a6ae0fb7044c85811a9ad88dd1
1 /* ACLE support for AArch64 SME.
2 Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "tree.h"
25 #include "rtl.h"
26 #include "tm_p.h"
27 #include "memmodel.h"
28 #include "insn-codes.h"
29 #include "optabs.h"
30 #include "recog.h"
31 #include "expr.h"
32 #include "basic-block.h"
33 #include "function.h"
34 #include "fold-const.h"
35 #include "gimple.h"
36 #include "gimple-iterator.h"
37 #include "gimplify.h"
38 #include "explow.h"
39 #include "emit-rtl.h"
40 #include "aarch64-sve-builtins.h"
41 #include "aarch64-sve-builtins-shapes.h"
42 #include "aarch64-sve-builtins-base.h"
43 #include "aarch64-sve-builtins-sme.h"
44 #include "aarch64-sve-builtins-functions.h"
46 using namespace aarch64_sve;
48 namespace {
50 class load_store_za_zt0_base : public function_base
52 public:
53 tree
54 memory_scalar_type (const function_instance &) const override
56 return void_type_node;
60 class read_write_za_base : public function_base
62 public:
63 constexpr read_write_za_base (int unspec) : m_unspec (unspec) {}
65 rtx
66 expand (function_expander &e) const override
68 auto za_mode = e.vector_mode (0);
69 auto z_mode = e.tuple_mode (1);
70 auto icode = (za_mode == VNx1TImode
71 ? code_for_aarch64_sme (m_unspec, za_mode, z_mode)
72 : code_for_aarch64_sme (m_unspec, z_mode, z_mode));
73 return e.use_exact_insn (icode);
76 int m_unspec;
79 using load_za_base = add_call_properties<load_store_za_zt0_base,
80 CP_READ_MEMORY | CP_READ_ZA
81 | CP_WRITE_ZA>;
83 using store_za_base = add_call_properties<load_store_za_zt0_base,
84 CP_WRITE_MEMORY | CP_READ_ZA>;
86 /* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
87 The intrinsic has a vnum parameter at index ARGNO. Return true if the
88 vnum argument is a constant that is a valid ZA offset for the underlying
89 instruction. */
91 static bool
92 has_in_range_vnum_arg (function_expander &e, machine_mode mem_mode,
93 unsigned int argno)
95 return (e.mode_suffix_id == MODE_vnum
96 && CONST_INT_P (e.args[argno])
97 && UINTVAL (e.args[argno]) < 16 / GET_MODE_UNIT_SIZE (mem_mode));
100 /* E is a ZA load or store intrinsic that uses instruction ICODE. Add a
101 32-bit operand that gives the total ZA slice. (The instruction hard-codes
102 the constant offset to 0, so there is no operand for that.)
104 Argument ARGNO is the intrinsic's slice argument. If the intrinsic is
105 a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand,
106 which must be added to the slice argument. */
108 static void
109 add_load_store_slice_operand (function_expander &e, insn_code icode,
110 unsigned int argno, unsigned int vnum_argno)
112 rtx base = e.args[argno];
113 if (e.mode_suffix_id == MODE_vnum)
115 rtx vnum = force_lowpart_subreg (SImode, e.args[vnum_argno], DImode);
116 base = simplify_gen_binary (PLUS, SImode, base, vnum);
118 e.add_input_operand (icode, base);
121 /* Add a memory operand for ZA LD1 or ST1 intrinsic E. BASE_ARGNO is
122 the index of the base argument. */
124 static void
125 add_load_store_operand (function_expander &e, unsigned int base_argno)
127 auto mode = e.vector_mode (0);
128 rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1,
129 AARCH64_FL_SM_ON);
130 auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base));
131 set_mem_align (mem, BITS_PER_UNIT);
132 e.add_fixed_operand (mem);
135 /* Expand ZA LDR or STR intrinsic E. There are two underlying instructions:
137 - BASE_CODE has a zero ZA slice offset
138 - VNUM_CODE has a constant operand for the ZA slice offset. */
140 static rtx
141 expand_ldr_str_za (function_expander &e, insn_code base_code,
142 insn_code vnum_code)
144 if (has_in_range_vnum_arg (e, VNx16QImode, 2))
146 rtx mem_offset = aarch64_sme_vq_immediate (Pmode,
147 UINTVAL (e.args[2]) * 16,
148 AARCH64_ISA_MODE);
149 e.add_input_operand (vnum_code, e.args[0]);
150 e.add_input_operand (vnum_code, e.args[2]);
151 e.add_input_operand (vnum_code, e.args[1]);
152 e.add_input_operand (vnum_code, mem_offset);
153 return e.generate_insn (vnum_code);
155 else
157 rtx base = e.get_contiguous_base (VNx16QImode, 1, 2, AARCH64_FL_SM_ON);
158 add_load_store_slice_operand (e, base_code, 0, 2);
159 e.add_input_operand (base_code, base);
160 return e.generate_insn (base_code);
164 /* Use instruction ICODE to expand ZT0 load or store E. */
166 static rtx
167 expand_ldr_str_zt0 (function_expander &e, insn_code icode)
169 rtx base = e.convert_to_pmode (e.args[1]);
170 rtx mem = gen_rtx_MEM (V8DImode, force_reg (Pmode, base));
171 e.add_fixed_operand (mem);
172 return e.generate_insn (icode);
175 /* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec.
176 IS_LOAD is true if E is a load, false if it is a store. */
178 static rtx
179 expand_ld1_st1 (function_expander &e, int unspec, bool is_load)
181 bool is_vnum = has_in_range_vnum_arg (e, e.vector_mode (0), 4);
182 auto icode = (is_vnum
183 ? code_for_aarch64_sme_plus (unspec, e.vector_mode (0))
184 : code_for_aarch64_sme (unspec, e.vector_mode (0)));
185 if (!is_load)
186 add_load_store_operand (e, 3);
187 e.add_input_operand (icode, e.args[0]);
188 if (is_vnum)
190 e.add_input_operand (icode, e.args[1]);
191 e.add_input_operand (icode, e.args[4]);
193 else
194 add_load_store_slice_operand (e, icode, 1, 4);
195 e.add_input_operand (icode, e.args[2]);
196 if (is_load)
197 add_load_store_operand (e, 3);
198 return e.generate_insn (icode);
201 class arm_has_sme_impl : public function_base
203 gimple *
204 fold (gimple_folder &f) const override
206 if (TARGET_SME)
207 return f.fold_to_cstu (1);
208 return nullptr;
212 expand (function_expander &e) const override
214 if (TARGET_SME)
215 return const1_rtx;
216 emit_insn (gen_aarch64_get_sme_state ());
217 return expand_simple_binop (DImode, LSHIFTRT,
218 gen_rtx_REG (DImode, R0_REGNUM),
219 gen_int_mode (63, QImode),
220 e.possible_target, true, OPTAB_LIB_WIDEN);
224 class arm_in_streaming_mode_impl : public function_base
226 gimple *
227 fold (gimple_folder &f) const override
229 if (TARGET_STREAMING)
230 return f.fold_to_cstu (1);
231 if (TARGET_NON_STREAMING)
232 return f.fold_to_cstu (0);
233 return nullptr;
237 expand (function_expander &e) const override
239 if (TARGET_STREAMING)
240 return const1_rtx;
242 if (TARGET_NON_STREAMING)
243 return const0_rtx;
245 rtx reg;
246 if (TARGET_SME)
248 reg = gen_reg_rtx (DImode);
249 emit_insn (gen_aarch64_read_svcr (reg));
251 else
253 emit_insn (gen_aarch64_get_sme_state ());
254 reg = gen_rtx_REG (DImode, R0_REGNUM);
256 return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode),
257 e.possible_target, true, OPTAB_LIB_WIDEN);
261 /* Implements svcnts[bhwd]. */
262 class svcnts_bhwd_impl : public function_base
264 public:
265 constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
267 unsigned int
268 get_shift () const
270 return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode));
273 gimple *
274 fold (gimple_folder &f) const override
276 if (TARGET_STREAMING)
277 return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
278 return nullptr;
282 expand (function_expander &e) const override
284 rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE);
285 auto shift = get_shift ();
286 if (!shift)
287 return cntsb;
289 return expand_simple_binop (DImode, LSHIFTRT, cntsb,
290 gen_int_mode (shift, QImode),
291 e.possible_target, true, OPTAB_LIB_WIDEN);
294 /* The mode of the vector associated with the [bhwd] suffix. */
295 machine_mode m_ref_mode;
298 class svld1_za_impl : public load_za_base
300 public:
301 constexpr svld1_za_impl (int unspec) : m_unspec (unspec) {}
304 expand (function_expander &e) const override
306 return expand_ld1_st1 (e, m_unspec, true);
309 int m_unspec;
312 class svldr_za_impl : public load_za_base
314 public:
316 expand (function_expander &e) const override
318 return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_ldr0,
319 code_for_aarch64_sme_ldrn (Pmode));
323 class svldr_zt_impl : public load_store_za_zt0_base
325 public:
326 unsigned int
327 call_properties (const function_instance &) const override
329 return CP_READ_MEMORY | CP_WRITE_ZT0;
333 expand (function_expander &e) const override
335 return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_ldr_zt0);
339 class svluti_lane_zt_impl : public read_zt0<function_base>
341 public:
342 CONSTEXPR svluti_lane_zt_impl (unsigned int bits) : m_bits (bits) {}
345 expand (function_expander &e) const override
347 auto mode = e.tuple_mode (0);
348 e.args.ordered_remove (0);
349 return e.use_exact_insn (code_for_aarch64_sme_lut (m_bits, mode));
352 unsigned int m_bits;
355 class svread_za_impl : public function_base
357 public:
358 unsigned int
359 call_properties (const function_instance &) const override
361 return CP_READ_ZA;
365 expand (function_expander &e) const override
367 machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
368 rtx res = e.use_exact_insn (code_for_aarch64_sme_read (mode));
369 return aarch64_sve_reinterpret (e.result_mode (), res);
373 using svread_za_tile_impl = add_call_properties<read_write_za_base,
374 CP_READ_ZA>;
376 class svst1_za_impl : public store_za_base
378 public:
379 constexpr svst1_za_impl (int unspec) : m_unspec (unspec) {}
382 expand (function_expander &e) const override
384 return expand_ld1_st1 (e, m_unspec, false);
387 int m_unspec;
390 class svstr_za_impl : public store_za_base
392 public:
394 expand (function_expander &e) const override
396 return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_str0,
397 code_for_aarch64_sme_strn (Pmode));
401 class svstr_zt_impl : public load_store_za_zt0_base
403 public:
404 unsigned int
405 call_properties (const function_instance &) const override
407 return CP_WRITE_MEMORY | CP_READ_ZT0;
411 expand (function_expander &e) const override
413 return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_str_zt0);
417 class svsudot_za_impl : public read_write_za<function_base>
419 public:
421 expand (function_expander &e) const override
423 if (e.mode_suffix_id == MODE_single)
425 auto icode = code_for_aarch64_sme_single_sudot (e.vector_mode (0),
426 e.tuple_mode (1));
427 return e.use_exact_insn (icode);
429 std::swap (e.args[1], e.args[2]);
430 return e.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT,
431 e.vector_mode (0),
432 e.tuple_mode (1)));
436 class svundef_za_impl : public write_za<function_base>
438 public:
440 expand (function_expander &) const override
442 rtx target = gen_rtx_REG (VNx16QImode, ZA_REGNUM);
443 emit_clobber (copy_rtx (target));
444 return const0_rtx;
448 class svwrite_za_impl : public function_base
450 public:
451 unsigned int
452 call_properties (const function_instance &) const override
454 return CP_WRITE_ZA;
458 expand (function_expander &e) const override
460 machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
461 e.args[1] = aarch64_sve_reinterpret (mode, e.args[1]);
462 return e.use_exact_insn (code_for_aarch64_sme_write (mode));
466 using svwrite_za_tile_impl = add_call_properties<read_write_za_base,
467 CP_READ_ZA | CP_WRITE_ZA>;
469 class svzero_mask_za_impl : public write_za<function_base>
471 public:
473 expand (function_expander &e) const override
475 return e.use_exact_insn (CODE_FOR_aarch64_sme_zero_za);
479 class svzero_za_impl : public write_za<function_base>
481 public:
483 expand (function_expander &) const override
485 emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode)));
486 return const0_rtx;
490 class svzero_zt_impl : public write_zt0<function_base>
492 public:
494 expand (function_expander &) const override
496 emit_insn (gen_aarch64_sme_zero_zt0 ());
497 return const0_rtx;
501 } /* end anonymous namespace */
503 namespace aarch64_sve {
505 FUNCTION (arm_has_sme, arm_has_sme_impl, )
506 FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, )
507 FUNCTION (svadd_za, sme_1mode_function, (UNSPEC_SME_ADD, UNSPEC_SME_ADD,
508 UNSPEC_SME_FADD))
509 FUNCTION (svadd_write_za, sme_1mode_function, (UNSPEC_SME_ADD_WRITE,
510 UNSPEC_SME_ADD_WRITE, -1))
511 FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA,
512 UNSPEC_SME_ADDHA, -1))
513 FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA,
514 UNSPEC_SME_ADDVA, -1))
515 FUNCTION (svbmopa_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPA, -1))
516 FUNCTION (svbmops_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPS, -1))
517 FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode))
518 FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode))
519 FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode))
520 FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode))
521 FUNCTION (svdot_za, sme_2mode_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT,
522 UNSPEC_SME_FDOT))
523 FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT,
524 UNSPEC_SME_UDOT,
525 UNSPEC_SME_FDOT))
526 FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR))
527 FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER))
528 FUNCTION (svldr_za, svldr_za_impl, )
529 FUNCTION (svldr_zt, svldr_zt_impl, )
530 FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2))
531 FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4))
532 FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA,
533 UNSPEC_SME_FMLA))
534 FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA,
535 UNSPEC_SME_UMLA,
536 UNSPEC_SME_FMLA))
537 FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS,
538 UNSPEC_SME_FMLS))
539 FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS,
540 UNSPEC_SME_UMLS,
541 UNSPEC_SME_FMLS))
542 FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA,
543 UNSPEC_SME_FMOPA))
544 FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS,
545 UNSPEC_SME_FMOPS))
546 FUNCTION (svread_za, svread_za_impl,)
547 FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR))
548 FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER))
549 FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR))
550 FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER))
551 FUNCTION (svstr_za, svstr_za_impl, )
552 FUNCTION (svstr_zt, svstr_zt_impl, )
553 FUNCTION (svsub_za, sme_1mode_function, (UNSPEC_SME_SUB, UNSPEC_SME_SUB,
554 UNSPEC_SME_FSUB))
555 FUNCTION (svsub_write_za, sme_1mode_function, (UNSPEC_SME_SUB_WRITE,
556 UNSPEC_SME_SUB_WRITE, -1))
557 FUNCTION (svsudot_za, svsudot_za_impl,)
558 FUNCTION (svsudot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUDOT, -1, -1))
559 FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUVDOT,
560 -1, -1))
561 FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1))
562 FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1))
563 FUNCTION (svundef_za, svundef_za_impl, )
564 FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1))
565 FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1))
566 FUNCTION (svusvdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USVDOT,
567 -1))
568 FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1))
569 FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1))
570 FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT,
571 UNSPEC_SME_UVDOT,
572 UNSPEC_SME_FVDOT))
573 FUNCTION (svwrite_za, svwrite_za_impl,)
574 FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR))
575 FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER))
576 FUNCTION (svzero_mask_za, svzero_mask_za_impl, )
577 FUNCTION (svzero_za, svzero_za_impl, )
578 FUNCTION (svzero_zt, svzero_zt_impl, )
580 } /* end namespace aarch64_sve */