target/arm: Split out make_svemte_desc
[qemu/kevin.git] / target / arm / tcg / translate-sme.c
blob46c7fce8b4eb5033434872bdfdbf550f4b1784f4
1 /*
2 * AArch64 SME translation
4 * Copyright (c) 2022 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "translate.h"
22 #include "translate-a64.h"
25 * Include the generated decoder.
28 #include "decode-sme.c.inc"
32 * Resolve tile.size[index] to a host pointer, where tile and index
33 * are always decoded together, dependent on the element size.
35 static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
36 int tile_index, bool vertical)
38 int tile = tile_index >> (4 - esz);
39 int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
40 int pos, len, offset;
41 TCGv_i32 tmp;
42 TCGv_ptr addr;
44 /* Compute the final index, which is Rs+imm. */
45 tmp = tcg_temp_new_i32();
46 tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
47 tcg_gen_addi_i32(tmp, tmp, index);
49 /* Prepare a power-of-two modulo via extraction of @len bits. */
50 len = ctz32(streaming_vec_reg_size(s)) - esz;
52 if (vertical) {
54 * Compute the byte offset of the index within the tile:
55 * (index % (svl / size)) * size
56 * = (index % (svl >> esz)) << esz
57 * Perform the power-of-two modulo via extraction of the low @len bits.
58 * Perform the multiply by shifting left by @pos bits.
59 * Perform these operations simultaneously via deposit into zero.
61 pos = esz;
62 tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
65 * For big-endian, adjust the indexed column byte offset within
66 * the uint64_t host words that make up env->zarray[].
68 if (HOST_BIG_ENDIAN && esz < MO_64) {
69 tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
71 } else {
73 * Compute the byte offset of the index within the tile:
74 * (index % (svl / size)) * (size * sizeof(row))
75 * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
77 pos = esz + ctz32(sizeof(ARMVectorReg));
78 tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
80 /* Row slices are always aligned and need no endian adjustment. */
83 /* The tile byte offset within env->zarray is the row. */
84 offset = tile * sizeof(ARMVectorReg);
86 /* Include the byte offset of zarray to make this relative to env. */
87 offset += offsetof(CPUARMState, zarray);
88 tcg_gen_addi_i32(tmp, tmp, offset);
90 /* Add the byte offset to env to produce the final pointer. */
91 addr = tcg_temp_new_ptr();
92 tcg_gen_ext_i32_ptr(addr, tmp);
93 tcg_gen_add_ptr(addr, addr, tcg_env);
95 return addr;
99 * Resolve tile.size[0] to a host pointer.
100 * Used by e.g. outer product insns where we require the entire tile.
102 static TCGv_ptr get_tile(DisasContext *s, int esz, int tile)
104 TCGv_ptr addr = tcg_temp_new_ptr();
105 int offset;
107 offset = tile * sizeof(ARMVectorReg) + offsetof(CPUARMState, zarray);
109 tcg_gen_addi_ptr(addr, tcg_env, offset);
110 return addr;
113 static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
115 if (!dc_isar_feature(aa64_sme, s)) {
116 return false;
118 if (sme_za_enabled_check(s)) {
119 gen_helper_sme_zero(tcg_env, tcg_constant_i32(a->imm),
120 tcg_constant_i32(streaming_vec_reg_size(s)));
122 return true;
125 static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
127 static gen_helper_gvec_4 * const h_fns[5] = {
128 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
129 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
130 gen_helper_sve_sel_zpzz_q
132 static gen_helper_gvec_3 * const cz_fns[5] = {
133 gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
134 gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
135 gen_helper_sme_mova_cz_q,
137 static gen_helper_gvec_3 * const zc_fns[5] = {
138 gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
139 gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
140 gen_helper_sme_mova_zc_q,
143 TCGv_ptr t_za, t_zr, t_pg;
144 TCGv_i32 t_desc;
145 int svl;
147 if (!dc_isar_feature(aa64_sme, s)) {
148 return false;
150 if (!sme_smza_enabled_check(s)) {
151 return true;
154 t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
155 t_zr = vec_full_reg_ptr(s, a->zr);
156 t_pg = pred_full_reg_ptr(s, a->pg);
158 svl = streaming_vec_reg_size(s);
159 t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
161 if (a->v) {
162 /* Vertical slice -- use sme mova helpers. */
163 if (a->to_vec) {
164 zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
165 } else {
166 cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
168 } else {
169 /* Horizontal slice -- reuse sve sel helpers. */
170 if (a->to_vec) {
171 h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
172 } else {
173 h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
176 return true;
179 static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
181 typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
184 * Indexed by [esz][be][v][mte][st], which is (except for load/store)
185 * also the order in which the elements appear in the function names,
186 * and so how we must concatenate the pieces.
189 #define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
190 #define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
191 #define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
192 #define FN_END(L, B) { FN_HV(L), FN_HV(B) }
194 static GenLdSt1 * const fns[5][2][2][2][2] = {
195 FN_END(b, b),
196 FN_END(h_le, h_be),
197 FN_END(s_le, s_be),
198 FN_END(d_le, d_be),
199 FN_END(q_le, q_be),
202 #undef FN_LS
203 #undef FN_MTE
204 #undef FN_HV
205 #undef FN_END
207 TCGv_ptr t_za, t_pg;
208 TCGv_i64 addr;
209 uint32_t desc;
210 bool be = s->be_data == MO_BE;
211 bool mte = s->mte_active[0];
213 if (!dc_isar_feature(aa64_sme, s)) {
214 return false;
216 if (!sme_smza_enabled_check(s)) {
217 return true;
220 t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
221 t_pg = pred_full_reg_ptr(s, a->pg);
222 addr = tcg_temp_new_i64();
224 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
225 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
227 if (!mte) {
228 addr = clean_data_tbi(s, addr);
231 desc = make_svemte_desc(s, streaming_vec_reg_size(s), 1, a->esz, a->st, 0);
233 fns[a->esz][be][a->v][mte][a->st](tcg_env, t_za, t_pg, addr,
234 tcg_constant_i32(desc));
235 return true;
238 typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
240 static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
242 int svl = streaming_vec_reg_size(s);
243 int imm = a->imm;
244 TCGv_ptr base;
246 if (!sme_za_enabled_check(s)) {
247 return true;
250 /* ZA[n] equates to ZA0H.B[n]. */
251 base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
253 fn(s, base, 0, svl, a->rn, imm * svl);
254 return true;
257 TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
258 TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
260 static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
261 gen_helper_gvec_4 *fn)
263 int svl = streaming_vec_reg_size(s);
264 uint32_t desc = simd_desc(svl, svl, 0);
265 TCGv_ptr za, zn, pn, pm;
267 if (!sme_smza_enabled_check(s)) {
268 return true;
271 za = get_tile(s, esz, a->zad);
272 zn = vec_full_reg_ptr(s, a->zn);
273 pn = pred_full_reg_ptr(s, a->pn);
274 pm = pred_full_reg_ptr(s, a->pm);
276 fn(za, zn, pn, pm, tcg_constant_i32(desc));
277 return true;
280 TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
281 TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
282 TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
283 TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
285 static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
286 gen_helper_gvec_5 *fn)
288 int svl = streaming_vec_reg_size(s);
289 uint32_t desc = simd_desc(svl, svl, a->sub);
290 TCGv_ptr za, zn, zm, pn, pm;
292 if (!sme_smza_enabled_check(s)) {
293 return true;
296 za = get_tile(s, esz, a->zad);
297 zn = vec_full_reg_ptr(s, a->zn);
298 zm = vec_full_reg_ptr(s, a->zm);
299 pn = pred_full_reg_ptr(s, a->pn);
300 pm = pred_full_reg_ptr(s, a->pm);
302 fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
303 return true;
306 static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
307 gen_helper_gvec_5_ptr *fn)
309 int svl = streaming_vec_reg_size(s);
310 uint32_t desc = simd_desc(svl, svl, a->sub);
311 TCGv_ptr za, zn, zm, pn, pm, fpst;
313 if (!sme_smza_enabled_check(s)) {
314 return true;
317 za = get_tile(s, esz, a->zad);
318 zn = vec_full_reg_ptr(s, a->zn);
319 zm = vec_full_reg_ptr(s, a->zm);
320 pn = pred_full_reg_ptr(s, a->pn);
321 pm = pred_full_reg_ptr(s, a->pm);
322 fpst = fpstatus_ptr(FPST_FPCR);
324 fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
325 return true;
328 TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
329 TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
330 TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
332 /* TODO: FEAT_EBF16 */
333 TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
335 TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
336 TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
337 TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
338 TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
340 TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
341 TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
342 TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
343 TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)