s390x/pv: Implement a CGS check helper
[qemu/kevin.git] / target / arm / translate-sme.c
blob7b87a9df63c167f09a5fd6fe67548710508f1075
1 /*
2 * AArch64 SME translation
4 * Copyright (c) 2022 Linaro, Ltd
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "tcg/tcg-op.h"
23 #include "tcg/tcg-op-gvec.h"
24 #include "tcg/tcg-gvec-desc.h"
25 #include "translate.h"
26 #include "exec/helper-gen.h"
27 #include "translate-a64.h"
28 #include "fpu/softfloat.h"
32 * Include the generated decoder.
35 #include "decode-sme.c.inc"
39 * Resolve tile.size[index] to a host pointer, where tile and index
40 * are always decoded together, dependent on the element size.
42 static TCGv_ptr get_tile_rowcol(DisasContext *s, int esz, int rs,
43 int tile_index, bool vertical)
45 int tile = tile_index >> (4 - esz);
46 int index = esz == MO_128 ? 0 : extract32(tile_index, 0, 4 - esz);
47 int pos, len, offset;
48 TCGv_i32 tmp;
49 TCGv_ptr addr;
51 /* Compute the final index, which is Rs+imm. */
52 tmp = tcg_temp_new_i32();
53 tcg_gen_trunc_tl_i32(tmp, cpu_reg(s, rs));
54 tcg_gen_addi_i32(tmp, tmp, index);
56 /* Prepare a power-of-two modulo via extraction of @len bits. */
57 len = ctz32(streaming_vec_reg_size(s)) - esz;
59 if (vertical) {
61 * Compute the byte offset of the index within the tile:
62 * (index % (svl / size)) * size
63 * = (index % (svl >> esz)) << esz
64 * Perform the power-of-two modulo via extraction of the low @len bits.
65 * Perform the multiply by shifting left by @pos bits.
66 * Perform these operations simultaneously via deposit into zero.
68 pos = esz;
69 tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
72 * For big-endian, adjust the indexed column byte offset within
73 * the uint64_t host words that make up env->zarray[].
75 if (HOST_BIG_ENDIAN && esz < MO_64) {
76 tcg_gen_xori_i32(tmp, tmp, 8 - (1 << esz));
78 } else {
80 * Compute the byte offset of the index within the tile:
81 * (index % (svl / size)) * (size * sizeof(row))
82 * = (index % (svl >> esz)) << (esz + log2(sizeof(row)))
84 pos = esz + ctz32(sizeof(ARMVectorReg));
85 tcg_gen_deposit_z_i32(tmp, tmp, pos, len);
87 /* Row slices are always aligned and need no endian adjustment. */
90 /* The tile byte offset within env->zarray is the row. */
91 offset = tile * sizeof(ARMVectorReg);
93 /* Include the byte offset of zarray to make this relative to env. */
94 offset += offsetof(CPUARMState, zarray);
95 tcg_gen_addi_i32(tmp, tmp, offset);
97 /* Add the byte offset to env to produce the final pointer. */
98 addr = tcg_temp_new_ptr();
99 tcg_gen_ext_i32_ptr(addr, tmp);
100 tcg_temp_free_i32(tmp);
101 tcg_gen_add_ptr(addr, addr, cpu_env);
103 return addr;
106 static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
108 if (!dc_isar_feature(aa64_sme, s)) {
109 return false;
111 if (sme_za_enabled_check(s)) {
112 gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
113 tcg_constant_i32(streaming_vec_reg_size(s)));
115 return true;
118 static bool trans_MOVA(DisasContext *s, arg_MOVA *a)
120 static gen_helper_gvec_4 * const h_fns[5] = {
121 gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
122 gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d,
123 gen_helper_sve_sel_zpzz_q
125 static gen_helper_gvec_3 * const cz_fns[5] = {
126 gen_helper_sme_mova_cz_b, gen_helper_sme_mova_cz_h,
127 gen_helper_sme_mova_cz_s, gen_helper_sme_mova_cz_d,
128 gen_helper_sme_mova_cz_q,
130 static gen_helper_gvec_3 * const zc_fns[5] = {
131 gen_helper_sme_mova_zc_b, gen_helper_sme_mova_zc_h,
132 gen_helper_sme_mova_zc_s, gen_helper_sme_mova_zc_d,
133 gen_helper_sme_mova_zc_q,
136 TCGv_ptr t_za, t_zr, t_pg;
137 TCGv_i32 t_desc;
138 int svl;
140 if (!dc_isar_feature(aa64_sme, s)) {
141 return false;
143 if (!sme_smza_enabled_check(s)) {
144 return true;
147 t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
148 t_zr = vec_full_reg_ptr(s, a->zr);
149 t_pg = pred_full_reg_ptr(s, a->pg);
151 svl = streaming_vec_reg_size(s);
152 t_desc = tcg_constant_i32(simd_desc(svl, svl, 0));
154 if (a->v) {
155 /* Vertical slice -- use sme mova helpers. */
156 if (a->to_vec) {
157 zc_fns[a->esz](t_zr, t_za, t_pg, t_desc);
158 } else {
159 cz_fns[a->esz](t_za, t_zr, t_pg, t_desc);
161 } else {
162 /* Horizontal slice -- reuse sve sel helpers. */
163 if (a->to_vec) {
164 h_fns[a->esz](t_zr, t_za, t_zr, t_pg, t_desc);
165 } else {
166 h_fns[a->esz](t_za, t_zr, t_za, t_pg, t_desc);
170 tcg_temp_free_ptr(t_za);
171 tcg_temp_free_ptr(t_zr);
172 tcg_temp_free_ptr(t_pg);
174 return true;
177 static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
179 typedef void GenLdSt1(TCGv_env, TCGv_ptr, TCGv_ptr, TCGv, TCGv_i32);
182 * Indexed by [esz][be][v][mte][st], which is (except for load/store)
183 * also the order in which the elements appear in the function names,
184 * and so how we must concatenate the pieces.
187 #define FN_LS(F) { gen_helper_sme_ld1##F, gen_helper_sme_st1##F }
188 #define FN_MTE(F) { FN_LS(F), FN_LS(F##_mte) }
189 #define FN_HV(F) { FN_MTE(F##_h), FN_MTE(F##_v) }
190 #define FN_END(L, B) { FN_HV(L), FN_HV(B) }
192 static GenLdSt1 * const fns[5][2][2][2][2] = {
193 FN_END(b, b),
194 FN_END(h_le, h_be),
195 FN_END(s_le, s_be),
196 FN_END(d_le, d_be),
197 FN_END(q_le, q_be),
200 #undef FN_LS
201 #undef FN_MTE
202 #undef FN_HV
203 #undef FN_END
205 TCGv_ptr t_za, t_pg;
206 TCGv_i64 addr;
207 int svl, desc = 0;
208 bool be = s->be_data == MO_BE;
209 bool mte = s->mte_active[0];
211 if (!dc_isar_feature(aa64_sme, s)) {
212 return false;
214 if (!sme_smza_enabled_check(s)) {
215 return true;
218 t_za = get_tile_rowcol(s, a->esz, a->rs, a->za_imm, a->v);
219 t_pg = pred_full_reg_ptr(s, a->pg);
220 addr = tcg_temp_new_i64();
222 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->esz);
223 tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
225 if (mte) {
226 desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s));
227 desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid);
228 desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma);
229 desc = FIELD_DP32(desc, MTEDESC, WRITE, a->st);
230 desc = FIELD_DP32(desc, MTEDESC, SIZEM1, (1 << a->esz) - 1);
231 desc <<= SVE_MTEDESC_SHIFT;
232 } else {
233 addr = clean_data_tbi(s, addr);
235 svl = streaming_vec_reg_size(s);
236 desc = simd_desc(svl, svl, desc);
238 fns[a->esz][be][a->v][mte][a->st](cpu_env, t_za, t_pg, addr,
239 tcg_constant_i32(desc));
241 tcg_temp_free_ptr(t_za);
242 tcg_temp_free_ptr(t_pg);
243 tcg_temp_free_i64(addr);
244 return true;
247 typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
249 static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
251 int svl = streaming_vec_reg_size(s);
252 int imm = a->imm;
253 TCGv_ptr base;
255 if (!sme_za_enabled_check(s)) {
256 return true;
259 /* ZA[n] equates to ZA0H.B[n]. */
260 base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
262 fn(s, base, 0, svl, a->rn, imm * svl);
264 tcg_temp_free_ptr(base);
265 return true;
268 TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
269 TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
271 static bool do_adda(DisasContext *s, arg_adda *a, MemOp esz,
272 gen_helper_gvec_4 *fn)
274 int svl = streaming_vec_reg_size(s);
275 uint32_t desc = simd_desc(svl, svl, 0);
276 TCGv_ptr za, zn, pn, pm;
278 if (!sme_smza_enabled_check(s)) {
279 return true;
282 /* Sum XZR+zad to find ZAd. */
283 za = get_tile_rowcol(s, esz, 31, a->zad, false);
284 zn = vec_full_reg_ptr(s, a->zn);
285 pn = pred_full_reg_ptr(s, a->pn);
286 pm = pred_full_reg_ptr(s, a->pm);
288 fn(za, zn, pn, pm, tcg_constant_i32(desc));
290 tcg_temp_free_ptr(za);
291 tcg_temp_free_ptr(zn);
292 tcg_temp_free_ptr(pn);
293 tcg_temp_free_ptr(pm);
294 return true;
297 TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addha_s)
298 TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
299 TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
300 TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
302 static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
303 gen_helper_gvec_5 *fn)
305 int svl = streaming_vec_reg_size(s);
306 uint32_t desc = simd_desc(svl, svl, a->sub);
307 TCGv_ptr za, zn, zm, pn, pm;
309 if (!sme_smza_enabled_check(s)) {
310 return true;
313 /* Sum XZR+zad to find ZAd. */
314 za = get_tile_rowcol(s, esz, 31, a->zad, false);
315 zn = vec_full_reg_ptr(s, a->zn);
316 zm = vec_full_reg_ptr(s, a->zm);
317 pn = pred_full_reg_ptr(s, a->pn);
318 pm = pred_full_reg_ptr(s, a->pm);
320 fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
322 tcg_temp_free_ptr(za);
323 tcg_temp_free_ptr(zn);
324 tcg_temp_free_ptr(pn);
325 tcg_temp_free_ptr(pm);
326 return true;
329 static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
330 gen_helper_gvec_5_ptr *fn)
332 int svl = streaming_vec_reg_size(s);
333 uint32_t desc = simd_desc(svl, svl, a->sub);
334 TCGv_ptr za, zn, zm, pn, pm, fpst;
336 if (!sme_smza_enabled_check(s)) {
337 return true;
340 /* Sum XZR+zad to find ZAd. */
341 za = get_tile_rowcol(s, esz, 31, a->zad, false);
342 zn = vec_full_reg_ptr(s, a->zn);
343 zm = vec_full_reg_ptr(s, a->zm);
344 pn = pred_full_reg_ptr(s, a->pn);
345 pm = pred_full_reg_ptr(s, a->pm);
346 fpst = fpstatus_ptr(FPST_FPCR);
348 fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
350 tcg_temp_free_ptr(za);
351 tcg_temp_free_ptr(zn);
352 tcg_temp_free_ptr(pn);
353 tcg_temp_free_ptr(pm);
354 tcg_temp_free_ptr(fpst);
355 return true;
358 TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
359 TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
360 TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
362 /* TODO: FEAT_EBF16 */
363 TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
365 TRANS_FEAT(SMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_smopa_s)
366 TRANS_FEAT(UMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_umopa_s)
367 TRANS_FEAT(SUMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_sumopa_s)
368 TRANS_FEAT(USMOPA_s, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_usmopa_s)
370 TRANS_FEAT(SMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_smopa_d)
371 TRANS_FEAT(UMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_umopa_d)
372 TRANS_FEAT(SUMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_sumopa_d)
373 TRANS_FEAT(USMOPA_d, aa64_sme_i16i64, do_outprod, a, MO_64, gen_helper_sme_usmopa_d)