target/riscv: rvv: Add tail agnostic for vector integer merge and move instructions
[qemu/ar7.git] / target/riscv/vector_helper.c
1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/helper-proto.h"
26 #include "fpu/softfloat.h"
27 #include "tcg/tcg-gvec-desc.h"
28 #include "internals.h"
29 #include <math.h>
31 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
32 target_ulong s2)
34 int vlmax, vl;
35 RISCVCPU *cpu = env_archcpu(env);
36 uint64_t lmul = FIELD_EX64(s2, VTYPE, VLMUL);
37 uint16_t sew = 8 << FIELD_EX64(s2, VTYPE, VSEW);
38 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
39 int xlen = riscv_cpu_xlen(env);
40 bool vill = (s2 >> (xlen - 1)) & 0x1;
41 target_ulong reserved = s2 &
42 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
43 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
45 if (lmul & 4) {
46 /* Fractional LMUL. */
47 if (lmul == 4 ||
48 cpu->cfg.elen >> (8 - lmul) < sew) {
49 vill = true;
53 if ((sew > cpu->cfg.elen)
54 || vill
55 || (ediv != 0)
56 || (reserved != 0)) {
57 /* only set vill bit. */
58 env->vill = 1;
59 env->vtype = 0;
60 env->vl = 0;
61 env->vstart = 0;
62 return 0;
65 vlmax = vext_get_vlmax(cpu, s2);
66 if (s1 <= vlmax) {
67 vl = s1;
68 } else {
69 vl = vlmax;
71 env->vl = vl;
72 env->vtype = s2;
73 env->vstart = 0;
74 env->vill = 0;
75 return vl;
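/*
 * Illustrative example (not part of the original file): with VLEN = 128,
 * SEW = 32 and LMUL = 2, vlmax = LMUL * VLEN / SEW = 8, so a request of
 * s1 = 100 is clamped to vl = 8, while s1 = 5 returns vl = 5.
 */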
79 * Note that vector data is stored in host-endian 64-bit chunks,
80 * so addressing units smaller than that needs a host-endian fixup.
82 #if HOST_BIG_ENDIAN
83 #define H1(x) ((x) ^ 7)
84 #define H1_2(x) ((x) ^ 6)
85 #define H1_4(x) ((x) ^ 4)
86 #define H2(x) ((x) ^ 3)
87 #define H4(x) ((x) ^ 1)
88 #define H8(x) ((x))
89 #else
90 #define H1(x) (x)
91 #define H1_2(x) (x)
92 #define H1_4(x) (x)
93 #define H2(x) (x)
94 #define H4(x) (x)
95 #define H8(x) (x)
96 #endif
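/*
 * Illustrative sketch (not part of the original file, example only):
 * with the fixup above, logical byte element i of a 64-bit chunk reads
 * the same value on little- and big-endian hosts.
 */
#if 0
static void h1_endian_demo(void)
{
    uint64_t chunk = 0x0706050403020100ULL;
    uint8_t *bytes = (uint8_t *)&chunk;
    int i;

    for (i = 0; i < 8; i++) {
        /* H1(i) is i on little-endian hosts and i ^ 7 on big-endian ones */
        g_assert(bytes[H1(i)] == i);
    }
}
#endif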
98 static inline uint32_t vext_nf(uint32_t desc)
100 return FIELD_EX32(simd_data(desc), VDATA, NF);
103 static inline uint32_t vext_vm(uint32_t desc)
105 return FIELD_EX32(simd_data(desc), VDATA, VM);
109 * Encode LMUL to lmul as following:
110 * LMUL vlmul lmul
111 * 1 000 0
112 * 2 001 1
113 * 4 010 2
114 * 8 011 3
115 * - 100 -
116 * 1/8 101 -3
117 * 1/4 110 -2
118 * 1/2 111 -1
120 static inline int32_t vext_lmul(uint32_t desc)
122 return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3);
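/*
 * Illustrative example (not part of the original file): sextract32()
 * sign-extends the 3-bit vlmul field, e.g.
 *     sextract32(0b011, 0, 3) ==  3    (LMUL = 8)
 *     sextract32(0b101, 0, 3) == -3    (LMUL = 1/8)
 */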
125 static inline uint32_t vext_vta(uint32_t desc)
127 return FIELD_EX32(simd_data(desc), VDATA, VTA);
130 static inline uint32_t vext_vta_all_1s(uint32_t desc)
132 return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S);
136 * Get the maximum number of elements that can be operated on.
138 * log2_esz: log2 of element size in bytes.
140 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
143 * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
144 * so vlen in bytes (vlenb) is encoded as maxsz.
146 uint32_t vlenb = simd_maxsz(desc);
148 /* Return VLMAX */
149 int scale = vext_lmul(desc) - log2_esz;
150 return scale < 0 ? vlenb >> -scale : vlenb << scale;
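/*
 * Illustrative example (not part of the original file): with vlenb = 16
 * (VLEN = 128), LMUL = 1/2 (lmul = -1) and 32-bit elements (log2_esz = 2),
 * scale = -1 - 2 = -3 and VLMAX = 16 >> 3 = 2.
 */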
154 * Get the total number of elements, including prestart, body and tail elements.
155 * Note that when LMUL < 1, the tail includes the elements past VLMAX that
156 * are held in the same vector register.
158 static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc,
159 uint32_t esz)
161 uint32_t vlenb = simd_maxsz(desc);
162 uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
163 int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 :
164 ctzl(esz) - ctzl(sew) + vext_lmul(desc);
165 return (vlenb << emul) / esz;
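/*
 * Illustrative example (not part of the original file): with vlenb = 16,
 * SEW = 16 (sew = 2 bytes), LMUL = 1 and a 32-bit EEW access (esz = 4),
 * emul = ctzl(4) - ctzl(2) + 0 = 1, so the destination occupies an
 * EMUL = 2 register group holding (16 << 1) / 4 = 8 elements in total.
 */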
168 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
170 return (addr & env->cur_pmmask) | env->cur_pmbase;
174 * This function checks watchpoints before the real load operation.
176 * In softmmu mode, the TLB API probe_access is enough for the watchpoint check.
177 * In user mode, there is no watchpoint support now.
179 * It will trigger an exception if there is no mapping in the TLB
180 * and the page table walk can't fill the TLB entry. Then the guest
181 * software can return here after processing the exception, or never return.
183 static void probe_pages(CPURISCVState *env, target_ulong addr,
184 target_ulong len, uintptr_t ra,
185 MMUAccessType access_type)
187 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
188 target_ulong curlen = MIN(pagelen, len);
190 probe_access(env, adjust_addr(env, addr), curlen, access_type,
191 cpu_mmu_index(env, false), ra);
192 if (len > curlen) {
193 addr += curlen;
194 curlen = len - curlen;
195 probe_access(env, adjust_addr(env, addr), curlen, access_type,
196 cpu_mmu_index(env, false), ra);
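/*
 * Illustrative example (not part of the original file): if addr is
 * 4 bytes before a page boundary and len = 16, pagelen = 4, so the
 * first probe covers the 4 bytes left on the current page and the
 * second probe covers the remaining 12 bytes on the next page.
 */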
200 /* set agnostic elements to 1s */
201 static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt,
202 uint32_t tot)
204 if (is_agnostic == 0) {
205 /* policy undisturbed */
206 return;
208 if (tot - cnt == 0) {
209 return;
211 memset(base + cnt, -1, tot - cnt);
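/*
 * Illustrative sketch (not part of the original file, example only):
 * with a tail-agnostic policy, the bytes past the body are overwritten
 * with all ones, e.g. for vl = 5 byte elements in a 16-byte register:
 */
#if 0
static void vta_demo(void)
{
    uint8_t reg[16] = { 0 };

    /* bytes 5..15 become 0xff; bytes 0..4 are left untouched */
    vext_set_elems_1s(reg, 1, 5, sizeof(reg));
}
#endif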
214 static inline void vext_set_elem_mask(void *v0, int index,
215 uint8_t value)
217 int idx = index / 64;
218 int pos = index % 64;
219 uint64_t old = ((uint64_t *)v0)[idx];
220 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
224 * Earlier designs (pre-0.9) had a varying number of bits
225 * per mask value (MLEN). In the 0.9 design, MLEN=1.
226 * (Section 4.5)
228 static inline int vext_elem_mask(void *v0, int index)
230 int idx = index / 64;
231 int pos = index % 64;
232 return (((uint64_t *)v0)[idx] >> pos) & 1;
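/*
 * Illustrative sketch (not part of the original file, example only):
 * mask element i lives in bit (i % 64) of 64-bit word (i / 64).
 */
#if 0
static void mask_bit_demo(void)
{
    uint64_t mask[2] = { 0 };

    vext_set_elem_mask(mask, 70, 1);          /* sets bit 6 of mask[1] */
    g_assert(vext_elem_mask(mask, 70) == 1);
    g_assert(vext_elem_mask(mask, 69) == 0);
}
#endif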
235 /* elements operations for load and store */
236 typedef void vext_ldst_elem_fn(CPURISCVState *env, target_ulong addr,
237 uint32_t idx, void *vd, uintptr_t retaddr);
239 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
240 static void NAME(CPURISCVState *env, abi_ptr addr, \
241 uint32_t idx, void *vd, uintptr_t retaddr)\
243 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
244 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
247 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
248 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
249 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
250 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
252 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
253 static void NAME(CPURISCVState *env, abi_ptr addr, \
254 uint32_t idx, void *vd, uintptr_t retaddr)\
256 ETYPE data = *((ETYPE *)vd + H(idx)); \
257 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
260 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
261 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
262 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
263 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
266 *** stride: access vector elements from strided memory
268 static void
269 vext_ldst_stride(void *vd, void *v0, target_ulong base,
270 target_ulong stride, CPURISCVState *env,
271 uint32_t desc, uint32_t vm,
272 vext_ldst_elem_fn *ldst_elem,
273 uint32_t log2_esz, uintptr_t ra)
275 uint32_t i, k;
276 uint32_t nf = vext_nf(desc);
277 uint32_t max_elems = vext_max_elems(desc, log2_esz);
278 uint32_t esz = 1 << log2_esz;
279 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
280 uint32_t vta = vext_vta(desc);
282 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
283 if (!vm && !vext_elem_mask(v0, i)) {
284 continue;
287 k = 0;
288 while (k < nf) {
289 target_ulong addr = base + stride * i + (k << log2_esz);
290 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
291 k++;
294 env->vstart = 0;
295 /* set tail elements to 1s */
296 for (k = 0; k < nf; ++k) {
297 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
298 (k * max_elems + max_elems) * esz);
300 if (nf * max_elems % total_elems != 0) {
301 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
302 uint32_t registers_used =
303 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
304 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
305 registers_used * vlenb);
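/*
 * Illustrative example (not part of the original file): with VLEN = 128
 * (vlenb = 16), EEW = 32, EMUL = 1/2 (max_elems = 2) and nf = 3, the
 * three segment fields occupy 3 * 2 * 4 = 24 bytes, so registers_used =
 * ceil(24 / 16) = 2 and bytes 24..31 of the register group are also
 * filled with 1s when the policy is tail-agnostic.
 */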
309 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
310 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
311 target_ulong stride, CPURISCVState *env, \
312 uint32_t desc) \
314 uint32_t vm = vext_vm(desc); \
315 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
316 ctzl(sizeof(ETYPE)), GETPC()); \
319 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
320 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
321 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
322 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
324 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
325 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
326 target_ulong stride, CPURISCVState *env, \
327 uint32_t desc) \
329 uint32_t vm = vext_vm(desc); \
330 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
331 ctzl(sizeof(ETYPE)), GETPC()); \
334 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
335 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
336 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
337 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
340 *** unit-stride: access elements stored contiguously in memory
343 /* unmasked unit-stride load and store operations */
344 static void
345 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
346 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
347 uintptr_t ra)
349 uint32_t i, k;
350 uint32_t nf = vext_nf(desc);
351 uint32_t max_elems = vext_max_elems(desc, log2_esz);
352 uint32_t esz = 1 << log2_esz;
353 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
354 uint32_t vta = vext_vta(desc);
356 /* load bytes from guest memory */
357 for (i = env->vstart; i < evl; i++, env->vstart++) {
358 k = 0;
359 while (k < nf) {
360 target_ulong addr = base + ((i * nf + k) << log2_esz);
361 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
362 k++;
365 env->vstart = 0;
366 /* set tail elements to 1s */
367 for (k = 0; k < nf; ++k) {
368 vext_set_elems_1s(vd, vta, (k * max_elems + evl) * esz,
369 (k * max_elems + max_elems) * esz);
371 if (nf * max_elems % total_elems != 0) {
372 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
373 uint32_t registers_used =
374 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
375 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
376 registers_used * vlenb);
381 * masked unit-stride load and store operations are a special case of strided
382 * accesses, with stride = NF * sizeof(MTYPE)
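 * (Illustrative example, not in the original comment: a masked vlseg3e16
 * access uses stride = 3 * sizeof(int16_t) = 6 bytes between segments.)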
385 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
386 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
387 CPURISCVState *env, uint32_t desc) \
389 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
390 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
391 ctzl(sizeof(ETYPE)), GETPC()); \
394 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
395 CPURISCVState *env, uint32_t desc) \
397 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
398 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
401 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
402 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
403 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
404 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
406 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
407 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
408 CPURISCVState *env, uint32_t desc) \
410 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
411 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
412 ctzl(sizeof(ETYPE)), GETPC()); \
415 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
416 CPURISCVState *env, uint32_t desc) \
418 vext_ldst_us(vd, base, env, desc, STORE_FN, \
419 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
422 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
423 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
424 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
425 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
428 *** unit-stride mask load and store, EEW = 1
430 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
431 CPURISCVState *env, uint32_t desc)
433 /* evl = ceil(vl/8) */
434 uint8_t evl = (env->vl + 7) >> 3;
435 vext_ldst_us(vd, base, env, desc, lde_b,
436 0, evl, GETPC());
439 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
440 CPURISCVState *env, uint32_t desc)
442 /* evl = ceil(vl/8) */
443 uint8_t evl = (env->vl + 7) >> 3;
444 vext_ldst_us(vd, base, env, desc, ste_b,
445 0, evl, GETPC());
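/*
 * Illustrative example (not part of the original file): for vl = 17
 * mask bits, evl = ceil(17 / 8) = 3, so vlm.v/vsm.v transfer 3 bytes.
 */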
449 *** index: access vector elements from indexed memory
451 typedef target_ulong vext_get_index_addr(target_ulong base,
452 uint32_t idx, void *vs2);
454 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
455 static target_ulong NAME(target_ulong base, \
456 uint32_t idx, void *vs2) \
458 return (base + *((ETYPE *)vs2 + H(idx))); \
461 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
462 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
463 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
464 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
466 static inline void
467 vext_ldst_index(void *vd, void *v0, target_ulong base,
468 void *vs2, CPURISCVState *env, uint32_t desc,
469 vext_get_index_addr get_index_addr,
470 vext_ldst_elem_fn *ldst_elem,
471 uint32_t log2_esz, uintptr_t ra)
473 uint32_t i, k;
474 uint32_t nf = vext_nf(desc);
475 uint32_t vm = vext_vm(desc);
476 uint32_t max_elems = vext_max_elems(desc, log2_esz);
477 uint32_t esz = 1 << log2_esz;
478 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
479 uint32_t vta = vext_vta(desc);
481 /* load bytes from guest memory */
482 for (i = env->vstart; i < env->vl; i++, env->vstart++) {
483 if (!vm && !vext_elem_mask(v0, i)) {
484 continue;
487 k = 0;
488 while (k < nf) {
489 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
490 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
491 k++;
494 env->vstart = 0;
495 /* set tail elements to 1s */
496 for (k = 0; k < nf; ++k) {
497 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
498 (k * max_elems + max_elems) * esz);
500 if (nf * max_elems % total_elems != 0) {
501 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
502 uint32_t registers_used =
503 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
504 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
505 registers_used * vlenb);
509 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
510 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
511 void *vs2, CPURISCVState *env, uint32_t desc) \
513 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
514 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
517 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
518 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
519 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
520 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
521 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
522 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
523 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
524 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
525 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
526 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
527 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
528 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
529 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
530 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
531 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
532 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
534 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
535 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
536 void *vs2, CPURISCVState *env, uint32_t desc) \
538 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
539 STORE_FN, ctzl(sizeof(ETYPE)), \
540 GETPC()); \
543 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
544 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
545 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
546 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
547 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
548 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
549 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
550 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
551 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
552 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
553 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
554 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
555 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
556 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
557 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
558 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
561 *** unit-stride fault-only-first load instructions
563 static inline void
564 vext_ldff(void *vd, void *v0, target_ulong base,
565 CPURISCVState *env, uint32_t desc,
566 vext_ldst_elem_fn *ldst_elem,
567 uint32_t log2_esz, uintptr_t ra)
569 void *host;
570 uint32_t i, k, vl = 0;
571 uint32_t nf = vext_nf(desc);
572 uint32_t vm = vext_vm(desc);
573 uint32_t max_elems = vext_max_elems(desc, log2_esz);
574 uint32_t esz = 1 << log2_esz;
575 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
576 uint32_t vta = vext_vta(desc);
577 target_ulong addr, offset, remain;
579 /* probe every access */
580 for (i = env->vstart; i < env->vl; i++) {
581 if (!vm && !vext_elem_mask(v0, i)) {
582 continue;
584 addr = adjust_addr(env, base + i * (nf << log2_esz));
585 if (i == 0) {
586 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
587 } else {
588 /* if it triggers an exception, no need to check watchpoint */
589 remain = nf << log2_esz;
590 while (remain > 0) {
591 offset = -(addr | TARGET_PAGE_MASK);
592 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD,
593 cpu_mmu_index(env, false));
594 if (host) {
595 #ifdef CONFIG_USER_ONLY
596 if (page_check_range(addr, offset, PAGE_READ) < 0) {
597 vl = i;
598 goto ProbeSuccess;
600 #else
601 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
602 #endif
603 } else {
604 vl = i;
605 goto ProbeSuccess;
607 if (remain <= offset) {
608 break;
610 remain -= offset;
611 addr = adjust_addr(env, addr + offset);
615 ProbeSuccess:
616 /* load bytes from guest memory */
617 if (vl != 0) {
618 env->vl = vl;
620 for (i = env->vstart; i < env->vl; i++) {
621 k = 0;
622 if (!vm && !vext_elem_mask(v0, i)) {
623 continue;
625 while (k < nf) {
626 target_ulong addr = base + ((i * nf + k) << log2_esz);
627 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
628 k++;
631 env->vstart = 0;
632 /* set tail elements to 1s */
633 for (k = 0; k < nf; ++k) {
634 vext_set_elems_1s(vd, vta, (k * max_elems + env->vl) * esz,
635 (k * max_elems + max_elems) * esz);
637 if (nf * max_elems % total_elems != 0) {
638 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
639 uint32_t registers_used =
640 ((nf * max_elems) * esz + (vlenb - 1)) / vlenb;
641 vext_set_elems_1s(vd, vta, (nf * max_elems) * esz,
642 registers_used * vlenb);
646 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
647 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
648 CPURISCVState *env, uint32_t desc) \
650 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
651 ctzl(sizeof(ETYPE)), GETPC()); \
654 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
655 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
656 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
657 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
659 #define DO_SWAP(N, M) (M)
660 #define DO_AND(N, M) (N & M)
661 #define DO_XOR(N, M) (N ^ M)
662 #define DO_OR(N, M) (N | M)
663 #define DO_ADD(N, M) (N + M)
665 /* Signed min/max */
666 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
667 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
669 /* Unsigned min/max */
670 #define DO_MAXU(N, M) DO_MAX((UMTYPE)N, (UMTYPE)M)
671 #define DO_MINU(N, M) DO_MIN((UMTYPE)N, (UMTYPE)M)
674 *** load and store whole register instructions
676 static void
677 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
678 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
680 uint32_t i, k, off, pos;
681 uint32_t nf = vext_nf(desc);
682 uint32_t vlenb = env_archcpu(env)->cfg.vlen >> 3;
683 uint32_t max_elems = vlenb >> log2_esz;
685 k = env->vstart / max_elems;
686 off = env->vstart % max_elems;
688 if (off) {
689 /* load/store the rest of the elements of the current segment pointed to by vstart */
690 for (pos = off; pos < max_elems; pos++, env->vstart++) {
691 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
692 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd, ra);
694 k++;
697 /* load/store elements for rest of segments */
698 for (; k < nf; k++) {
699 for (i = 0; i < max_elems; i++, env->vstart++) {
700 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
701 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
705 env->vstart = 0;
708 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
709 void HELPER(NAME)(void *vd, target_ulong base, \
710 CPURISCVState *env, uint32_t desc) \
712 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
713 ctzl(sizeof(ETYPE)), GETPC()); \
716 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
717 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
718 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
719 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
720 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
721 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
722 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
723 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
724 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
725 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
726 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
727 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
728 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
729 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
730 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
731 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
733 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
734 void HELPER(NAME)(void *vd, target_ulong base, \
735 CPURISCVState *env, uint32_t desc) \
737 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
738 ctzl(sizeof(ETYPE)), GETPC()); \
741 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
742 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
743 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
744 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
747 *** Vector Integer Arithmetic Instructions
750 /* expand macro args before macro */
751 #define RVVCALL(macro, ...) macro(__VA_ARGS__)
753 /* (TD, T1, T2, TX1, TX2) */
754 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
755 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
756 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
757 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
758 #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t
759 #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t
760 #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t
761 #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t
762 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
763 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
764 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
765 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
766 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
767 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
768 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
769 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
770 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
771 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
772 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
773 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
774 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
775 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
776 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
777 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
778 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
779 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
780 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
781 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
782 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
783 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
785 /* operation of two vector elements */
786 typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i);
788 #define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
789 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
791 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
792 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
793 *((TD *)vd + HD(i)) = OP(s2, s1); \
795 #define DO_SUB(N, M) (N - M)
796 #define DO_RSUB(N, M) (M - N)
798 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
799 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
800 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
801 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
802 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
803 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
804 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
805 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
807 static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2,
808 CPURISCVState *env, uint32_t desc,
809 opivv2_fn *fn, uint32_t esz)
811 uint32_t vm = vext_vm(desc);
812 uint32_t vl = env->vl;
813 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
814 uint32_t vta = vext_vta(desc);
815 uint32_t i;
817 for (i = env->vstart; i < vl; i++) {
818 if (!vm && !vext_elem_mask(v0, i)) {
819 continue;
821 fn(vd, vs1, vs2, i);
823 env->vstart = 0;
824 /* set tail elements to 1s */
825 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
828 /* generate the helpers for OPIVV */
829 #define GEN_VEXT_VV(NAME, ESZ) \
830 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
831 void *vs2, CPURISCVState *env, \
832 uint32_t desc) \
834 do_vext_vv(vd, v0, vs1, vs2, env, desc, \
835 do_##NAME, ESZ); \
838 GEN_VEXT_VV(vadd_vv_b, 1)
839 GEN_VEXT_VV(vadd_vv_h, 2)
840 GEN_VEXT_VV(vadd_vv_w, 4)
841 GEN_VEXT_VV(vadd_vv_d, 8)
842 GEN_VEXT_VV(vsub_vv_b, 1)
843 GEN_VEXT_VV(vsub_vv_h, 2)
844 GEN_VEXT_VV(vsub_vv_w, 4)
845 GEN_VEXT_VV(vsub_vv_d, 8)
847 typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i);
850 * (T1)s1 gives the real operand type.
851 * (TX1)(T1)s1 expands the operand type for widening or narrowing operations.
853 #define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
854 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
856 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
857 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \
860 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
861 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
862 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
863 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
864 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
865 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
866 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
867 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
868 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
869 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
870 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
871 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
873 static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2,
874 CPURISCVState *env, uint32_t desc,
875 opivx2_fn fn, uint32_t esz)
877 uint32_t vm = vext_vm(desc);
878 uint32_t vl = env->vl;
879 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
880 uint32_t vta = vext_vta(desc);
881 uint32_t i;
883 for (i = env->vstart; i < vl; i++) {
884 if (!vm && !vext_elem_mask(v0, i)) {
885 continue;
887 fn(vd, s1, vs2, i);
889 env->vstart = 0;
890 /* set tail elements to 1s */
891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
894 /* generate the helpers for OPIVX */
895 #define GEN_VEXT_VX(NAME, ESZ) \
896 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
897 void *vs2, CPURISCVState *env, \
898 uint32_t desc) \
900 do_vext_vx(vd, v0, s1, vs2, env, desc, \
901 do_##NAME, ESZ); \
904 GEN_VEXT_VX(vadd_vx_b, 1)
905 GEN_VEXT_VX(vadd_vx_h, 2)
906 GEN_VEXT_VX(vadd_vx_w, 4)
907 GEN_VEXT_VX(vadd_vx_d, 8)
908 GEN_VEXT_VX(vsub_vx_b, 1)
909 GEN_VEXT_VX(vsub_vx_h, 2)
910 GEN_VEXT_VX(vsub_vx_w, 4)
911 GEN_VEXT_VX(vsub_vx_d, 8)
912 GEN_VEXT_VX(vrsub_vx_b, 1)
913 GEN_VEXT_VX(vrsub_vx_h, 2)
914 GEN_VEXT_VX(vrsub_vx_w, 4)
915 GEN_VEXT_VX(vrsub_vx_d, 8)
917 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
919 intptr_t oprsz = simd_oprsz(desc);
920 intptr_t i;
922 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
923 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
927 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
929 intptr_t oprsz = simd_oprsz(desc);
930 intptr_t i;
932 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
933 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
937 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
939 intptr_t oprsz = simd_oprsz(desc);
940 intptr_t i;
942 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
943 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
947 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
949 intptr_t oprsz = simd_oprsz(desc);
950 intptr_t i;
952 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
953 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
957 /* Vector Widening Integer Add/Subtract */
958 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
959 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
960 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
961 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
962 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
963 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
964 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
965 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
966 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
967 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
968 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
969 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
970 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
971 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
972 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
973 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
974 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
975 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
976 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
977 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
978 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
979 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
980 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
981 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
982 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
983 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
984 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
985 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
986 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
987 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
988 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
989 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
990 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
991 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
992 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
993 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
994 GEN_VEXT_VV(vwaddu_vv_b, 2)
995 GEN_VEXT_VV(vwaddu_vv_h, 4)
996 GEN_VEXT_VV(vwaddu_vv_w, 8)
997 GEN_VEXT_VV(vwsubu_vv_b, 2)
998 GEN_VEXT_VV(vwsubu_vv_h, 4)
999 GEN_VEXT_VV(vwsubu_vv_w, 8)
1000 GEN_VEXT_VV(vwadd_vv_b, 2)
1001 GEN_VEXT_VV(vwadd_vv_h, 4)
1002 GEN_VEXT_VV(vwadd_vv_w, 8)
1003 GEN_VEXT_VV(vwsub_vv_b, 2)
1004 GEN_VEXT_VV(vwsub_vv_h, 4)
1005 GEN_VEXT_VV(vwsub_vv_w, 8)
1006 GEN_VEXT_VV(vwaddu_wv_b, 2)
1007 GEN_VEXT_VV(vwaddu_wv_h, 4)
1008 GEN_VEXT_VV(vwaddu_wv_w, 8)
1009 GEN_VEXT_VV(vwsubu_wv_b, 2)
1010 GEN_VEXT_VV(vwsubu_wv_h, 4)
1011 GEN_VEXT_VV(vwsubu_wv_w, 8)
1012 GEN_VEXT_VV(vwadd_wv_b, 2)
1013 GEN_VEXT_VV(vwadd_wv_h, 4)
1014 GEN_VEXT_VV(vwadd_wv_w, 8)
1015 GEN_VEXT_VV(vwsub_wv_b, 2)
1016 GEN_VEXT_VV(vwsub_wv_h, 4)
1017 GEN_VEXT_VV(vwsub_wv_w, 8)
1019 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
1020 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
1021 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
1022 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
1023 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
1024 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
1025 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
1026 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
1027 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
1028 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
1029 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
1030 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
1031 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
1032 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
1033 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
1034 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
1035 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
1036 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
1037 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
1038 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
1039 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
1040 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
1041 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
1042 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
1043 GEN_VEXT_VX(vwaddu_vx_b, 2)
1044 GEN_VEXT_VX(vwaddu_vx_h, 4)
1045 GEN_VEXT_VX(vwaddu_vx_w, 8)
1046 GEN_VEXT_VX(vwsubu_vx_b, 2)
1047 GEN_VEXT_VX(vwsubu_vx_h, 4)
1048 GEN_VEXT_VX(vwsubu_vx_w, 8)
1049 GEN_VEXT_VX(vwadd_vx_b, 2)
1050 GEN_VEXT_VX(vwadd_vx_h, 4)
1051 GEN_VEXT_VX(vwadd_vx_w, 8)
1052 GEN_VEXT_VX(vwsub_vx_b, 2)
1053 GEN_VEXT_VX(vwsub_vx_h, 4)
1054 GEN_VEXT_VX(vwsub_vx_w, 8)
1055 GEN_VEXT_VX(vwaddu_wx_b, 2)
1056 GEN_VEXT_VX(vwaddu_wx_h, 4)
1057 GEN_VEXT_VX(vwaddu_wx_w, 8)
1058 GEN_VEXT_VX(vwsubu_wx_b, 2)
1059 GEN_VEXT_VX(vwsubu_wx_h, 4)
1060 GEN_VEXT_VX(vwsubu_wx_w, 8)
1061 GEN_VEXT_VX(vwadd_wx_b, 2)
1062 GEN_VEXT_VX(vwadd_wx_h, 4)
1063 GEN_VEXT_VX(vwadd_wx_w, 8)
1064 GEN_VEXT_VX(vwsub_wx_b, 2)
1065 GEN_VEXT_VX(vwsub_wx_h, 4)
1066 GEN_VEXT_VX(vwsub_wx_w, 8)
1068 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
1069 #define DO_VADC(N, M, C) (N + M + C)
1070 #define DO_VSBC(N, M, C) (N - M - C)
1072 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
1073 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1074 CPURISCVState *env, uint32_t desc) \
1076 uint32_t vl = env->vl; \
1077 uint32_t esz = sizeof(ETYPE); \
1078 uint32_t total_elems = \
1079 vext_get_total_elems(env, desc, esz); \
1080 uint32_t vta = vext_vta(desc); \
1081 uint32_t i; \
1083 for (i = env->vstart; i < vl; i++) { \
1084 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1085 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1086 ETYPE carry = vext_elem_mask(v0, i); \
1088 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
1090 env->vstart = 0; \
1091 /* set tail elements to 1s */ \
1092 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1095 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
1096 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
1097 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
1098 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
1100 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
1101 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
1102 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
1103 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
1105 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
1106 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1107 CPURISCVState *env, uint32_t desc) \
1109 uint32_t vl = env->vl; \
1110 uint32_t esz = sizeof(ETYPE); \
1111 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1112 uint32_t vta = vext_vta(desc); \
1113 uint32_t i; \
1115 for (i = env->vstart; i < vl; i++) { \
1116 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1117 ETYPE carry = vext_elem_mask(v0, i); \
1119 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
1121 env->vstart = 0; \
1122 /* set tail elements to 1s */ \
1123 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1126 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
1127 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
1128 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
1129 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
1131 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
1132 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
1133 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
1134 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
1136 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
1137 (__typeof(N))(N + M) < N)
1138 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
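/*
 * Illustrative example (not part of the original file): for uint8_t
 * operands, DO_MADC(200, 100, 0) evaluates (uint8_t)(200 + 100) = 44,
 * and 44 < 200, so the carry-out is 1; DO_MADC(100, 100, 0) gives
 * (uint8_t)200 = 200, which is not less than 100, so the carry-out is 0.
 */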
1140 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
1141 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1142 CPURISCVState *env, uint32_t desc) \
1144 uint32_t vl = env->vl; \
1145 uint32_t vm = vext_vm(desc); \
1146 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1147 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1148 uint32_t i; \
1150 for (i = env->vstart; i < vl; i++) { \
1151 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1152 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1153 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1154 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
1156 env->vstart = 0; \
1157 /* mask destination register is always tail-agnostic */ \
1158 /* set tail elements to 1s */ \
1159 if (vta_all_1s) { \
1160 for (; i < total_elems; i++) { \
1161 vext_set_elem_mask(vd, i, 1); \
1166 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
1167 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
1168 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
1169 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
1171 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
1172 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
1173 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
1174 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
1176 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
1177 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1178 void *vs2, CPURISCVState *env, uint32_t desc) \
1180 uint32_t vl = env->vl; \
1181 uint32_t vm = vext_vm(desc); \
1182 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1183 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1184 uint32_t i; \
1186 for (i = env->vstart; i < vl; i++) { \
1187 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1188 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1189 vext_set_elem_mask(vd, i, \
1190 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1192 env->vstart = 0; \
1193 /* mask destination register is always tail-agnostic */ \
1194 /* set tail elements to 1s */ \
1195 if (vta_all_1s) { \
1196 for (; i < total_elems; i++) { \
1197 vext_set_elem_mask(vd, i, 1); \
1202 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1203 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1204 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1205 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1207 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1208 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1209 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1210 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1212 /* Vector Bitwise Logical Instructions */
1213 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1214 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1215 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1216 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1217 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1218 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1219 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1220 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1221 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1222 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1223 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1224 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1225 GEN_VEXT_VV(vand_vv_b, 1)
1226 GEN_VEXT_VV(vand_vv_h, 2)
1227 GEN_VEXT_VV(vand_vv_w, 4)
1228 GEN_VEXT_VV(vand_vv_d, 8)
1229 GEN_VEXT_VV(vor_vv_b, 1)
1230 GEN_VEXT_VV(vor_vv_h, 2)
1231 GEN_VEXT_VV(vor_vv_w, 4)
1232 GEN_VEXT_VV(vor_vv_d, 8)
1233 GEN_VEXT_VV(vxor_vv_b, 1)
1234 GEN_VEXT_VV(vxor_vv_h, 2)
1235 GEN_VEXT_VV(vxor_vv_w, 4)
1236 GEN_VEXT_VV(vxor_vv_d, 8)
1238 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1239 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1240 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1241 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1242 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1243 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1244 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1245 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1246 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1247 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1248 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1249 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1250 GEN_VEXT_VX(vand_vx_b, 1)
1251 GEN_VEXT_VX(vand_vx_h, 2)
1252 GEN_VEXT_VX(vand_vx_w, 4)
1253 GEN_VEXT_VX(vand_vx_d, 8)
1254 GEN_VEXT_VX(vor_vx_b, 1)
1255 GEN_VEXT_VX(vor_vx_h, 2)
1256 GEN_VEXT_VX(vor_vx_w, 4)
1257 GEN_VEXT_VX(vor_vx_d, 8)
1258 GEN_VEXT_VX(vxor_vx_b, 1)
1259 GEN_VEXT_VX(vxor_vx_h, 2)
1260 GEN_VEXT_VX(vxor_vx_w, 4)
1261 GEN_VEXT_VX(vxor_vx_d, 8)
1263 /* Vector Single-Width Bit Shift Instructions */
1264 #define DO_SLL(N, M) (N << (M))
1265 #define DO_SRL(N, M) (N >> (M))
1267 /* generate the helpers for shift instructions with two vector operands */
1268 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1269 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1270 void *vs2, CPURISCVState *env, uint32_t desc) \
1272 uint32_t vm = vext_vm(desc); \
1273 uint32_t vl = env->vl; \
1274 uint32_t esz = sizeof(TS1); \
1275 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1276 uint32_t vta = vext_vta(desc); \
1277 uint32_t i; \
1279 for (i = env->vstart; i < vl; i++) { \
1280 if (!vm && !vext_elem_mask(v0, i)) { \
1281 continue; \
1283 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1284 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1285 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1287 env->vstart = 0; \
1288 /* set tail elements to 1s */ \
1289 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1292 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1293 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1294 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1295 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1297 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1298 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1299 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1300 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1302 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1303 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1304 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1305 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1307 /* generate the helpers for shift instructions with one vector and one scalar */
1308 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1309 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1310 void *vs2, CPURISCVState *env, uint32_t desc) \
1312 uint32_t vm = vext_vm(desc); \
1313 uint32_t vl = env->vl; \
1314 uint32_t esz = sizeof(TD); \
1315 uint32_t total_elems = \
1316 vext_get_total_elems(env, desc, esz); \
1317 uint32_t vta = vext_vta(desc); \
1318 uint32_t i; \
1320 for (i = env->vstart; i < vl; i++) { \
1321 if (!vm && !vext_elem_mask(v0, i)) { \
1322 continue; \
1324 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1325 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1327 env->vstart = 0; \
1328 /* set tail elements to 1s */ \
1329 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1332 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1333 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1334 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1335 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1337 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1338 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1339 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1340 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1342 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1343 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1344 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1345 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1347 /* Vector Narrowing Integer Right Shift Instructions */
1348 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1349 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1350 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1351 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1352 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1353 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1354 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1355 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1356 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1357 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1358 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1359 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1361 /* Vector Integer Comparison Instructions */
1362 #define DO_MSEQ(N, M) (N == M)
1363 #define DO_MSNE(N, M) (N != M)
1364 #define DO_MSLT(N, M) (N < M)
1365 #define DO_MSLE(N, M) (N <= M)
1366 #define DO_MSGT(N, M) (N > M)
1368 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1369 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1370 CPURISCVState *env, uint32_t desc) \
1372 uint32_t vm = vext_vm(desc); \
1373 uint32_t vl = env->vl; \
1374 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1375 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1376 uint32_t i; \
1378 for (i = env->vstart; i < vl; i++) { \
1379 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1380 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1381 if (!vm && !vext_elem_mask(v0, i)) { \
1382 continue; \
1384 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1386 env->vstart = 0; \
1387 /* mask destination register is always tail-agnostic */ \
1388 /* set tail elements to 1s */ \
1389 if (vta_all_1s) { \
1390 for (; i < total_elems; i++) { \
1391 vext_set_elem_mask(vd, i, 1); \
1396 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1397 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1398 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1399 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1401 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1402 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1403 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1404 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1406 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1407 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1408 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1409 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1411 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1412 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1413 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1414 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1416 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1417 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1418 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1419 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1421 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1422 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1423 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1424 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1426 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1427 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1428 CPURISCVState *env, uint32_t desc) \
1430 uint32_t vm = vext_vm(desc); \
1431 uint32_t vl = env->vl; \
1432 uint32_t total_elems = env_archcpu(env)->cfg.vlen; \
1433 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1434 uint32_t i; \
1436 for (i = env->vstart; i < vl; i++) { \
1437 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1438 if (!vm && !vext_elem_mask(v0, i)) { \
1439 continue; \
1441 vext_set_elem_mask(vd, i, \
1442 DO_OP(s2, (ETYPE)(target_long)s1)); \
1444 env->vstart = 0; \
1445 /* mask destination register is always tail-agnostic */ \
1446 /* set tail elements to 1s */ \
1447 if (vta_all_1s) { \
1448 for (; i < total_elems; i++) { \
1449 vext_set_elem_mask(vd, i, 1); \
1454 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1455 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1456 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1457 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1459 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1460 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1461 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1462 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1464 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1465 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1466 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1467 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1469 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1470 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1471 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1472 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1474 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1475 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1476 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1477 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1479 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1480 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1481 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1482 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1484 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1485 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1486 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1487 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1489 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1490 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1491 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1492 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1494 /* Vector Integer Min/Max Instructions */
1495 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1496 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1497 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1498 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1499 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1500 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1501 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1502 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1503 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1504 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1505 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1506 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1507 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1508 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1509 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1510 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1511 GEN_VEXT_VV(vminu_vv_b, 1)
1512 GEN_VEXT_VV(vminu_vv_h, 2)
1513 GEN_VEXT_VV(vminu_vv_w, 4)
1514 GEN_VEXT_VV(vminu_vv_d, 8)
1515 GEN_VEXT_VV(vmin_vv_b, 1)
1516 GEN_VEXT_VV(vmin_vv_h, 2)
1517 GEN_VEXT_VV(vmin_vv_w, 4)
1518 GEN_VEXT_VV(vmin_vv_d, 8)
1519 GEN_VEXT_VV(vmaxu_vv_b, 1)
1520 GEN_VEXT_VV(vmaxu_vv_h, 2)
1521 GEN_VEXT_VV(vmaxu_vv_w, 4)
1522 GEN_VEXT_VV(vmaxu_vv_d, 8)
1523 GEN_VEXT_VV(vmax_vv_b, 1)
1524 GEN_VEXT_VV(vmax_vv_h, 2)
1525 GEN_VEXT_VV(vmax_vv_w, 4)
1526 GEN_VEXT_VV(vmax_vv_d, 8)
1528 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1529 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1530 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1531 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1532 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1533 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1534 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1535 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1536 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1537 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1538 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1539 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1540 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1541 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1542 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1543 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1544 GEN_VEXT_VX(vminu_vx_b, 1)
1545 GEN_VEXT_VX(vminu_vx_h, 2)
1546 GEN_VEXT_VX(vminu_vx_w, 4)
1547 GEN_VEXT_VX(vminu_vx_d, 8)
1548 GEN_VEXT_VX(vmin_vx_b, 1)
1549 GEN_VEXT_VX(vmin_vx_h, 2)
1550 GEN_VEXT_VX(vmin_vx_w, 4)
1551 GEN_VEXT_VX(vmin_vx_d, 8)
1552 GEN_VEXT_VX(vmaxu_vx_b, 1)
1553 GEN_VEXT_VX(vmaxu_vx_h, 2)
1554 GEN_VEXT_VX(vmaxu_vx_w, 4)
1555 GEN_VEXT_VX(vmaxu_vx_d, 8)
1556 GEN_VEXT_VX(vmax_vx_b, 1)
1557 GEN_VEXT_VX(vmax_vx_h, 2)
1558 GEN_VEXT_VX(vmax_vx_w, 4)
1559 GEN_VEXT_VX(vmax_vx_d, 8)
1561 /* Vector Single-Width Integer Multiply Instructions */
1562 #define DO_MUL(N, M) (N * M)
1563 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1564 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1565 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1566 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1567 GEN_VEXT_VV(vmul_vv_b, 1)
1568 GEN_VEXT_VV(vmul_vv_h, 2)
1569 GEN_VEXT_VV(vmul_vv_w, 4)
1570 GEN_VEXT_VV(vmul_vv_d, 8)
1572 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1574 return (int16_t)s2 * (int16_t)s1 >> 8;
1577 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1579 return (int32_t)s2 * (int32_t)s1 >> 16;
1582 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1584 return (int64_t)s2 * (int64_t)s1 >> 32;
1587 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1589 uint64_t hi_64, lo_64;
1591 muls64(&lo_64, &hi_64, s1, s2);
1592 return hi_64;
1595 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1597 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1600 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1602 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1605 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1607 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1610 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1612 uint64_t hi_64, lo_64;
1614 mulu64(&lo_64, &hi_64, s2, s1);
1615 return hi_64;
1618 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1620 return (int16_t)s2 * (uint16_t)s1 >> 8;
1623 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1625 return (int32_t)s2 * (uint32_t)s1 >> 16;
1628 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1630 return (int64_t)s2 * (uint64_t)s1 >> 32;
1634 * Let A = signed operand,
1635 * B = unsigned operand,
1636 * P = mulu64(A, B), the product with A read as unsigned
1638 * LET X = 2 ** 64 + A, the unsigned encoding of A when A < 0
1639 * SP = A * B, the signed product
1640 * THEN
1641 * IF A < 0
1642 * P = X * B
1643 * = (2 ** 64 + A) * B
1644 * = A * B + 2 ** 64 * B
1645 * so SP = P - 2 ** 64 * B
1646 * ELSE
1647 * SP = P
1648 * THEN
1649 * HI_P -= (A < 0 ? B : 0)
1652 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1654 uint64_t hi_64, lo_64;
1656 mulu64(&lo_64, &hi_64, s2, s1);
1658 hi_64 -= s2 < 0 ? s1 : 0;
1659 return hi_64;
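/*
 * Illustrative check (not part of the original source), using an 8-bit
 * analogue of the correction above: A = -3 (encoded as 253), B = 200.
 * The unsigned product is 253 * 200 = 50600 = 0xc5a8, so HI_P = 0xc5 = 197.
 * The true signed product is -600 = 0xfda8, whose high byte is 0xfd = -3,
 * and indeed HI_P - B = 197 - 200 = -3, matching "hi_64 -= s2 < 0 ? s1 : 0".
 */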
1662 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1663 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1664 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1665 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1666 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1667 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1668 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1669 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1670 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1671 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1672 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1673 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1674 GEN_VEXT_VV(vmulh_vv_b, 1)
1675 GEN_VEXT_VV(vmulh_vv_h, 2)
1676 GEN_VEXT_VV(vmulh_vv_w, 4)
1677 GEN_VEXT_VV(vmulh_vv_d, 8)
1678 GEN_VEXT_VV(vmulhu_vv_b, 1)
1679 GEN_VEXT_VV(vmulhu_vv_h, 2)
1680 GEN_VEXT_VV(vmulhu_vv_w, 4)
1681 GEN_VEXT_VV(vmulhu_vv_d, 8)
1682 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1683 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1684 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1685 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1687 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1688 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1689 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1690 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1691 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1692 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1693 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1694 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1695 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1696 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1697 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1698 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1699 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1700 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1701 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1702 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1703 GEN_VEXT_VX(vmul_vx_b, 1)
1704 GEN_VEXT_VX(vmul_vx_h, 2)
1705 GEN_VEXT_VX(vmul_vx_w, 4)
1706 GEN_VEXT_VX(vmul_vx_d, 8)
1707 GEN_VEXT_VX(vmulh_vx_b, 1)
1708 GEN_VEXT_VX(vmulh_vx_h, 2)
1709 GEN_VEXT_VX(vmulh_vx_w, 4)
1710 GEN_VEXT_VX(vmulh_vx_d, 8)
1711 GEN_VEXT_VX(vmulhu_vx_b, 1)
1712 GEN_VEXT_VX(vmulhu_vx_h, 2)
1713 GEN_VEXT_VX(vmulhu_vx_w, 4)
1714 GEN_VEXT_VX(vmulhu_vx_d, 8)
1715 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1716 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1717 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1718 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1720 /* Vector Integer Divide Instructions */
1721 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1722 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1723 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) :\
1724 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1725 #define DO_REM(N, M) (unlikely(M == 0) ? N :\
1726 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
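/*
 * Illustrative sanity check (not from the original source), assuming the
 * 64-bit element types these macros are instantiated with below:
 *
 *   DO_DIVU(7, 0)         -> UINT64_MAX  (division by zero yields all ones)
 *   DO_REMU(7, 0)         -> 7           (remainder keeps the dividend)
 *   DO_DIV(INT64_MIN, -1) -> INT64_MIN   (the (N == -N) guard catches the
 *                                         one overflowing signed quotient)
 *   DO_REM(INT64_MIN, -1) -> 0           (overflowing remainder is zero)
 */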
1728 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1729 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1730 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1731 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1732 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1733 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1734 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1735 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1736 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1737 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1738 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1739 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1740 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1741 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1742 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1743 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1744 GEN_VEXT_VV(vdivu_vv_b, 1)
1745 GEN_VEXT_VV(vdivu_vv_h, 2)
1746 GEN_VEXT_VV(vdivu_vv_w, 4)
1747 GEN_VEXT_VV(vdivu_vv_d, 8)
1748 GEN_VEXT_VV(vdiv_vv_b, 1)
1749 GEN_VEXT_VV(vdiv_vv_h, 2)
1750 GEN_VEXT_VV(vdiv_vv_w, 4)
1751 GEN_VEXT_VV(vdiv_vv_d, 8)
1752 GEN_VEXT_VV(vremu_vv_b, 1)
1753 GEN_VEXT_VV(vremu_vv_h, 2)
1754 GEN_VEXT_VV(vremu_vv_w, 4)
1755 GEN_VEXT_VV(vremu_vv_d, 8)
1756 GEN_VEXT_VV(vrem_vv_b, 1)
1757 GEN_VEXT_VV(vrem_vv_h, 2)
1758 GEN_VEXT_VV(vrem_vv_w, 4)
1759 GEN_VEXT_VV(vrem_vv_d, 8)
1761 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1762 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1763 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1764 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1765 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1766 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1767 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1768 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1769 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1770 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1771 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1772 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1773 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1774 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1775 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1776 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1777 GEN_VEXT_VX(vdivu_vx_b, 1)
1778 GEN_VEXT_VX(vdivu_vx_h, 2)
1779 GEN_VEXT_VX(vdivu_vx_w, 4)
1780 GEN_VEXT_VX(vdivu_vx_d, 8)
1781 GEN_VEXT_VX(vdiv_vx_b, 1)
1782 GEN_VEXT_VX(vdiv_vx_h, 2)
1783 GEN_VEXT_VX(vdiv_vx_w, 4)
1784 GEN_VEXT_VX(vdiv_vx_d, 8)
1785 GEN_VEXT_VX(vremu_vx_b, 1)
1786 GEN_VEXT_VX(vremu_vx_h, 2)
1787 GEN_VEXT_VX(vremu_vx_w, 4)
1788 GEN_VEXT_VX(vremu_vx_d, 8)
1789 GEN_VEXT_VX(vrem_vx_b, 1)
1790 GEN_VEXT_VX(vrem_vx_h, 2)
1791 GEN_VEXT_VX(vrem_vx_w, 4)
1792 GEN_VEXT_VX(vrem_vx_d, 8)
1794 /* Vector Widening Integer Multiply Instructions */
1795 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1796 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1797 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1798 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1799 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1800 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1801 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1802 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1803 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1804 GEN_VEXT_VV(vwmul_vv_b, 2)
1805 GEN_VEXT_VV(vwmul_vv_h, 4)
1806 GEN_VEXT_VV(vwmul_vv_w, 8)
1807 GEN_VEXT_VV(vwmulu_vv_b, 2)
1808 GEN_VEXT_VV(vwmulu_vv_h, 4)
1809 GEN_VEXT_VV(vwmulu_vv_w, 8)
1810 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1811 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1812 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1814 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1815 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1816 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1817 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1818 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1819 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1820 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1821 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1822 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1823 GEN_VEXT_VX(vwmul_vx_b, 2)
1824 GEN_VEXT_VX(vwmul_vx_h, 4)
1825 GEN_VEXT_VX(vwmul_vx_w, 8)
1826 GEN_VEXT_VX(vwmulu_vx_b, 2)
1827 GEN_VEXT_VX(vwmulu_vx_h, 4)
1828 GEN_VEXT_VX(vwmulu_vx_w, 8)
1829 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1830 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1831 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1833 /* Vector Single-Width Integer Multiply-Add Instructions */
1834 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1835 static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \
1837 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
1838 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1839 TD d = *((TD *)vd + HD(i)); \
1840 *((TD *)vd + HD(i)) = OP(s2, s1, d); \
1843 #define DO_MACC(N, M, D) (M * N + D)
1844 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1845 #define DO_MADD(N, M, D) (M * D + N)
1846 #define DO_NMSUB(N, M, D) (-(M * D) + N)
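/*
 * Illustrative note (not from the original source): the four ternary ops
 * differ only in which operand provides the addend.  With the macro call
 * OP(s2, s1, d) used by do_##NAME and s2 = 3, s1 = 4, d = 10:
 *
 *   DO_MACC(3, 4, 10)  =  4 * 3  + 10 =  22   (vd = vs1 * vs2 + vd)
 *   DO_NMSAC(3, 4, 10) = -(4 * 3) + 10 =  -2  (vd = -(vs1 * vs2) + vd)
 *   DO_MADD(3, 4, 10)  =  4 * 10 + 3  =  43   (vd = vs1 * vd + vs2)
 *   DO_NMSUB(3, 4, 10) = -(4 * 10) + 3 = -37  (vd = -(vs1 * vd) + vs2)
 */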
1847 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1848 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1849 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1850 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1851 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1852 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1853 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1854 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1855 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1856 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1857 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1858 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1859 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1860 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1861 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1862 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1863 GEN_VEXT_VV(vmacc_vv_b, 1)
1864 GEN_VEXT_VV(vmacc_vv_h, 2)
1865 GEN_VEXT_VV(vmacc_vv_w, 4)
1866 GEN_VEXT_VV(vmacc_vv_d, 8)
1867 GEN_VEXT_VV(vnmsac_vv_b, 1)
1868 GEN_VEXT_VV(vnmsac_vv_h, 2)
1869 GEN_VEXT_VV(vnmsac_vv_w, 4)
1870 GEN_VEXT_VV(vnmsac_vv_d, 8)
1871 GEN_VEXT_VV(vmadd_vv_b, 1)
1872 GEN_VEXT_VV(vmadd_vv_h, 2)
1873 GEN_VEXT_VV(vmadd_vv_w, 4)
1874 GEN_VEXT_VV(vmadd_vv_d, 8)
1875 GEN_VEXT_VV(vnmsub_vv_b, 1)
1876 GEN_VEXT_VV(vnmsub_vv_h, 2)
1877 GEN_VEXT_VV(vnmsub_vv_w, 4)
1878 GEN_VEXT_VV(vnmsub_vv_d, 8)
1880 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
1881 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1883 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
1884 TD d = *((TD *)vd + HD(i)); \
1885 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d); \
1888 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1889 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1890 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1891 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1892 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1893 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1894 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1895 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1896 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1897 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1898 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1899 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1900 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1901 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1902 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1903 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1904 GEN_VEXT_VX(vmacc_vx_b, 1)
1905 GEN_VEXT_VX(vmacc_vx_h, 2)
1906 GEN_VEXT_VX(vmacc_vx_w, 4)
1907 GEN_VEXT_VX(vmacc_vx_d, 8)
1908 GEN_VEXT_VX(vnmsac_vx_b, 1)
1909 GEN_VEXT_VX(vnmsac_vx_h, 2)
1910 GEN_VEXT_VX(vnmsac_vx_w, 4)
1911 GEN_VEXT_VX(vnmsac_vx_d, 8)
1912 GEN_VEXT_VX(vmadd_vx_b, 1)
1913 GEN_VEXT_VX(vmadd_vx_h, 2)
1914 GEN_VEXT_VX(vmadd_vx_w, 4)
1915 GEN_VEXT_VX(vmadd_vx_d, 8)
1916 GEN_VEXT_VX(vnmsub_vx_b, 1)
1917 GEN_VEXT_VX(vnmsub_vx_h, 2)
1918 GEN_VEXT_VX(vnmsub_vx_w, 4)
1919 GEN_VEXT_VX(vnmsub_vx_d, 8)
1921 /* Vector Widening Integer Multiply-Add Instructions */
1922 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1923 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1924 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1925 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1926 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1927 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1928 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1929 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1930 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1931 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1932 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1933 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1934 GEN_VEXT_VV(vwmacc_vv_b, 2)
1935 GEN_VEXT_VV(vwmacc_vv_h, 4)
1936 GEN_VEXT_VV(vwmacc_vv_w, 8)
1937 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1938 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1939 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1941 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1942 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1943 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1944 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1945 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1946 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1947 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1948 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1949 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1950 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1951 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1952 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1953 GEN_VEXT_VX(vwmaccu_vx_b, 2)
1954 GEN_VEXT_VX(vwmaccu_vx_h, 4)
1955 GEN_VEXT_VX(vwmaccu_vx_w, 8)
1956 GEN_VEXT_VX(vwmacc_vx_b, 2)
1957 GEN_VEXT_VX(vwmacc_vx_h, 4)
1958 GEN_VEXT_VX(vwmacc_vx_w, 8)
1959 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1960 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1961 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1962 GEN_VEXT_VX(vwmaccus_vx_b, 2)
1963 GEN_VEXT_VX(vwmaccus_vx_h, 4)
1964 GEN_VEXT_VX(vwmaccus_vx_w, 8)
1966 /* Vector Integer Merge and Move Instructions */
1967 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1968 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1969 uint32_t desc) \
1971 uint32_t vl = env->vl; \
1972 uint32_t esz = sizeof(ETYPE); \
1973 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1974 uint32_t vta = vext_vta(desc); \
1975 uint32_t i; \
1977 for (i = env->vstart; i < vl; i++) { \
1978 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1979 *((ETYPE *)vd + H(i)) = s1; \
1981 env->vstart = 0; \
1982 /* set tail elements to 1s */ \
1983 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1986 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1987 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1988 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1989 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
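/*
 * Illustrative note (not part of the original source): with, say,
 * total_elems = 8, vl = 5 and vta = 1, the vmv.v.v loop above copies
 * elements 0..4 from vs1 and vext_set_elems_1s() then fills bytes
 * [5 * esz, 8 * esz) of vd with all ones, i.e. the tail-agnostic policy
 * this patch adds.  With vta = 0 the tail bytes are left undisturbed.
 */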
1991 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1992 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1993 uint32_t desc) \
1995 uint32_t vl = env->vl; \
1996 uint32_t esz = sizeof(ETYPE); \
1997 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1998 uint32_t vta = vext_vta(desc); \
1999 uint32_t i; \
2001 for (i = env->vstart; i < vl; i++) { \
2002 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
2004 env->vstart = 0; \
2005 /* set tail elements to 1s */ \
2006 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2009 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
2010 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
2011 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
2012 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
2014 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
2015 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2016 CPURISCVState *env, uint32_t desc) \
2018 uint32_t vl = env->vl; \
2019 uint32_t esz = sizeof(ETYPE); \
2020 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2021 uint32_t vta = vext_vta(desc); \
2022 uint32_t i; \
2024 for (i = env->vstart; i < vl; i++) { \
2025 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
2026 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
2028 env->vstart = 0; \
2029 /* set tail elements to 1s */ \
2030 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2033 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
2034 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
2035 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
2036 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
2038 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
2039 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2040 void *vs2, CPURISCVState *env, uint32_t desc) \
2042 uint32_t vl = env->vl; \
2043 uint32_t esz = sizeof(ETYPE); \
2044 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
2045 uint32_t vta = vext_vta(desc); \
2046 uint32_t i; \
2048 for (i = env->vstart; i < vl; i++) { \
2049 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
2050 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
2051 (ETYPE)(target_long)s1); \
2052 *((ETYPE *)vd + H(i)) = d; \
2054 env->vstart = 0; \
2055 /* set tail elements to 1s */ \
2056 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
2059 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
2060 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
2061 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
2062 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
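/*
 * Illustrative note (not from the original source): for vmerge.vvm with
 * mask bits {1, 0, 1, 0} in v0, vs1 = {10, 11, 12, 13} and
 * vs2 = {20, 21, 22, 23}, the vector-vector loop above yields
 * vd = {10, 21, 12, 23}: element i is taken from vs1 when
 * vext_elem_mask(v0, i) is set and from vs2 otherwise, and the tail is
 * again written with all ones when vta is set.
 */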
2065 *** Vector Fixed-Point Arithmetic Instructions
2068 /* Vector Single-Width Saturating Add and Subtract */
2071 * As fixed-point instructions share rounding-mode and saturation handling,
2072 * define the common fixed-point macros here.
2074 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
2075 CPURISCVState *env, int vxrm);
2077 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2078 static inline void \
2079 do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2080 CPURISCVState *env, int vxrm) \
2082 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2083 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2084 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1); \
2087 static inline void
2088 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
2089 CPURISCVState *env,
2090 uint32_t vl, uint32_t vm, int vxrm,
2091 opivv2_rm_fn *fn)
2093 for (uint32_t i = env->vstart; i < vl; i++) {
2094 if (!vm && !vext_elem_mask(v0, i)) {
2095 continue;
2097 fn(vd, vs1, vs2, i, env, vxrm);
2099 env->vstart = 0;
2102 static inline void
2103 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
2104 CPURISCVState *env,
2105 uint32_t desc,
2106 opivv2_rm_fn *fn)
2108 uint32_t vm = vext_vm(desc);
2109 uint32_t vl = env->vl;
2111 switch (env->vxrm) {
2112 case 0: /* rnu */
2113 vext_vv_rm_1(vd, v0, vs1, vs2,
2114 env, vl, vm, 0, fn);
2115 break;
2116 case 1: /* rne */
2117 vext_vv_rm_1(vd, v0, vs1, vs2,
2118 env, vl, vm, 1, fn);
2119 break;
2120 case 2: /* rdn */
2121 vext_vv_rm_1(vd, v0, vs1, vs2,
2122 env, vl, vm, 2, fn);
2123 break;
2124 default: /* rod */
2125 vext_vv_rm_1(vd, v0, vs1, vs2,
2126 env, vl, vm, 3, fn);
2127 break;
2131 /* generate helpers for fixed point instructions with OPIVV format */
2132 #define GEN_VEXT_VV_RM(NAME) \
2133 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2134 CPURISCVState *env, uint32_t desc) \
2136 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2137 do_##NAME); \
2140 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2142 uint8_t res = a + b;
2143 if (res < a) {
2144 res = UINT8_MAX;
2145 env->vxsat = 0x1;
2147 return res;
2150 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2151 uint16_t b)
2153 uint16_t res = a + b;
2154 if (res < a) {
2155 res = UINT16_MAX;
2156 env->vxsat = 0x1;
2158 return res;
2161 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2162 uint32_t b)
2164 uint32_t res = a + b;
2165 if (res < a) {
2166 res = UINT32_MAX;
2167 env->vxsat = 0x1;
2169 return res;
2172 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2173 uint64_t b)
2175 uint64_t res = a + b;
2176 if (res < a) {
2177 res = UINT64_MAX;
2178 env->vxsat = 0x1;
2180 return res;
2183 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2184 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2185 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2186 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2187 GEN_VEXT_VV_RM(vsaddu_vv_b)
2188 GEN_VEXT_VV_RM(vsaddu_vv_h)
2189 GEN_VEXT_VV_RM(vsaddu_vv_w)
2190 GEN_VEXT_VV_RM(vsaddu_vv_d)
2192 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2193 CPURISCVState *env, int vxrm);
2195 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
2196 static inline void \
2197 do_##NAME(void *vd, target_long s1, void *vs2, int i, \
2198 CPURISCVState *env, int vxrm) \
2200 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2201 *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1); \
2204 static inline void
2205 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2206 CPURISCVState *env,
2207 uint32_t vl, uint32_t vm, int vxrm,
2208 opivx2_rm_fn *fn)
2210 for (uint32_t i = env->vstart; i < vl; i++) {
2211 if (!vm && !vext_elem_mask(v0, i)) {
2212 continue;
2214 fn(vd, s1, vs2, i, env, vxrm);
2216 env->vstart = 0;
2219 static inline void
2220 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2221 CPURISCVState *env,
2222 uint32_t desc,
2223 opivx2_rm_fn *fn)
2225 uint32_t vm = vext_vm(desc);
2226 uint32_t vl = env->vl;
2228 switch (env->vxrm) {
2229 case 0: /* rnu */
2230 vext_vx_rm_1(vd, v0, s1, vs2,
2231 env, vl, vm, 0, fn);
2232 break;
2233 case 1: /* rne */
2234 vext_vx_rm_1(vd, v0, s1, vs2,
2235 env, vl, vm, 1, fn);
2236 break;
2237 case 2: /* rdn */
2238 vext_vx_rm_1(vd, v0, s1, vs2,
2239 env, vl, vm, 2, fn);
2240 break;
2241 default: /* rod */
2242 vext_vx_rm_1(vd, v0, s1, vs2,
2243 env, vl, vm, 3, fn);
2244 break;
2248 /* generate helpers for fixed point instructions with OPIVX format */
2249 #define GEN_VEXT_VX_RM(NAME) \
2250 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2251 void *vs2, CPURISCVState *env, uint32_t desc) \
2253 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2254 do_##NAME); \
2257 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2258 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2259 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2260 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2261 GEN_VEXT_VX_RM(vsaddu_vx_b)
2262 GEN_VEXT_VX_RM(vsaddu_vx_h)
2263 GEN_VEXT_VX_RM(vsaddu_vx_w)
2264 GEN_VEXT_VX_RM(vsaddu_vx_d)
2266 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2268 int8_t res = a + b;
2269 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2270 res = a > 0 ? INT8_MAX : INT8_MIN;
2271 env->vxsat = 0x1;
2273 return res;
2276 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2278 int16_t res = a + b;
2279 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2280 res = a > 0 ? INT16_MAX : INT16_MIN;
2281 env->vxsat = 0x1;
2283 return res;
2286 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2288 int32_t res = a + b;
2289 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2290 res = a > 0 ? INT32_MAX : INT32_MIN;
2291 env->vxsat = 0x1;
2293 return res;
2296 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2298 int64_t res = a + b;
2299 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2300 res = a > 0 ? INT64_MAX : INT64_MIN;
2301 env->vxsat = 0x1;
2303 return res;
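/*
 * Illustrative note (not from the original source): the test
 * (res ^ a) & (res ^ b) & INT8_MIN is non-zero exactly when the sign of
 * the wrapped sum differs from the sign of both operands, i.e. on signed
 * overflow.  For example sadd8(env, vxrm, 100, 50) wraps to -106, both
 * XORs have the sign bit set, so the helper returns INT8_MAX (127) and
 * sets vxsat.
 */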
2306 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2307 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2308 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2309 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2310 GEN_VEXT_VV_RM(vsadd_vv_b)
2311 GEN_VEXT_VV_RM(vsadd_vv_h)
2312 GEN_VEXT_VV_RM(vsadd_vv_w)
2313 GEN_VEXT_VV_RM(vsadd_vv_d)
2315 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2316 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2317 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2318 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2319 GEN_VEXT_VX_RM(vsadd_vx_b)
2320 GEN_VEXT_VX_RM(vsadd_vx_h)
2321 GEN_VEXT_VX_RM(vsadd_vx_w)
2322 GEN_VEXT_VX_RM(vsadd_vx_d)
2324 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2326 uint8_t res = a - b;
2327 if (res > a) {
2328 res = 0;
2329 env->vxsat = 0x1;
2331 return res;
2334 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2335 uint16_t b)
2337 uint16_t res = a - b;
2338 if (res > a) {
2339 res = 0;
2340 env->vxsat = 0x1;
2342 return res;
2345 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2346 uint32_t b)
2348 uint32_t res = a - b;
2349 if (res > a) {
2350 res = 0;
2351 env->vxsat = 0x1;
2353 return res;
2356 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2357 uint64_t b)
2359 uint64_t res = a - b;
2360 if (res > a) {
2361 res = 0;
2362 env->vxsat = 0x1;
2364 return res;
2367 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2368 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2369 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2370 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2371 GEN_VEXT_VV_RM(vssubu_vv_b)
2372 GEN_VEXT_VV_RM(vssubu_vv_h)
2373 GEN_VEXT_VV_RM(vssubu_vv_w)
2374 GEN_VEXT_VV_RM(vssubu_vv_d)
2376 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2377 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2378 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2379 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2380 GEN_VEXT_VX_RM(vssubu_vx_b)
2381 GEN_VEXT_VX_RM(vssubu_vx_h)
2382 GEN_VEXT_VX_RM(vssubu_vx_w)
2383 GEN_VEXT_VX_RM(vssubu_vx_d)
2385 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2387 int8_t res = a - b;
2388 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2389 res = a >= 0 ? INT8_MAX : INT8_MIN;
2390 env->vxsat = 0x1;
2392 return res;
2395 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2397 int16_t res = a - b;
2398 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2399 res = a >= 0 ? INT16_MAX : INT16_MIN;
2400 env->vxsat = 0x1;
2402 return res;
2405 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2407 int32_t res = a - b;
2408 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2409 res = a >= 0 ? INT32_MAX : INT32_MIN;
2410 env->vxsat = 0x1;
2412 return res;
2415 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2417 int64_t res = a - b;
2418 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2419 res = a >= 0 ? INT64_MAX : INT64_MIN;
2420 env->vxsat = 0x1;
2422 return res;
2425 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2426 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2427 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2428 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2429 GEN_VEXT_VV_RM(vssub_vv_b)
2430 GEN_VEXT_VV_RM(vssub_vv_h)
2431 GEN_VEXT_VV_RM(vssub_vv_w)
2432 GEN_VEXT_VV_RM(vssub_vv_d)
2434 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2435 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2436 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2437 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2438 GEN_VEXT_VX_RM(vssub_vx_b)
2439 GEN_VEXT_VX_RM(vssub_vx_h)
2440 GEN_VEXT_VX_RM(vssub_vx_w)
2441 GEN_VEXT_VX_RM(vssub_vx_d)
2443 /* Vector Single-Width Averaging Add and Subtract */
2444 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2446 uint8_t d = extract64(v, shift, 1);
2447 uint8_t d1;
2448 uint64_t D1, D2;
2450 if (shift == 0 || shift > 64) {
2451 return 0;
2454 d1 = extract64(v, shift - 1, 1);
2455 D1 = extract64(v, 0, shift);
2456 if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2457 return d1;
2458 } else if (vxrm == 1) { /* round-to-nearest-even */
2459 if (shift > 1) {
2460 D2 = extract64(v, 0, shift - 1);
2461 return d1 & ((D2 != 0) | d);
2462 } else {
2463 return d1 & d;
2465 } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2466 return !d & (D1 != 0);
2468 return 0; /* round-down (truncate) */
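/*
 * Worked example (illustrative, not from the original source): rounding
 * v = 11 (0b1011) right by shift = 2, i.e. 11 / 4 = 2.75:
 *
 *   d = bit 2 = 0, d1 = bit 1 = 1, D1 = 0b11, D2 = 0b1
 *   rnu (0): returns d1 = 1            -> (11 >> 2) + 1 = 3
 *   rne (1): d1 & ((D2 != 0) | d) = 1  -> 3
 *   rdn (2): returns 0                 -> 2
 *   rod (3): !d & (D1 != 0) = 1        -> 3 (inexact result forced odd)
 */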
2471 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2473 int64_t res = (int64_t)a + b;
2474 uint8_t round = get_round(vxrm, res, 1);
2476 return (res >> 1) + round;
2479 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2481 int64_t res = a + b;
2482 uint8_t round = get_round(vxrm, res, 1);
2483 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2485 /* With signed overflow, bit 64 is inverse of bit 63. */
2486 return ((res >> 1) ^ over) + round;
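/*
 * Illustrative 8-bit analogue (not from the original source) of the
 * fix-up above: averaging a = 100 and b = 60 wraps res to -96 (0xa0).
 * res >> 1 = -48, and XORing with 'over' (only the sign bit, set because
 * the addition overflowed) restores the true sign: -48 ^ 0x80 = 80, the
 * exact average (100 + 60) / 2, before the rounding increment is added.
 */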
2489 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2490 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2491 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2492 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2493 GEN_VEXT_VV_RM(vaadd_vv_b)
2494 GEN_VEXT_VV_RM(vaadd_vv_h)
2495 GEN_VEXT_VV_RM(vaadd_vv_w)
2496 GEN_VEXT_VV_RM(vaadd_vv_d)
2498 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2499 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2500 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2501 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2502 GEN_VEXT_VX_RM(vaadd_vx_b)
2503 GEN_VEXT_VX_RM(vaadd_vx_h)
2504 GEN_VEXT_VX_RM(vaadd_vx_w)
2505 GEN_VEXT_VX_RM(vaadd_vx_d)
2507 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2508 uint32_t a, uint32_t b)
2510 uint64_t res = (uint64_t)a + b;
2511 uint8_t round = get_round(vxrm, res, 1);
2513 return (res >> 1) + round;
2516 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2517 uint64_t a, uint64_t b)
2519 uint64_t res = a + b;
2520 uint8_t round = get_round(vxrm, res, 1);
2521 uint64_t over = (uint64_t)(res < a) << 63;
2523 return ((res >> 1) | over) + round;
2526 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2527 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2528 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2529 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2530 GEN_VEXT_VV_RM(vaaddu_vv_b)
2531 GEN_VEXT_VV_RM(vaaddu_vv_h)
2532 GEN_VEXT_VV_RM(vaaddu_vv_w)
2533 GEN_VEXT_VV_RM(vaaddu_vv_d)
2535 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2536 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2537 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2538 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2539 GEN_VEXT_VX_RM(vaaddu_vx_b)
2540 GEN_VEXT_VX_RM(vaaddu_vx_h)
2541 GEN_VEXT_VX_RM(vaaddu_vx_w)
2542 GEN_VEXT_VX_RM(vaaddu_vx_d)
2544 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2546 int64_t res = (int64_t)a - b;
2547 uint8_t round = get_round(vxrm, res, 1);
2549 return (res >> 1) + round;
2552 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2554 int64_t res = (int64_t)a - b;
2555 uint8_t round = get_round(vxrm, res, 1);
2556 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2558 /* With signed overflow, bit 64 is inverse of bit 63. */
2559 return ((res >> 1) ^ over) + round;
2562 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2563 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2564 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2565 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2566 GEN_VEXT_VV_RM(vasub_vv_b)
2567 GEN_VEXT_VV_RM(vasub_vv_h)
2568 GEN_VEXT_VV_RM(vasub_vv_w)
2569 GEN_VEXT_VV_RM(vasub_vv_d)
2571 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2572 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2573 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2574 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2575 GEN_VEXT_VX_RM(vasub_vx_b)
2576 GEN_VEXT_VX_RM(vasub_vx_h)
2577 GEN_VEXT_VX_RM(vasub_vx_w)
2578 GEN_VEXT_VX_RM(vasub_vx_d)
2580 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2581 uint32_t a, uint32_t b)
2583 int64_t res = (int64_t)a - b;
2584 uint8_t round = get_round(vxrm, res, 1);
2586 return (res >> 1) + round;
2589 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2590 uint64_t a, uint64_t b)
2592 uint64_t res = (uint64_t)a - b;
2593 uint8_t round = get_round(vxrm, res, 1);
2594 uint64_t over = (uint64_t)(res > a) << 63;
2596 return ((res >> 1) | over) + round;
2599 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2600 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2601 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2602 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2603 GEN_VEXT_VV_RM(vasubu_vv_b)
2604 GEN_VEXT_VV_RM(vasubu_vv_h)
2605 GEN_VEXT_VV_RM(vasubu_vv_w)
2606 GEN_VEXT_VV_RM(vasubu_vv_d)
2608 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2609 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2610 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2611 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2612 GEN_VEXT_VX_RM(vasubu_vx_b)
2613 GEN_VEXT_VX_RM(vasubu_vx_h)
2614 GEN_VEXT_VX_RM(vasubu_vx_w)
2615 GEN_VEXT_VX_RM(vasubu_vx_d)
2617 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
2618 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2620 uint8_t round;
2621 int16_t res;
2623 res = (int16_t)a * (int16_t)b;
2624 round = get_round(vxrm, res, 7);
2625 res = (res >> 7) + round;
2627 if (res > INT8_MAX) {
2628 env->vxsat = 0x1;
2629 return INT8_MAX;
2630 } else if (res < INT8_MIN) {
2631 env->vxsat = 0x1;
2632 return INT8_MIN;
2633 } else {
2634 return res;
2638 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2640 uint8_t round;
2641 int32_t res;
2643 res = (int32_t)a * (int32_t)b;
2644 round = get_round(vxrm, res, 15);
2645 res = (res >> 15) + round;
2647 if (res > INT16_MAX) {
2648 env->vxsat = 0x1;
2649 return INT16_MAX;
2650 } else if (res < INT16_MIN) {
2651 env->vxsat = 0x1;
2652 return INT16_MIN;
2653 } else {
2654 return res;
2658 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2660 uint8_t round;
2661 int64_t res;
2663 res = (int64_t)a * (int64_t)b;
2664 round = get_round(vxrm, res, 31);
2665 res = (res >> 31) + round;
2667 if (res > INT32_MAX) {
2668 env->vxsat = 0x1;
2669 return INT32_MAX;
2670 } else if (res < INT32_MIN) {
2671 env->vxsat = 0x1;
2672 return INT32_MIN;
2673 } else {
2674 return res;
2678 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2680 uint8_t round;
2681 uint64_t hi_64, lo_64;
2682 int64_t res;
2684 if (a == INT64_MIN && b == INT64_MIN) {
2685 env->vxsat = 1;
2686 return INT64_MAX;
2689 muls64(&lo_64, &hi_64, a, b);
2690 round = get_round(vxrm, lo_64, 63);
2692 * Cannot overflow, as there are always
2693 * 2 sign bits after multiply.
2695 res = (hi_64 << 1) | (lo_64 >> 63);
2696 if (round) {
2697 if (res == INT64_MAX) {
2698 env->vxsat = 1;
2699 } else {
2700 res += 1;
2703 return res;
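/*
 * Illustrative note (not from the original source): muls64() leaves the
 * 128-bit product in hi_64:lo_64, and (hi_64 << 1) | (lo_64 >> 63)
 * extracts product bits [126:63], i.e. the Q63 fixed-point result
 * (a * b) >> 63.  Bits 127 and 126 are duplicate sign bits (the
 * INT64_MIN * INT64_MIN case was filtered earlier), so this extraction
 * itself cannot overflow; only the rounding increment can, which is why
 * it is checked against INT64_MAX above.
 */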
2706 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2707 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2708 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2709 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2710 GEN_VEXT_VV_RM(vsmul_vv_b)
2711 GEN_VEXT_VV_RM(vsmul_vv_h)
2712 GEN_VEXT_VV_RM(vsmul_vv_w)
2713 GEN_VEXT_VV_RM(vsmul_vv_d)
2715 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2716 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2717 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2718 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2719 GEN_VEXT_VX_RM(vsmul_vx_b)
2720 GEN_VEXT_VX_RM(vsmul_vx_h)
2721 GEN_VEXT_VX_RM(vsmul_vx_w)
2722 GEN_VEXT_VX_RM(vsmul_vx_d)
2724 /* Vector Single-Width Scaling Shift Instructions */
2725 static inline uint8_t
2726 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2728 uint8_t round, shift = b & 0x7;
2729 uint8_t res;
2731 round = get_round(vxrm, a, shift);
2732 res = (a >> shift) + round;
2733 return res;
2735 static inline uint16_t
2736 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2738 uint8_t round, shift = b & 0xf;
2739 uint16_t res;
2741 round = get_round(vxrm, a, shift);
2742 res = (a >> shift) + round;
2743 return res;
2745 static inline uint32_t
2746 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2748 uint8_t round, shift = b & 0x1f;
2749 uint32_t res;
2751 round = get_round(vxrm, a, shift);
2752 res = (a >> shift) + round;
2753 return res;
2755 static inline uint64_t
2756 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2758 uint8_t round, shift = b & 0x3f;
2759 uint64_t res;
2761 round = get_round(vxrm, a, shift);
2762 res = (a >> shift) + round;
2763 return res;
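/*
 * Illustrative note (not from the original source): the scaling shifts
 * reuse get_round() from above, so with vxrm = rnu, a = 11 and b = 2,
 * vssrl8() returns (11 >> 2) + 1 = 3, i.e. a logical right shift that
 * rounds half up instead of truncating.
 */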
2765 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2766 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2767 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2768 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2769 GEN_VEXT_VV_RM(vssrl_vv_b)
2770 GEN_VEXT_VV_RM(vssrl_vv_h)
2771 GEN_VEXT_VV_RM(vssrl_vv_w)
2772 GEN_VEXT_VV_RM(vssrl_vv_d)
2774 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2775 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2776 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2777 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2778 GEN_VEXT_VX_RM(vssrl_vx_b)
2779 GEN_VEXT_VX_RM(vssrl_vx_h)
2780 GEN_VEXT_VX_RM(vssrl_vx_w)
2781 GEN_VEXT_VX_RM(vssrl_vx_d)
2783 static inline int8_t
2784 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2786 uint8_t round, shift = b & 0x7;
2787 int8_t res;
2789 round = get_round(vxrm, a, shift);
2790 res = (a >> shift) + round;
2791 return res;
2793 static inline int16_t
2794 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2796 uint8_t round, shift = b & 0xf;
2797 int16_t res;
2799 round = get_round(vxrm, a, shift);
2800 res = (a >> shift) + round;
2801 return res;
2803 static inline int32_t
2804 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2806 uint8_t round, shift = b & 0x1f;
2807 int32_t res;
2809 round = get_round(vxrm, a, shift);
2810 res = (a >> shift) + round;
2811 return res;
2813 static inline int64_t
2814 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2816 uint8_t round, shift = b & 0x3f;
2817 int64_t res;
2819 round = get_round(vxrm, a, shift);
2820 res = (a >> shift) + round;
2821 return res;
2824 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2825 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2826 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2827 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2828 GEN_VEXT_VV_RM(vssra_vv_b)
2829 GEN_VEXT_VV_RM(vssra_vv_h)
2830 GEN_VEXT_VV_RM(vssra_vv_w)
2831 GEN_VEXT_VV_RM(vssra_vv_d)
2833 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2834 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2835 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2836 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2837 GEN_VEXT_VX_RM(vssra_vx_b)
2838 GEN_VEXT_VX_RM(vssra_vx_h)
2839 GEN_VEXT_VX_RM(vssra_vx_w)
2840 GEN_VEXT_VX_RM(vssra_vx_d)
2842 /* Vector Narrowing Fixed-Point Clip Instructions */
2843 static inline int8_t
2844 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2846 uint8_t round, shift = b & 0xf;
2847 int16_t res;
2849 round = get_round(vxrm, a, shift);
2850 res = (a >> shift) + round;
2851 if (res > INT8_MAX) {
2852 env->vxsat = 0x1;
2853 return INT8_MAX;
2854 } else if (res < INT8_MIN) {
2855 env->vxsat = 0x1;
2856 return INT8_MIN;
2857 } else {
2858 return res;
2862 static inline int16_t
2863 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2865 uint8_t round, shift = b & 0x1f;
2866 int32_t res;
2868 round = get_round(vxrm, a, shift);
2869 res = (a >> shift) + round;
2870 if (res > INT16_MAX) {
2871 env->vxsat = 0x1;
2872 return INT16_MAX;
2873 } else if (res < INT16_MIN) {
2874 env->vxsat = 0x1;
2875 return INT16_MIN;
2876 } else {
2877 return res;
2881 static inline int32_t
2882 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2884 uint8_t round, shift = b & 0x3f;
2885 int64_t res;
2887 round = get_round(vxrm, a, shift);
2888 res = (a >> shift) + round;
2889 if (res > INT32_MAX) {
2890 env->vxsat = 0x1;
2891 return INT32_MAX;
2892 } else if (res < INT32_MIN) {
2893 env->vxsat = 0x1;
2894 return INT32_MIN;
2895 } else {
2896 return res;
2900 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2901 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2902 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2903 GEN_VEXT_VV_RM(vnclip_wv_b)
2904 GEN_VEXT_VV_RM(vnclip_wv_h)
2905 GEN_VEXT_VV_RM(vnclip_wv_w)
2907 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2908 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2909 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2910 GEN_VEXT_VX_RM(vnclip_wx_b)
2911 GEN_VEXT_VX_RM(vnclip_wx_h)
2912 GEN_VEXT_VX_RM(vnclip_wx_w)
2914 static inline uint8_t
2915 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2917 uint8_t round, shift = b & 0xf;
2918 uint16_t res;
2920 round = get_round(vxrm, a, shift);
2921 res = (a >> shift) + round;
2922 if (res > UINT8_MAX) {
2923 env->vxsat = 0x1;
2924 return UINT8_MAX;
2925 } else {
2926 return res;
2930 static inline uint16_t
2931 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2933 uint8_t round, shift = b & 0x1f;
2934 uint32_t res;
2936 round = get_round(vxrm, a, shift);
2937 res = (a >> shift) + round;
2938 if (res > UINT16_MAX) {
2939 env->vxsat = 0x1;
2940 return UINT16_MAX;
2941 } else {
2942 return res;
2946 static inline uint32_t
2947 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2949 uint8_t round, shift = b & 0x3f;
2950 uint64_t res;
2952 round = get_round(vxrm, a, shift);
2953 res = (a >> shift) + round;
2954 if (res > UINT32_MAX) {
2955 env->vxsat = 0x1;
2956 return UINT32_MAX;
2957 } else {
2958 return res;
2962 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2963 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2964 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2965 GEN_VEXT_VV_RM(vnclipu_wv_b)
2966 GEN_VEXT_VV_RM(vnclipu_wv_h)
2967 GEN_VEXT_VV_RM(vnclipu_wv_w)
2969 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2970 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2971 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2972 GEN_VEXT_VX_RM(vnclipu_wx_b)
2973 GEN_VEXT_VX_RM(vnclipu_wx_h)
2974 GEN_VEXT_VX_RM(vnclipu_wx_w)
2977 *** Vector Floating-Point Arithmetic Instructions
2979 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2980 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2981 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2982 CPURISCVState *env) \
2984 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
2985 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
2986 *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status); \
2989 #define GEN_VEXT_VV_ENV(NAME) \
2990 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2991 void *vs2, CPURISCVState *env, \
2992 uint32_t desc) \
2994 uint32_t vm = vext_vm(desc); \
2995 uint32_t vl = env->vl; \
2996 uint32_t i; \
2998 for (i = env->vstart; i < vl; i++) { \
2999 if (!vm && !vext_elem_mask(v0, i)) { \
3000 continue; \
3002 do_##NAME(vd, vs1, vs2, i, env); \
3004 env->vstart = 0; \
3007 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
3008 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
3009 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
3010 GEN_VEXT_VV_ENV(vfadd_vv_h)
3011 GEN_VEXT_VV_ENV(vfadd_vv_w)
3012 GEN_VEXT_VV_ENV(vfadd_vv_d)
3014 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3015 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3016 CPURISCVState *env) \
3018 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3019 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status);\
3022 #define GEN_VEXT_VF(NAME) \
3023 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
3024 void *vs2, CPURISCVState *env, \
3025 uint32_t desc) \
3027 uint32_t vm = vext_vm(desc); \
3028 uint32_t vl = env->vl; \
3029 uint32_t i; \
3031 for (i = env->vstart; i < vl; i++) { \
3032 if (!vm && !vext_elem_mask(v0, i)) { \
3033 continue; \
3035 do_##NAME(vd, s1, vs2, i, env); \
3037 env->vstart = 0; \
3040 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
3041 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
3042 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
3043 GEN_VEXT_VF(vfadd_vf_h)
3044 GEN_VEXT_VF(vfadd_vf_w)
3045 GEN_VEXT_VF(vfadd_vf_d)
3047 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
3048 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
3049 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
3050 GEN_VEXT_VV_ENV(vfsub_vv_h)
3051 GEN_VEXT_VV_ENV(vfsub_vv_w)
3052 GEN_VEXT_VV_ENV(vfsub_vv_d)
3053 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
3054 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
3055 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
3056 GEN_VEXT_VF(vfsub_vf_h)
3057 GEN_VEXT_VF(vfsub_vf_w)
3058 GEN_VEXT_VF(vfsub_vf_d)
3060 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
3062 return float16_sub(b, a, s);
3065 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
3067 return float32_sub(b, a, s);
3070 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
3072 return float64_sub(b, a, s);
3075 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
3076 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
3077 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
3078 GEN_VEXT_VF(vfrsub_vf_h)
3079 GEN_VEXT_VF(vfrsub_vf_w)
3080 GEN_VEXT_VF(vfrsub_vf_d)
3082 /* Vector Widening Floating-Point Add/Subtract Instructions */
3083 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
3085 return float32_add(float16_to_float32(a, true, s),
3086 float16_to_float32(b, true, s), s);
3089 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
3091 return float64_add(float32_to_float64(a, s),
3092 float32_to_float64(b, s), s);
3096 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3097 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3098 GEN_VEXT_VV_ENV(vfwadd_vv_h)
3099 GEN_VEXT_VV_ENV(vfwadd_vv_w)
3100 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3101 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3102 GEN_VEXT_VF(vfwadd_vf_h)
3103 GEN_VEXT_VF(vfwadd_vf_w)
3105 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3107 return float32_sub(float16_to_float32(a, true, s),
3108 float16_to_float32(b, true, s), s);
3111 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3113 return float64_sub(float32_to_float64(a, s),
3114 float32_to_float64(b, s), s);
3118 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3119 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3120 GEN_VEXT_VV_ENV(vfwsub_vv_h)
3121 GEN_VEXT_VV_ENV(vfwsub_vv_w)
3122 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3123 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3124 GEN_VEXT_VF(vfwsub_vf_h)
3125 GEN_VEXT_VF(vfwsub_vf_w)
3127 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3129 return float32_add(a, float16_to_float32(b, true, s), s);
3132 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3134 return float64_add(a, float32_to_float64(b, s), s);
3137 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3138 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3139 GEN_VEXT_VV_ENV(vfwadd_wv_h)
3140 GEN_VEXT_VV_ENV(vfwadd_wv_w)
3141 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3142 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3143 GEN_VEXT_VF(vfwadd_wf_h)
3144 GEN_VEXT_VF(vfwadd_wf_w)
3146 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3148 return float32_sub(a, float16_to_float32(b, true, s), s);
3151 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3153 return float64_sub(a, float32_to_float64(b, s), s);
3156 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3157 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3158 GEN_VEXT_VV_ENV(vfwsub_wv_h)
3159 GEN_VEXT_VV_ENV(vfwsub_wv_w)
3160 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3161 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3162 GEN_VEXT_VF(vfwsub_wf_h)
3163 GEN_VEXT_VF(vfwsub_wf_w)
3165 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3166 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3167 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3168 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3169 GEN_VEXT_VV_ENV(vfmul_vv_h)
3170 GEN_VEXT_VV_ENV(vfmul_vv_w)
3171 GEN_VEXT_VV_ENV(vfmul_vv_d)
3172 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3173 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3174 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3175 GEN_VEXT_VF(vfmul_vf_h)
3176 GEN_VEXT_VF(vfmul_vf_w)
3177 GEN_VEXT_VF(vfmul_vf_d)
3179 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3180 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3181 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3182 GEN_VEXT_VV_ENV(vfdiv_vv_h)
3183 GEN_VEXT_VV_ENV(vfdiv_vv_w)
3184 GEN_VEXT_VV_ENV(vfdiv_vv_d)
3185 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3186 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3187 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3188 GEN_VEXT_VF(vfdiv_vf_h)
3189 GEN_VEXT_VF(vfdiv_vf_w)
3190 GEN_VEXT_VF(vfdiv_vf_d)
3192 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3194 return float16_div(b, a, s);
3197 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3199 return float32_div(b, a, s);
3202 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3204 return float64_div(b, a, s);
3207 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3208 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3209 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3210 GEN_VEXT_VF(vfrdiv_vf_h)
3211 GEN_VEXT_VF(vfrdiv_vf_w)
3212 GEN_VEXT_VF(vfrdiv_vf_d)
3214 /* Vector Widening Floating-Point Multiply */
3215 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3217 return float32_mul(float16_to_float32(a, true, s),
3218 float16_to_float32(b, true, s), s);
3221 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3223 return float64_mul(float32_to_float64(a, s),
3224 float32_to_float64(b, s), s);
3227 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3228 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3229 GEN_VEXT_VV_ENV(vfwmul_vv_h)
3230 GEN_VEXT_VV_ENV(vfwmul_vv_w)
3231 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3232 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3233 GEN_VEXT_VF(vfwmul_vf_h)
3234 GEN_VEXT_VF(vfwmul_vf_w)
3236 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3237 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3238 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3239 CPURISCVState *env) \
3241 TX1 s1 = *((T1 *)vs1 + HS1(i)); \
3242 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3243 TD d = *((TD *)vd + HD(i)); \
3244 *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status); \
3247 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3249 return float16_muladd(a, b, d, 0, s);
3252 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3254 return float32_muladd(a, b, d, 0, s);
3257 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3259 return float64_muladd(a, b, d, 0, s);
3262 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3263 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3264 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3265 GEN_VEXT_VV_ENV(vfmacc_vv_h)
3266 GEN_VEXT_VV_ENV(vfmacc_vv_w)
3267 GEN_VEXT_VV_ENV(vfmacc_vv_d)
3269 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \
3270 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i, \
3271 CPURISCVState *env) \
3273 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3274 TD d = *((TD *)vd + HD(i)); \
3275 *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status);\
3278 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3279 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3280 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3281 GEN_VEXT_VF(vfmacc_vf_h)
3282 GEN_VEXT_VF(vfmacc_vf_w)
3283 GEN_VEXT_VF(vfmacc_vf_d)
3285 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3287 return float16_muladd(a, b, d,
3288 float_muladd_negate_c | float_muladd_negate_product, s);
3291 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3293 return float32_muladd(a, b, d,
3294 float_muladd_negate_c | float_muladd_negate_product, s);
3297 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3299 return float64_muladd(a, b, d,
3300 float_muladd_negate_c | float_muladd_negate_product, s);
3303 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3304 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3305 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3306 GEN_VEXT_VV_ENV(vfnmacc_vv_h)
3307 GEN_VEXT_VV_ENV(vfnmacc_vv_w)
3308 GEN_VEXT_VV_ENV(vfnmacc_vv_d)
3309 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3310 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3311 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3312 GEN_VEXT_VF(vfnmacc_vf_h)
3313 GEN_VEXT_VF(vfnmacc_vf_w)
3314 GEN_VEXT_VF(vfnmacc_vf_d)
3316 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3318 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3321 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3323 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3326 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3328 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3331 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3332 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3333 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3334 GEN_VEXT_VV_ENV(vfmsac_vv_h)
3335 GEN_VEXT_VV_ENV(vfmsac_vv_w)
3336 GEN_VEXT_VV_ENV(vfmsac_vv_d)
3337 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3338 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3339 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3340 GEN_VEXT_VF(vfmsac_vf_h)
3341 GEN_VEXT_VF(vfmsac_vf_w)
3342 GEN_VEXT_VF(vfmsac_vf_d)
3344 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3346 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3349 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3351 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3354 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3356 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3359 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3360 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3361 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3362 GEN_VEXT_VV_ENV(vfnmsac_vv_h)
3363 GEN_VEXT_VV_ENV(vfnmsac_vv_w)
3364 GEN_VEXT_VV_ENV(vfnmsac_vv_d)
3365 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3366 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3367 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3368 GEN_VEXT_VF(vfnmsac_vf_h)
3369 GEN_VEXT_VF(vfnmsac_vf_w)
3370 GEN_VEXT_VF(vfnmsac_vf_d)
3372 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3374 return float16_muladd(d, b, a, 0, s);
3377 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3379 return float32_muladd(d, b, a, 0, s);
3382 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3384 return float64_muladd(d, b, a, 0, s);
3387 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3388 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3389 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3390 GEN_VEXT_VV_ENV(vfmadd_vv_h)
3391 GEN_VEXT_VV_ENV(vfmadd_vv_w)
3392 GEN_VEXT_VV_ENV(vfmadd_vv_d)
3393 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3394 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3395 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3396 GEN_VEXT_VF(vfmadd_vf_h)
3397 GEN_VEXT_VF(vfmadd_vf_w)
3398 GEN_VEXT_VF(vfmadd_vf_d)
3400 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3402 return float16_muladd(d, b, a,
3403 float_muladd_negate_c | float_muladd_negate_product, s);
3406 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3408 return float32_muladd(d, b, a,
3409 float_muladd_negate_c | float_muladd_negate_product, s);
3412 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3414 return float64_muladd(d, b, a,
3415 float_muladd_negate_c | float_muladd_negate_product, s);
3418 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3419 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3420 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3421 GEN_VEXT_VV_ENV(vfnmadd_vv_h)
3422 GEN_VEXT_VV_ENV(vfnmadd_vv_w)
3423 GEN_VEXT_VV_ENV(vfnmadd_vv_d)
3424 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3425 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3426 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3427 GEN_VEXT_VF(vfnmadd_vf_h)
3428 GEN_VEXT_VF(vfnmadd_vf_w)
3429 GEN_VEXT_VF(vfnmadd_vf_d)
3431 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3433 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3436 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3438 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3441 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3443 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3446 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3447 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3448 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3449 GEN_VEXT_VV_ENV(vfmsub_vv_h)
3450 GEN_VEXT_VV_ENV(vfmsub_vv_w)
3451 GEN_VEXT_VV_ENV(vfmsub_vv_d)
3452 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3453 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3454 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3455 GEN_VEXT_VF(vfmsub_vf_h)
3456 GEN_VEXT_VF(vfmsub_vf_w)
3457 GEN_VEXT_VF(vfmsub_vf_d)
3459 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3461 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3464 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3466 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3469 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3471 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3474 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3475 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3476 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3477 GEN_VEXT_VV_ENV(vfnmsub_vv_h)
3478 GEN_VEXT_VV_ENV(vfnmsub_vv_w)
3479 GEN_VEXT_VV_ENV(vfnmsub_vv_d)
3480 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3481 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3482 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3483 GEN_VEXT_VF(vfnmsub_vf_h)
3484 GEN_VEXT_VF(vfnmsub_vf_w)
3485 GEN_VEXT_VF(vfnmsub_vf_d)
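/*
 * Naming note: the *macc/*msac flavours accumulate into vd
 * (vd = +-(vs1 * vs2) +- vd), while the *madd/*msub flavours multiply vd
 * (vd = +-(vs1 * vd) +- vs2).  The difference shows up above only in the
 * operand order passed to float*_muladd and in the float_muladd_negate_*
 * flags.
 */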
3487 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3488 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3490 return float32_muladd(float16_to_float32(a, true, s),
3491 float16_to_float32(b, true, s), d, 0, s);
3494 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3496 return float64_muladd(float32_to_float64(a, s),
3497 float32_to_float64(b, s), d, 0, s);
3500 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3501 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3502 GEN_VEXT_VV_ENV(vfwmacc_vv_h)
3503 GEN_VEXT_VV_ENV(vfwmacc_vv_w)
3504 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3505 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3506 GEN_VEXT_VF(vfwmacc_vf_h)
3507 GEN_VEXT_VF(vfwmacc_vf_w)
3509 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3511 return float32_muladd(float16_to_float32(a, true, s),
3512 float16_to_float32(b, true, s), d,
3513 float_muladd_negate_c | float_muladd_negate_product, s);
3516 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3518 return float64_muladd(float32_to_float64(a, s),
3519 float32_to_float64(b, s), d,
3520 float_muladd_negate_c | float_muladd_negate_product, s);
3523 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3524 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3525 GEN_VEXT_VV_ENV(vfwnmacc_vv_h)
3526 GEN_VEXT_VV_ENV(vfwnmacc_vv_w)
3527 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3528 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3529 GEN_VEXT_VF(vfwnmacc_vf_h)
3530 GEN_VEXT_VF(vfwnmacc_vf_w)
3532 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3534 return float32_muladd(float16_to_float32(a, true, s),
3535 float16_to_float32(b, true, s), d,
3536 float_muladd_negate_c, s);
3539 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3541 return float64_muladd(float32_to_float64(a, s),
3542 float32_to_float64(b, s), d,
3543 float_muladd_negate_c, s);
3546 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3547 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3548 GEN_VEXT_VV_ENV(vfwmsac_vv_h)
3549 GEN_VEXT_VV_ENV(vfwmsac_vv_w)
3550 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3551 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3552 GEN_VEXT_VF(vfwmsac_vf_h)
3553 GEN_VEXT_VF(vfwmsac_vf_w)
3555 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3557 return float32_muladd(float16_to_float32(a, true, s),
3558 float16_to_float32(b, true, s), d,
3559 float_muladd_negate_product, s);
3562 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3564 return float64_muladd(float32_to_float64(a, s),
3565 float32_to_float64(b, s), d,
3566 float_muladd_negate_product, s);
3569 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3570 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3571 GEN_VEXT_VV_ENV(vfwnmsac_vv_h)
3572 GEN_VEXT_VV_ENV(vfwnmsac_vv_w)
3573 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3574 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3575 GEN_VEXT_VF(vfwnmsac_vf_h)
3576 GEN_VEXT_VF(vfwnmsac_vf_w)
3578 /* Vector Floating-Point Square-Root Instruction */
3579 /* (TD, T2, TX2) */
3580 #define OP_UU_H uint16_t, uint16_t, uint16_t
3581 #define OP_UU_W uint32_t, uint32_t, uint32_t
3582 #define OP_UU_D uint64_t, uint64_t, uint64_t
3584 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3585 static void do_##NAME(void *vd, void *vs2, int i, \
3586 CPURISCVState *env) \
3588 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3589 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3592 #define GEN_VEXT_V_ENV(NAME) \
3593 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3594 CPURISCVState *env, uint32_t desc) \
3596 uint32_t vm = vext_vm(desc); \
3597 uint32_t vl = env->vl; \
3598 uint32_t i; \
3600 if (vl == 0) { \
3601 return; \
3603 for (i = env->vstart; i < vl; i++) { \
3604 if (!vm && !vext_elem_mask(v0, i)) { \
3605 continue; \
3607 do_##NAME(vd, vs2, i, env); \
3609 env->vstart = 0; \
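/*
 * GEN_VEXT_V_ENV generates the unary helpers: it returns immediately for
 * vl == 0, skips inactive elements (mask bit clear with vm == 0) leaving
 * them unchanged, and resets vstart once the loop completes.
 */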
3612 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3613 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3614 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3615 GEN_VEXT_V_ENV(vfsqrt_v_h)
3616 GEN_VEXT_V_ENV(vfsqrt_v_w)
3617 GEN_VEXT_V_ENV(vfsqrt_v_d)
3619 /*
3620 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3621 *
3622 * Adapted from riscv-v-spec recip.c:
3623 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3624 */
3625 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3627 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3628 uint64_t exp = extract64(f, frac_size, exp_size);
3629 uint64_t frac = extract64(f, 0, frac_size);
3631 const uint8_t lookup_table[] = {
3632 52, 51, 50, 48, 47, 46, 44, 43,
3633 42, 41, 40, 39, 38, 36, 35, 34,
3634 33, 32, 31, 30, 30, 29, 28, 27,
3635 26, 25, 24, 23, 23, 22, 21, 20,
3636 19, 19, 18, 17, 16, 16, 15, 14,
3637 14, 13, 12, 12, 11, 10, 10, 9,
3638 9, 8, 7, 7, 6, 6, 5, 4,
3639 4, 3, 3, 2, 2, 1, 1, 0,
3640 127, 125, 123, 121, 119, 118, 116, 114,
3641 113, 111, 109, 108, 106, 105, 103, 102,
3642 100, 99, 97, 96, 95, 93, 92, 91,
3643 90, 88, 87, 86, 85, 84, 83, 82,
3644 80, 79, 78, 77, 76, 75, 74, 73,
3645 72, 71, 70, 70, 69, 68, 67, 66,
3646 65, 64, 63, 63, 62, 61, 60, 59,
3647 59, 58, 57, 56, 56, 55, 54, 53
3649 const int precision = 7;
3651 if (exp == 0 && frac != 0) { /* subnormal */
3652 /* Normalize the subnormal. */
3653 while (extract64(frac, frac_size - 1, 1) == 0) {
3654 exp--;
3655 frac <<= 1;
3658 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3661 int idx = ((exp & 1) << (precision - 1)) |
3662 (frac >> (frac_size - precision + 1));
3663 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3664 (frac_size - precision);
3665 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3667 uint64_t val = 0;
3668 val = deposit64(val, 0, frac_size, out_frac);
3669 val = deposit64(val, frac_size, exp_size, out_exp);
3670 val = deposit64(val, frac_size + exp_size, 1, sign);
3671 return val;
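/*
 * Rough sanity check of the estimate above (fp16, bias = 15):
 * out_exp = (3 * bias + ~exp) / 2 = (3 * 15 - exp - 1) / 2.  For f = 1.0
 * (exp = 15, frac = 0) the exponent is odd, so idx = 1 << 6 = 64,
 * lookup_table[64] = 127, out_frac = 127 << 3 and out_exp = 14, giving
 * about 2^-1 * 1.992 ~= 0.996, a 7-bit estimate of 1/sqrt(1.0).
 */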
3674 static float16 frsqrt7_h(float16 f, float_status *s)
3676 int exp_size = 5, frac_size = 10;
3677 bool sign = float16_is_neg(f);
3679 /*
3680 * frsqrt7(sNaN) = canonical NaN
3681 * frsqrt7(-inf) = canonical NaN
3682 * frsqrt7(-normal) = canonical NaN
3683 * frsqrt7(-subnormal) = canonical NaN
3684 */
3685 if (float16_is_signaling_nan(f, s) ||
3686 (float16_is_infinity(f) && sign) ||
3687 (float16_is_normal(f) && sign) ||
3688 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3689 s->float_exception_flags |= float_flag_invalid;
3690 return float16_default_nan(s);
3693 /* frsqrt7(qNaN) = canonical NaN */
3694 if (float16_is_quiet_nan(f, s)) {
3695 return float16_default_nan(s);
3698 /* frsqrt7(+-0) = +-inf */
3699 if (float16_is_zero(f)) {
3700 s->float_exception_flags |= float_flag_divbyzero;
3701 return float16_set_sign(float16_infinity, sign);
3704 /* frsqrt7(+inf) = +0 */
3705 if (float16_is_infinity(f) && !sign) {
3706 return float16_set_sign(float16_zero, sign);
3709 /* +normal, +subnormal */
3710 uint64_t val = frsqrt7(f, exp_size, frac_size);
3711 return make_float16(val);
3714 static float32 frsqrt7_s(float32 f, float_status *s)
3716 int exp_size = 8, frac_size = 23;
3717 bool sign = float32_is_neg(f);
3719 /*
3720 * frsqrt7(sNaN) = canonical NaN
3721 * frsqrt7(-inf) = canonical NaN
3722 * frsqrt7(-normal) = canonical NaN
3723 * frsqrt7(-subnormal) = canonical NaN
3724 */
3725 if (float32_is_signaling_nan(f, s) ||
3726 (float32_is_infinity(f) && sign) ||
3727 (float32_is_normal(f) && sign) ||
3728 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3729 s->float_exception_flags |= float_flag_invalid;
3730 return float32_default_nan(s);
3733 /* frsqrt7(qNaN) = canonical NaN */
3734 if (float32_is_quiet_nan(f, s)) {
3735 return float32_default_nan(s);
3738 /* frsqrt7(+-0) = +-inf */
3739 if (float32_is_zero(f)) {
3740 s->float_exception_flags |= float_flag_divbyzero;
3741 return float32_set_sign(float32_infinity, sign);
3744 /* frsqrt7(+inf) = +0 */
3745 if (float32_is_infinity(f) && !sign) {
3746 return float32_set_sign(float32_zero, sign);
3749 /* +normal, +subnormal */
3750 uint64_t val = frsqrt7(f, exp_size, frac_size);
3751 return make_float32(val);
3754 static float64 frsqrt7_d(float64 f, float_status *s)
3756 int exp_size = 11, frac_size = 52;
3757 bool sign = float64_is_neg(f);
3759 /*
3760 * frsqrt7(sNaN) = canonical NaN
3761 * frsqrt7(-inf) = canonical NaN
3762 * frsqrt7(-normal) = canonical NaN
3763 * frsqrt7(-subnormal) = canonical NaN
3764 */
3765 if (float64_is_signaling_nan(f, s) ||
3766 (float64_is_infinity(f) && sign) ||
3767 (float64_is_normal(f) && sign) ||
3768 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3769 s->float_exception_flags |= float_flag_invalid;
3770 return float64_default_nan(s);
3773 /* frsqrt7(qNaN) = canonical NaN */
3774 if (float64_is_quiet_nan(f, s)) {
3775 return float64_default_nan(s);
3778 /* frsqrt7(+-0) = +-inf */
3779 if (float64_is_zero(f)) {
3780 s->float_exception_flags |= float_flag_divbyzero;
3781 return float64_set_sign(float64_infinity, sign);
3784 /* frsqrt7(+inf) = +0 */
3785 if (float64_is_infinity(f) && !sign) {
3786 return float64_set_sign(float64_zero, sign);
3789 /* +normal, +subnormal */
3790 uint64_t val = frsqrt7(f, exp_size, frac_size);
3791 return make_float64(val);
3794 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3795 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3796 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3797 GEN_VEXT_V_ENV(vfrsqrt7_v_h)
3798 GEN_VEXT_V_ENV(vfrsqrt7_v_w)
3799 GEN_VEXT_V_ENV(vfrsqrt7_v_d)
3801 /*
3802 * Vector Floating-Point Reciprocal Estimate Instruction
3803 *
3804 * Adapted from riscv-v-spec recip.c:
3805 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3806 */
3807 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3808 float_status *s)
3810 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3811 uint64_t exp = extract64(f, frac_size, exp_size);
3812 uint64_t frac = extract64(f, 0, frac_size);
3814 const uint8_t lookup_table[] = {
3815 127, 125, 123, 121, 119, 117, 116, 114,
3816 112, 110, 109, 107, 105, 104, 102, 100,
3817 99, 97, 96, 94, 93, 91, 90, 88,
3818 87, 85, 84, 83, 81, 80, 79, 77,
3819 76, 75, 74, 72, 71, 70, 69, 68,
3820 66, 65, 64, 63, 62, 61, 60, 59,
3821 58, 57, 56, 55, 54, 53, 52, 51,
3822 50, 49, 48, 47, 46, 45, 44, 43,
3823 42, 41, 40, 40, 39, 38, 37, 36,
3824 35, 35, 34, 33, 32, 31, 31, 30,
3825 29, 28, 28, 27, 26, 25, 25, 24,
3826 23, 23, 22, 21, 21, 20, 19, 19,
3827 18, 17, 17, 16, 15, 15, 14, 14,
3828 13, 12, 12, 11, 11, 10, 9, 9,
3829 8, 8, 7, 7, 6, 5, 5, 4,
3830 4, 3, 3, 2, 2, 1, 1, 0
3832 const int precision = 7;
3834 if (exp == 0 && frac != 0) { /* subnormal */
3835 /* Normalize the subnormal. */
3836 while (extract64(frac, frac_size - 1, 1) == 0) {
3837 exp--;
3838 frac <<= 1;
3841 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3843 if (exp != 0 && exp != UINT64_MAX) {
3844 /*
3845 * Overflow to inf or max value of same sign,
3846 * depending on sign and rounding mode.
3847 */
3848 s->float_exception_flags |= (float_flag_inexact |
3849 float_flag_overflow);
3851 if ((s->float_rounding_mode == float_round_to_zero) ||
3852 ((s->float_rounding_mode == float_round_down) && !sign) ||
3853 ((s->float_rounding_mode == float_round_up) && sign)) {
3854 /* Return greatest/negative finite value. */
3855 return (sign << (exp_size + frac_size)) |
3856 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3857 } else {
3858 /* Return +-inf. */
3859 return (sign << (exp_size + frac_size)) |
3860 MAKE_64BIT_MASK(frac_size, exp_size);
3865 int idx = frac >> (frac_size - precision);
3866 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3867 (frac_size - precision);
3868 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3870 if (out_exp == 0 || out_exp == UINT64_MAX) {
3871 /*
3872 * The result is subnormal, but don't raise the underflow exception,
3873 * because there's no additional loss of precision.
3874 */
3875 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3876 if (out_exp == UINT64_MAX) {
3877 out_frac >>= 1;
3878 out_exp = 0;
3882 uint64_t val = 0;
3883 val = deposit64(val, 0, frac_size, out_frac);
3884 val = deposit64(val, frac_size, exp_size, out_exp);
3885 val = deposit64(val, frac_size + exp_size, 1, sign);
3886 return val;
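/*
 * Rough sanity check for a normal input (fp16, bias = 15):
 * out_exp = 2 * bias + ~exp = 2 * 15 - exp - 1.  For f = 2.0 (exp = 16,
 * frac = 0): idx = 0, lookup_table[0] = 127, out_frac = 127 << 3,
 * out_exp = 13, giving about 2^-2 * 1.992 ~= 0.498, a 7-bit estimate
 * of 1/2.0.
 */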
3889 static float16 frec7_h(float16 f, float_status *s)
3891 int exp_size = 5, frac_size = 10;
3892 bool sign = float16_is_neg(f);
3894 /* frec7(+-inf) = +-0 */
3895 if (float16_is_infinity(f)) {
3896 return float16_set_sign(float16_zero, sign);
3899 /* frec7(+-0) = +-inf */
3900 if (float16_is_zero(f)) {
3901 s->float_exception_flags |= float_flag_divbyzero;
3902 return float16_set_sign(float16_infinity, sign);
3905 /* frec7(sNaN) = canonical NaN */
3906 if (float16_is_signaling_nan(f, s)) {
3907 s->float_exception_flags |= float_flag_invalid;
3908 return float16_default_nan(s);
3911 /* frec7(qNaN) = canonical NaN */
3912 if (float16_is_quiet_nan(f, s)) {
3913 return float16_default_nan(s);
3916 /* +-normal, +-subnormal */
3917 uint64_t val = frec7(f, exp_size, frac_size, s);
3918 return make_float16(val);
3921 static float32 frec7_s(float32 f, float_status *s)
3923 int exp_size = 8, frac_size = 23;
3924 bool sign = float32_is_neg(f);
3926 /* frec7(+-inf) = +-0 */
3927 if (float32_is_infinity(f)) {
3928 return float32_set_sign(float32_zero, sign);
3931 /* frec7(+-0) = +-inf */
3932 if (float32_is_zero(f)) {
3933 s->float_exception_flags |= float_flag_divbyzero;
3934 return float32_set_sign(float32_infinity, sign);
3937 /* frec7(sNaN) = canonical NaN */
3938 if (float32_is_signaling_nan(f, s)) {
3939 s->float_exception_flags |= float_flag_invalid;
3940 return float32_default_nan(s);
3943 /* frec7(qNaN) = canonical NaN */
3944 if (float32_is_quiet_nan(f, s)) {
3945 return float32_default_nan(s);
3948 /* +-normal, +-subnormal */
3949 uint64_t val = frec7(f, exp_size, frac_size, s);
3950 return make_float32(val);
3953 static float64 frec7_d(float64 f, float_status *s)
3955 int exp_size = 11, frac_size = 52;
3956 bool sign = float64_is_neg(f);
3958 /* frec7(+-inf) = +-0 */
3959 if (float64_is_infinity(f)) {
3960 return float64_set_sign(float64_zero, sign);
3963 /* frec7(+-0) = +-inf */
3964 if (float64_is_zero(f)) {
3965 s->float_exception_flags |= float_flag_divbyzero;
3966 return float64_set_sign(float64_infinity, sign);
3969 /* frec7(sNaN) = canonical NaN */
3970 if (float64_is_signaling_nan(f, s)) {
3971 s->float_exception_flags |= float_flag_invalid;
3972 return float64_default_nan(s);
3975 /* frec7(qNaN) = canonical NaN */
3976 if (float64_is_quiet_nan(f, s)) {
3977 return float64_default_nan(s);
3980 /* +-normal, +-subnormal */
3981 uint64_t val = frec7(f, exp_size, frac_size, s);
3982 return make_float64(val);
3985 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3986 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3987 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3988 GEN_VEXT_V_ENV(vfrec7_v_h)
3989 GEN_VEXT_V_ENV(vfrec7_v_w)
3990 GEN_VEXT_V_ENV(vfrec7_v_d)
3992 /* Vector Floating-Point MIN/MAX Instructions */
3993 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3994 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3995 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3996 GEN_VEXT_VV_ENV(vfmin_vv_h)
3997 GEN_VEXT_VV_ENV(vfmin_vv_w)
3998 GEN_VEXT_VV_ENV(vfmin_vv_d)
3999 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
4000 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
4001 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
4002 GEN_VEXT_VF(vfmin_vf_h)
4003 GEN_VEXT_VF(vfmin_vf_w)
4004 GEN_VEXT_VF(vfmin_vf_d)
4006 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
4007 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
4008 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
4009 GEN_VEXT_VV_ENV(vfmax_vv_h)
4010 GEN_VEXT_VV_ENV(vfmax_vv_w)
4011 GEN_VEXT_VV_ENV(vfmax_vv_d)
4012 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
4013 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
4014 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
4015 GEN_VEXT_VF(vfmax_vf_h)
4016 GEN_VEXT_VF(vfmax_vf_w)
4017 GEN_VEXT_VF(vfmax_vf_d)
4019 /* Vector Floating-Point Sign-Injection Instructions */
4020 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
4022 return deposit64(b, 0, 15, a);
4025 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
4027 return deposit64(b, 0, 31, a);
4030 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
4032 return deposit64(b, 0, 63, a);
4035 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
4036 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
4037 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
4038 GEN_VEXT_VV_ENV(vfsgnj_vv_h)
4039 GEN_VEXT_VV_ENV(vfsgnj_vv_w)
4040 GEN_VEXT_VV_ENV(vfsgnj_vv_d)
4041 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
4042 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
4043 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
4044 GEN_VEXT_VF(vfsgnj_vf_h)
4045 GEN_VEXT_VF(vfsgnj_vf_w)
4046 GEN_VEXT_VF(vfsgnj_vf_d)
4048 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
4050 return deposit64(~b, 0, 15, a);
4053 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
4055 return deposit64(~b, 0, 31, a);
4058 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
4060 return deposit64(~b, 0, 63, a);
4063 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
4064 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
4065 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
4066 GEN_VEXT_VV_ENV(vfsgnjn_vv_h)
4067 GEN_VEXT_VV_ENV(vfsgnjn_vv_w)
4068 GEN_VEXT_VV_ENV(vfsgnjn_vv_d)
4069 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
4070 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
4071 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
4072 GEN_VEXT_VF(vfsgnjn_vf_h)
4073 GEN_VEXT_VF(vfsgnjn_vf_w)
4074 GEN_VEXT_VF(vfsgnjn_vf_d)
4076 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4078 return deposit64(b ^ a, 0, 15, a);
4081 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4083 return deposit64(b ^ a, 0, 31, a);
4086 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4088 return deposit64(b ^ a, 0, 63, a);
4091 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4092 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4093 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4094 GEN_VEXT_VV_ENV(vfsgnjx_vv_h)
4095 GEN_VEXT_VV_ENV(vfsgnjx_vv_w)
4096 GEN_VEXT_VV_ENV(vfsgnjx_vv_d)
4097 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4098 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4099 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4100 GEN_VEXT_VF(vfsgnjx_vf_h)
4101 GEN_VEXT_VF(vfsgnjx_vf_w)
4102 GEN_VEXT_VF(vfsgnjx_vf_d)
4104 /* Vector Floating-Point Compare Instructions */
4105 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4106 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4107 CPURISCVState *env, uint32_t desc) \
4109 uint32_t vm = vext_vm(desc); \
4110 uint32_t vl = env->vl; \
4111 uint32_t i; \
4113 for (i = env->vstart; i < vl; i++) { \
4114 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4115 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4116 if (!vm && !vext_elem_mask(v0, i)) { \
4117 continue; \
4119 vext_set_elem_mask(vd, i, \
4120 DO_OP(s2, s1, &env->fp_status)); \
4122 env->vstart = 0; \
4125 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4126 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4127 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4129 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4130 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4131 CPURISCVState *env, uint32_t desc) \
4133 uint32_t vm = vext_vm(desc); \
4134 uint32_t vl = env->vl; \
4135 uint32_t i; \
4137 for (i = env->vstart; i < vl; i++) { \
4138 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4139 if (!vm && !vext_elem_mask(v0, i)) { \
4140 continue; \
4142 vext_set_elem_mask(vd, i, \
4143 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4145 env->vstart = 0; \
4148 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4149 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4150 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4152 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4154 FloatRelation compare = float16_compare_quiet(a, b, s);
4155 return compare != float_relation_equal;
4158 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4160 FloatRelation compare = float32_compare_quiet(a, b, s);
4161 return compare != float_relation_equal;
4164 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4166 FloatRelation compare = float64_compare_quiet(a, b, s);
4167 return compare != float_relation_equal;
4170 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4171 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4172 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4173 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4174 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4175 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4177 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4178 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4179 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4180 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4181 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4182 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4184 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4185 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4186 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4187 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4188 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4189 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4191 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4193 FloatRelation compare = float16_compare(a, b, s);
4194 return compare == float_relation_greater;
4197 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4199 FloatRelation compare = float32_compare(a, b, s);
4200 return compare == float_relation_greater;
4203 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4205 FloatRelation compare = float64_compare(a, b, s);
4206 return compare == float_relation_greater;
4209 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4210 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4211 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4213 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4215 FloatRelation compare = float16_compare(a, b, s);
4216 return compare == float_relation_greater ||
4217 compare == float_relation_equal;
4220 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4222 FloatRelation compare = float32_compare(a, b, s);
4223 return compare == float_relation_greater ||
4224 compare == float_relation_equal;
4227 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4229 FloatRelation compare = float64_compare(a, b, s);
4230 return compare == float_relation_greater ||
4231 compare == float_relation_equal;
4234 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4235 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4236 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
4238 /* Vector Floating-Point Classify Instruction */
4239 #define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
4240 static void do_##NAME(void *vd, void *vs2, int i) \
4242 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
4243 *((TD *)vd + HD(i)) = OP(s2); \
4246 #define GEN_VEXT_V(NAME) \
4247 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
4248 CPURISCVState *env, uint32_t desc) \
4250 uint32_t vm = vext_vm(desc); \
4251 uint32_t vl = env->vl; \
4252 uint32_t i; \
4254 for (i = env->vstart; i < vl; i++) { \
4255 if (!vm && !vext_elem_mask(v0, i)) { \
4256 continue; \
4258 do_##NAME(vd, vs2, i); \
4260 env->vstart = 0; \
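/*
 * The classify helpers below return the standard RISC-V fclass mask:
 * bit 0: -inf, 1: -normal, 2: -subnormal, 3: -0, 4: +0, 5: +subnormal,
 * 6: +normal, 7: +inf, 8: signaling NaN, 9: quiet NaN.
 */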
4263 target_ulong fclass_h(uint64_t frs1)
4265 float16 f = frs1;
4266 bool sign = float16_is_neg(f);
4268 if (float16_is_infinity(f)) {
4269 return sign ? 1 << 0 : 1 << 7;
4270 } else if (float16_is_zero(f)) {
4271 return sign ? 1 << 3 : 1 << 4;
4272 } else if (float16_is_zero_or_denormal(f)) {
4273 return sign ? 1 << 2 : 1 << 5;
4274 } else if (float16_is_any_nan(f)) {
4275 float_status s = { }; /* for snan_bit_is_one */
4276 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4277 } else {
4278 return sign ? 1 << 1 : 1 << 6;
4282 target_ulong fclass_s(uint64_t frs1)
4284 float32 f = frs1;
4285 bool sign = float32_is_neg(f);
4287 if (float32_is_infinity(f)) {
4288 return sign ? 1 << 0 : 1 << 7;
4289 } else if (float32_is_zero(f)) {
4290 return sign ? 1 << 3 : 1 << 4;
4291 } else if (float32_is_zero_or_denormal(f)) {
4292 return sign ? 1 << 2 : 1 << 5;
4293 } else if (float32_is_any_nan(f)) {
4294 float_status s = { }; /* for snan_bit_is_one */
4295 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4296 } else {
4297 return sign ? 1 << 1 : 1 << 6;
4301 target_ulong fclass_d(uint64_t frs1)
4303 float64 f = frs1;
4304 bool sign = float64_is_neg(f);
4306 if (float64_is_infinity(f)) {
4307 return sign ? 1 << 0 : 1 << 7;
4308 } else if (float64_is_zero(f)) {
4309 return sign ? 1 << 3 : 1 << 4;
4310 } else if (float64_is_zero_or_denormal(f)) {
4311 return sign ? 1 << 2 : 1 << 5;
4312 } else if (float64_is_any_nan(f)) {
4313 float_status s = { }; /* for snan_bit_is_one */
4314 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4315 } else {
4316 return sign ? 1 << 1 : 1 << 6;
4320 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4321 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4322 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4323 GEN_VEXT_V(vfclass_v_h)
4324 GEN_VEXT_V(vfclass_v_w)
4325 GEN_VEXT_V(vfclass_v_d)
4327 /* Vector Floating-Point Merge Instruction */
4328 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4329 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4330 CPURISCVState *env, uint32_t desc) \
4332 uint32_t vm = vext_vm(desc); \
4333 uint32_t vl = env->vl; \
4334 uint32_t i; \
4336 for (i = env->vstart; i < vl; i++) { \
4337 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4338 *((ETYPE *)vd + H(i)) \
4339 = (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4341 env->vstart = 0; \
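/*
 * vfmerge.vfm writes the f-register value s1 to elements whose mask bit
 * is set and copies vs2[i] otherwise; with vm == 1 every element
 * receives s1, which is the vfmv.v.f encoding.
 */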
4344 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4345 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4346 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
4348 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4349 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4350 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4351 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4352 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4353 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h)
4354 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w)
4355 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d)
4357 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4358 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4359 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4360 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4361 GEN_VEXT_V_ENV(vfcvt_x_f_v_h)
4362 GEN_VEXT_V_ENV(vfcvt_x_f_v_w)
4363 GEN_VEXT_V_ENV(vfcvt_x_f_v_d)
4365 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4366 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4367 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4368 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4369 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h)
4370 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w)
4371 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d)
4373 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4374 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4375 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4376 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4377 GEN_VEXT_V_ENV(vfcvt_f_x_v_h)
4378 GEN_VEXT_V_ENV(vfcvt_f_x_v_w)
4379 GEN_VEXT_V_ENV(vfcvt_f_x_v_d)
4381 /* Widening Floating-Point/Integer Type-Convert Instructions */
4382 /* (TD, T2, TX2) */
4383 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4384 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4385 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4386 /* vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.*/
4387 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4388 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4389 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h)
4390 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w)
4392 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4393 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4394 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4395 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h)
4396 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w)
4398 /* vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float */
4399 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4400 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4401 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4402 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b)
4403 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h)
4404 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w)
4406 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4407 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4408 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4409 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4410 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b)
4411 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h)
4412 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w)
4414 /*
4415 * vfwcvt.f.f.v vd, vs2, vm
4416 * Convert single-width float to double-width float.
4417 */
4418 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4420 return float16_to_float32(a, true, s);
4423 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4424 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4425 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h)
4426 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w)
4428 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4429 /* (TD, T2, TX2) */
4430 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4431 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4432 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4433 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4434 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4435 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4436 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4437 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b)
4438 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h)
4439 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w)
4441 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4442 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4443 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4444 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4445 GEN_VEXT_V_ENV(vfncvt_x_f_w_b)
4446 GEN_VEXT_V_ENV(vfncvt_x_f_w_h)
4447 GEN_VEXT_V_ENV(vfncvt_x_f_w_w)
4449 /* vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float */
4450 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4451 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4452 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h)
4453 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w)
4455 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4456 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4457 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4458 GEN_VEXT_V_ENV(vfncvt_f_x_w_h)
4459 GEN_VEXT_V_ENV(vfncvt_f_x_w_w)
4461 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4462 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4464 return float32_to_float16(a, true, s);
4467 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4468 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4469 GEN_VEXT_V_ENV(vfncvt_f_f_w_h)
4470 GEN_VEXT_V_ENV(vfncvt_f_f_w_w)
4472 /*
4473 *** Vector Reduction Operations
4474 */
4475 /* Vector Single-Width Integer Reduction Instructions */
4476 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4477 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4478 void *vs2, CPURISCVState *env, uint32_t desc) \
4480 uint32_t vm = vext_vm(desc); \
4481 uint32_t vl = env->vl; \
4482 uint32_t i; \
4483 TD s1 = *((TD *)vs1 + HD(0)); \
4485 for (i = env->vstart; i < vl; i++) { \
4486 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4487 if (!vm && !vext_elem_mask(v0, i)) { \
4488 continue; \
4490 s1 = OP(s1, (TD)s2); \
4492 *((TD *)vd + HD(0)) = s1; \
4493 env->vstart = 0; \
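/*
 * The reduction helpers seed the accumulator with vs1[0], fold in each
 * active element of vs2 in element order starting at vstart, and write
 * the scalar result back to vd[0]; the remaining elements of vd are not
 * touched here.
 */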
4496 /* vd[0] = sum(vs1[0], vs2[*]) */
4497 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4498 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4499 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4500 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4502 /* vd[0] = maxu(vs1[0], vs2[*]) */
4503 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4504 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4505 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4506 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4508 /* vd[0] = max(vs1[0], vs2[*]) */
4509 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4510 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4511 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4512 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4514 /* vd[0] = minu(vs1[0], vs2[*]) */
4515 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4516 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4517 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4518 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4520 /* vd[0] = min(vs1[0], vs2[*]) */
4521 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4522 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4523 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4524 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4526 /* vd[0] = and(vs1[0], vs2[*]) */
4527 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4528 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4529 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4530 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4532 /* vd[0] = or(vs1[0], vs2[*]) */
4533 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4534 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4535 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4536 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4538 /* vd[0] = xor(vs1[0], vs2[*]) */
4539 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4540 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4541 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4542 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
4544 /* Vector Widening Integer Reduction Instructions */
4545 /* signed sum reduction into double-width accumulator */
4546 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4547 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4548 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4550 /* Unsigned sum reduction into double-width accumulator */
4551 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4552 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4553 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4555 /* Vector Single-Width Floating-Point Reduction Instructions */
4556 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4557 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4558 void *vs2, CPURISCVState *env, \
4559 uint32_t desc) \
4561 uint32_t vm = vext_vm(desc); \
4562 uint32_t vl = env->vl; \
4563 uint32_t i; \
4564 TD s1 = *((TD *)vs1 + HD(0)); \
4566 for (i = env->vstart; i < vl; i++) { \
4567 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4568 if (!vm && !vext_elem_mask(v0, i)) { \
4569 continue; \
4571 s1 = OP(s1, (TD)s2, &env->fp_status); \
4573 *((TD *)vd + HD(0)) = s1; \
4574 env->vstart = 0; \
4577 /* Unordered sum */
4578 GEN_VEXT_FRED(vfredsum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4579 GEN_VEXT_FRED(vfredsum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4580 GEN_VEXT_FRED(vfredsum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4582 /* Maximum value */
4583 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2, float16_maximum_number)
4584 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4, float32_maximum_number)
4585 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8, float64_maximum_number)
4587 /* Minimum value */
4588 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2, float16_minimum_number)
4589 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4, float32_minimum_number)
4590 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8, float64_minimum_number)
4592 /* Vector Widening Floating-Point Reduction Instructions */
4593 /* Unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4594 void HELPER(vfwredsum_vs_h)(void *vd, void *v0, void *vs1,
4595 void *vs2, CPURISCVState *env, uint32_t desc)
4597 uint32_t vm = vext_vm(desc);
4598 uint32_t vl = env->vl;
4599 uint32_t i;
4600 uint32_t s1 = *((uint32_t *)vs1 + H4(0));
4602 for (i = env->vstart; i < vl; i++) {
4603 uint16_t s2 = *((uint16_t *)vs2 + H2(i));
4604 if (!vm && !vext_elem_mask(v0, i)) {
4605 continue;
4607 s1 = float32_add(s1, float16_to_float32(s2, true, &env->fp_status),
4608 &env->fp_status);
4610 *((uint32_t *)vd + H4(0)) = s1;
4611 env->vstart = 0;
4614 void HELPER(vfwredsum_vs_w)(void *vd, void *v0, void *vs1,
4615 void *vs2, CPURISCVState *env, uint32_t desc)
4617 uint32_t vm = vext_vm(desc);
4618 uint32_t vl = env->vl;
4619 uint32_t i;
4620 uint64_t s1 = *((uint64_t *)vs1);
4622 for (i = env->vstart; i < vl; i++) {
4623 uint32_t s2 = *((uint32_t *)vs2 + H4(i));
4624 if (!vm && !vext_elem_mask(v0, i)) {
4625 continue;
4627 s1 = float64_add(s1, float32_to_float64(s2, &env->fp_status),
4628 &env->fp_status);
4630 *((uint64_t *)vd) = s1;
4631 env->vstart = 0;
4634 /*
4635 *** Vector Mask Operations
4636 */
4637 /* Vector Mask-Register Logical Instructions */
4638 #define GEN_VEXT_MASK_VV(NAME, OP) \
4639 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4640 void *vs2, CPURISCVState *env, \
4641 uint32_t desc) \
4643 uint32_t vl = env->vl; \
4644 uint32_t i; \
4645 int a, b; \
4647 for (i = env->vstart; i < vl; i++) { \
4648 a = vext_elem_mask(vs1, i); \
4649 b = vext_elem_mask(vs2, i); \
4650 vext_set_elem_mask(vd, i, OP(b, a)); \
4652 env->vstart = 0; \
4655 #define DO_NAND(N, M) (!(N & M))
4656 #define DO_ANDNOT(N, M) (N & !M)
4657 #define DO_NOR(N, M) (!(N | M))
4658 #define DO_ORNOT(N, M) (N | !M)
4659 #define DO_XNOR(N, M) (!(N ^ M))
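/*
 * The mask operands a and b are single bits (0 or 1), so the logical
 * negation in DO_NAND/DO_ANDNOT/DO_NOR/DO_ORNOT/DO_XNOR acts as a
 * one-bit complement.
 */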
4661 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4662 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4663 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4664 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4665 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4666 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4667 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4668 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
4670 /* Vector count population in mask vcpop */
4671 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4672 uint32_t desc)
4674 target_ulong cnt = 0;
4675 uint32_t vm = vext_vm(desc);
4676 uint32_t vl = env->vl;
4677 int i;
4679 for (i = env->vstart; i < vl; i++) {
4680 if (vm || vext_elem_mask(v0, i)) {
4681 if (vext_elem_mask(vs2, i)) {
4682 cnt++;
4686 env->vstart = 0;
4687 return cnt;
4690 /* vfirst find-first-set mask bit */
4691 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4692 uint32_t desc)
4694 uint32_t vm = vext_vm(desc);
4695 uint32_t vl = env->vl;
4696 int i;
4698 for (i = env->vstart; i < vl; i++) {
4699 if (vm || vext_elem_mask(v0, i)) {
4700 if (vext_elem_mask(vs2, i)) {
4701 return i;
4705 env->vstart = 0;
4706 return -1LL;
4709 enum set_mask_type {
4710 ONLY_FIRST = 1,
4711 INCLUDE_FIRST,
4712 BEFORE_FIRST,
4715 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4716 uint32_t desc, enum set_mask_type type)
4718 uint32_t vm = vext_vm(desc);
4719 uint32_t vl = env->vl;
4720 int i;
4721 bool first_mask_bit = false;
4723 for (i = env->vstart; i < vl; i++) {
4724 if (!vm && !vext_elem_mask(v0, i)) {
4725 continue;
4727 /* write a zero to all following active elements */
4728 if (first_mask_bit) {
4729 vext_set_elem_mask(vd, i, 0);
4730 continue;
4732 if (vext_elem_mask(vs2, i)) {
4733 first_mask_bit = true;
4734 if (type == BEFORE_FIRST) {
4735 vext_set_elem_mask(vd, i, 0);
4736 } else {
4737 vext_set_elem_mask(vd, i, 1);
4739 } else {
4740 if (type == ONLY_FIRST) {
4741 vext_set_elem_mask(vd, i, 0);
4742 } else {
4743 vext_set_elem_mask(vd, i, 1);
4747 env->vstart = 0;
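/*
 * vmsetm implements the three set-mask flavours used below:
 * BEFORE_FIRST -> vmsbf (set-before-first), INCLUDE_FIRST -> vmsif
 * (set-including-first) and ONLY_FIRST -> vmsof (set-only-first).
 */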
4750 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4751 uint32_t desc)
4753 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4756 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4757 uint32_t desc)
4759 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4762 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4763 uint32_t desc)
4765 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
4768 /* Vector Iota Instruction */
4769 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4770 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4771 uint32_t desc) \
4773 uint32_t vm = vext_vm(desc); \
4774 uint32_t vl = env->vl; \
4775 uint32_t sum = 0; \
4776 int i; \
4778 for (i = env->vstart; i < vl; i++) { \
4779 if (!vm && !vext_elem_mask(v0, i)) { \
4780 continue; \
4782 *((ETYPE *)vd + H(i)) = sum; \
4783 if (vext_elem_mask(vs2, i)) { \
4784 sum++; \
4787 env->vstart = 0; \
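/*
 * viota.m writes to each active destination element the running count of
 * set vs2 mask bits among the preceding active elements (an exclusive
 * prefix sum); inactive elements neither contribute nor are written.
 */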
4790 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4791 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4792 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4793 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
4795 /* Vector Element Index Instruction */
4796 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4797 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4799 uint32_t vm = vext_vm(desc); \
4800 uint32_t vl = env->vl; \
4801 int i; \
4803 for (i = env->vstart; i < vl; i++) { \
4804 if (!vm && !vext_elem_mask(v0, i)) { \
4805 continue; \
4807 *((ETYPE *)vd + H(i)) = i; \
4809 env->vstart = 0; \
4812 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4813 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4814 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4815 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4817 /*
4818 *** Vector Permutation Instructions
4819 */
4821 /* Vector Slide Instructions */
4822 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4823 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4824 CPURISCVState *env, uint32_t desc) \
4826 uint32_t vm = vext_vm(desc); \
4827 uint32_t vl = env->vl; \
4828 target_ulong offset = s1, i_min, i; \
4830 i_min = MAX(env->vstart, offset); \
4831 for (i = i_min; i < vl; i++) { \
4832 if (!vm && !vext_elem_mask(v0, i)) { \
4833 continue; \
4835 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4839 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4840 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4841 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4842 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4843 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4845 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4846 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4847 CPURISCVState *env, uint32_t desc) \
4849 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4850 uint32_t vm = vext_vm(desc); \
4851 uint32_t vl = env->vl; \
4852 target_ulong i_max, i; \
4854 i_max = MAX(MIN(s1 < vlmax ? vlmax - s1 : 0, vl), env->vstart); \
4855 for (i = env->vstart; i < i_max; ++i) { \
4856 if (vm || vext_elem_mask(v0, i)) { \
4857 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4861 for (i = i_max; i < vl; ++i) { \
4862 if (vm || vext_elem_mask(v0, i)) { \
4863 *((ETYPE *)vd + H(i)) = 0; \
4867 env->vstart = 0; \
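/*
 * For vslidedown.vx, active elements whose source index i + rs1 falls at
 * or beyond vlmax are written as zero (the second loop above); inactive
 * elements are left unchanged.
 */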
4870 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4871 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4872 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4873 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4874 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
4876 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
4877 static void vslide1up_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
4878 void *vs2, CPURISCVState *env, uint32_t desc) \
4880 typedef uint##BITWIDTH##_t ETYPE; \
4881 uint32_t vm = vext_vm(desc); \
4882 uint32_t vl = env->vl; \
4883 uint32_t i; \
4885 for (i = env->vstart; i < vl; i++) { \
4886 if (!vm && !vext_elem_mask(v0, i)) { \
4887 continue; \
4889 if (i == 0) { \
4890 *((ETYPE *)vd + H(i)) = s1; \
4891 } else { \
4892 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4895 env->vstart = 0; \
4898 GEN_VEXT_VSLIE1UP(8, H1)
4899 GEN_VEXT_VSLIE1UP(16, H2)
4900 GEN_VEXT_VSLIE1UP(32, H4)
4901 GEN_VEXT_VSLIE1UP(64, H8)
4903 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
4904 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4905 CPURISCVState *env, uint32_t desc) \
4907 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4910 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4911 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4912 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4913 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4914 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4916 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
4917 static void vslide1down_##BITWIDTH(void *vd, void *v0, target_ulong s1, \
4918 void *vs2, CPURISCVState *env, uint32_t desc) \
4920 typedef uint##BITWIDTH##_t ETYPE; \
4921 uint32_t vm = vext_vm(desc); \
4922 uint32_t vl = env->vl; \
4923 uint32_t i; \
4925 for (i = env->vstart; i < vl; i++) { \
4926 if (!vm && !vext_elem_mask(v0, i)) { \
4927 continue; \
4929 if (i == vl - 1) { \
4930 *((ETYPE *)vd + H(i)) = s1; \
4931 } else { \
4932 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4935 env->vstart = 0; \
4938 GEN_VEXT_VSLIDE1DOWN(8, H1)
4939 GEN_VEXT_VSLIDE1DOWN(16, H2)
4940 GEN_VEXT_VSLIDE1DOWN(32, H4)
4941 GEN_VEXT_VSLIDE1DOWN(64, H8)
4943 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
4944 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4945 CPURISCVState *env, uint32_t desc) \
4947 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4950 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4951 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4952 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4953 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4954 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
4956 /* Vector Floating-Point Slide Instructions */
4957 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
4958 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4959 CPURISCVState *env, uint32_t desc) \
4961 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4964 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
4965 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
4966 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
4967 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
4969 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
4970 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4971 CPURISCVState *env, uint32_t desc) \
4973 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4976 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
4977 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
4978 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
4979 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)
4981 /* Vector Register Gather Instruction */
4982 #define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
4983 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4984 CPURISCVState *env, uint32_t desc) \
4986 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
4987 uint32_t vm = vext_vm(desc); \
4988 uint32_t vl = env->vl; \
4989 uint64_t index; \
4990 uint32_t i; \
4992 for (i = env->vstart; i < vl; i++) { \
4993 if (!vm && !vext_elem_mask(v0, i)) { \
4994 continue; \
4996 index = *((TS1 *)vs1 + HS1(i)); \
4997 if (index >= vlmax) { \
4998 *((TS2 *)vd + HS2(i)) = 0; \
4999 } else { \
5000 *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
5003 env->vstart = 0; \
5006 /* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
5007 GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
5008 GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
5009 GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
5010 GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)
5012 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
5013 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
5014 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
5015 GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
5017 #define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
5018 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
5019 CPURISCVState *env, uint32_t desc) \
5021 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
5022 uint32_t vm = vext_vm(desc); \
5023 uint32_t vl = env->vl; \
5024 uint64_t index = s1; \
5025 uint32_t i; \
5027 for (i = env->vstart; i < vl; i++) { \
5028 if (!vm && !vext_elem_mask(v0, i)) { \
5029 continue; \
5031 if (index >= vlmax) { \
5032 *((ETYPE *)vd + H(i)) = 0; \
5033 } else { \
5034 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
5037 env->vstart = 0; \
5040 /* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
5041 GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
5042 GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
5043 GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
5044 GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)
5046 /* Vector Compress Instruction */
5047 #define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
5048 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
5049 CPURISCVState *env, uint32_t desc) \
5051 uint32_t vl = env->vl; \
5052 uint32_t num = 0, i; \
5054 for (i = env->vstart; i < vl; i++) { \
5055 if (!vext_elem_mask(vs1, i)) { \
5056 continue; \
5058 *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
5059 num++; \
5061 env->vstart = 0; \
5064 /* Compress into vd elements of vs2 where vs1 is enabled */
5065 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
5066 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
5067 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
5068 GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
5070 /* Vector Whole Register Move */
5071 void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
5073 /* EEW = SEW */
5074 uint32_t maxsz = simd_maxsz(desc);
5075 uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
5076 uint32_t startb = env->vstart * sewb;
5077 uint32_t i = startb;
5079 memcpy((uint8_t *)vd + H1(i),
5080 (uint8_t *)vs2 + H1(i),
5081 maxsz - startb);
5083 env->vstart = 0;
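/*
 * Whole-register move: maxsz is expected to be the byte size of the
 * register group being copied, and a nonzero vstart offsets the copy by
 * vstart elements of the current SEW so an interrupted move can resume.
 */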
5086 /* Vector Integer Extension */
5087 #define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
5088 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
5089 CPURISCVState *env, uint32_t desc) \
5091 uint32_t vl = env->vl; \
5092 uint32_t vm = vext_vm(desc); \
5093 uint32_t i; \
5095 for (i = env->vstart; i < vl; i++) { \
5096 if (!vm && !vext_elem_mask(v0, i)) { \
5097 continue; \
5099 *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
5101 env->vstart = 0; \
5104 GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
5105 GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
5106 GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
5107 GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
5108 GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
5109 GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)
5111 GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
5112 GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
5113 GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
5114 GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
5115 GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
5116 GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)