1 /*
2 * RISC-V Vector Extension Helpers for QEMU.
4 * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2 or later, as published by the Free Software Foundation.
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
15 * You should have received a copy of the GNU General Public License along with
16 * this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qemu/host-utils.h"
21 #include "qemu/bitops.h"
22 #include "cpu.h"
23 #include "exec/memop.h"
24 #include "exec/exec-all.h"
25 #include "exec/cpu_ldst.h"
26 #include "exec/page-protection.h"
27 #include "exec/helper-proto.h"
28 #include "fpu/softfloat.h"
29 #include "tcg/tcg-gvec-desc.h"
30 #include "internals.h"
31 #include "vector_internals.h"
32 #include <math.h>
34 target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1,
35 target_ulong s2)
37 int vlmax, vl;
38 RISCVCPU *cpu = env_archcpu(env);
39 uint64_t vlmul = FIELD_EX64(s2, VTYPE, VLMUL);
40 uint8_t vsew = FIELD_EX64(s2, VTYPE, VSEW);
41 uint16_t sew = 8 << vsew;
42 uint8_t ediv = FIELD_EX64(s2, VTYPE, VEDIV);
43 int xlen = riscv_cpu_xlen(env);
44 bool vill = (s2 >> (xlen - 1)) & 0x1;
45 target_ulong reserved = s2 &
46 MAKE_64BIT_MASK(R_VTYPE_RESERVED_SHIFT,
47 xlen - 1 - R_VTYPE_RESERVED_SHIFT);
48 uint16_t vlen = cpu->cfg.vlenb << 3;
49 int8_t lmul;
51 if (vlmul & 4) {
53 * Fractional LMUL, check:
55 * VLEN * LMUL >= SEW
56 * VLEN >> (8 - lmul) >= sew
57 * (vlenb << 3) >> (8 - lmul) >= sew
59 if (vlmul == 4 || (vlen >> (8 - vlmul)) < sew) {
60 vill = true;
64 if ((sew > cpu->cfg.elen) || vill || (ediv != 0) || (reserved != 0)) {
65 /* only set vill bit. */
66 env->vill = 1;
67 env->vtype = 0;
68 env->vl = 0;
69 env->vstart = 0;
70 return 0;
73 /* lmul encoded as in DisasContext::lmul */
74 lmul = sextract32(FIELD_EX64(s2, VTYPE, VLMUL), 0, 3);
75 vlmax = vext_get_vlmax(cpu->cfg.vlenb, vsew, lmul);
76 if (s1 <= vlmax) {
77 vl = s1;
78 } else {
79 vl = vlmax;
81 env->vl = vl;
82 env->vtype = s2;
83 env->vstart = 0;
84 env->vill = 0;
85 return vl;
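/*
 * Illustrative example: with VLEN = 128 (vlenb = 16), SEW = 32 (vsew = 2)
 * and LMUL = 1/2 (vlmul = 7), the fractional-LMUL check above computes
 * vlen >> (8 - vlmul) = 128 >> 1 = 64 >= 32, so the request is legal and
 * vlmax = VLEN / SEW * LMUL = 128 / 32 / 2 = 2.  A vsetvl with AVL = 5
 * therefore returns vl = 2.
 */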
89  * Get the maximum number of elements that can be operated on.
91 * log2_esz: log2 of element size in bytes.
93 static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz)
96  * As simd_desc supports at most 2048 bytes, the max vlen is 1024 bits,
97  * so vlen in bytes (vlenb) is encoded as maxsz.
99 uint32_t vlenb = simd_maxsz(desc);
101 /* Return VLMAX */
102 int scale = vext_lmul(desc) - log2_esz;
103 return scale < 0 ? vlenb >> -scale : vlenb << scale;
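/*
 * For example, with VLEN = 128 bits (vlenb = 16), EEW = 16 (log2_esz = 1)
 * and LMUL = 2 (vext_lmul(desc) = 1): scale = 1 - 1 = 0 and VLMAX =
 * 16 << 0 = 16, matching VLEN * LMUL / EEW = 128 * 2 / 16.  With
 * LMUL = 1/2 (vext_lmul(desc) = -1) and EEW = 32 (log2_esz = 2):
 * scale = -3 and VLMAX = 16 >> 3 = 2.
 */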
106 static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr)
108 return (addr & ~env->cur_pmmask) | env->cur_pmbase;
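/*
 * adjust_addr() applies the current pointer-masking configuration
 * (cur_pmmask/cur_pmbase) to each element address before the access, so
 * that any address bits designated as tag bits are ignored and the
 * configured base offset is applied.
 */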
112  * This function checks watchpoints before the real load operation.
114  * In system mode, the TLB API probe_access is enough for the watchpoint check.
115  * In user mode, there is no watchpoint support for now.
117  * It will trigger an exception if there is no mapping in the TLB
118  * and the page table walk can't fill the TLB entry. The guest
119  * software can then return here after processing the exception, or never return.
121 static void probe_pages(CPURISCVState *env, target_ulong addr,
122 target_ulong len, uintptr_t ra,
123 MMUAccessType access_type)
125 target_ulong pagelen = -(addr | TARGET_PAGE_MASK);
126 target_ulong curlen = MIN(pagelen, len);
127 int mmu_index = riscv_env_mmu_index(env, false);
129 probe_access(env, adjust_addr(env, addr), curlen, access_type,
130 mmu_index, ra);
131 if (len > curlen) {
132 addr += curlen;
133 curlen = len - curlen;
134 probe_access(env, adjust_addr(env, addr), curlen, access_type,
135 mmu_index, ra);
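/*
 * Example: for addr = page_base + 0xff0 on a 4 KiB page and len = 0x20,
 * the first probe covers the 0x10 bytes up to the page boundary and the
 * second probe covers the remaining 0x10 bytes on the following page.
 */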
139 static inline void vext_set_elem_mask(void *v0, int index,
140 uint8_t value)
142 int idx = index / 64;
143 int pos = index % 64;
144 uint64_t old = ((uint64_t *)v0)[idx];
145 ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value);
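/*
 * Mask registers hold one bit per element, packed into 64-bit words.
 * For example, setting mask element 70 updates bit 6 of word 1
 * (idx = 70 / 64 = 1, pos = 70 % 64 = 6).
 */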
148 /* elements operations for load and store */
149 typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
150 uint32_t idx, void *vd, uintptr_t retaddr);
152 #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
153 static void NAME(CPURISCVState *env, abi_ptr addr, \
154 uint32_t idx, void *vd, uintptr_t retaddr)\
156 ETYPE *cur = ((ETYPE *)vd + H(idx)); \
157 *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
160 GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
161 GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
162 GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
163 GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
165 #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
166 static void NAME(CPURISCVState *env, abi_ptr addr, \
167 uint32_t idx, void *vd, uintptr_t retaddr)\
169 ETYPE data = *((ETYPE *)vd + H(idx)); \
170 cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
173 GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
174 GEN_VEXT_ST_ELEM(ste_h, int16_t, H2, stw)
175 GEN_VEXT_ST_ELEM(ste_w, int32_t, H4, stl)
176 GEN_VEXT_ST_ELEM(ste_d, int64_t, H8, stq)
178 static void vext_set_tail_elems_1s(target_ulong vl, void *vd,
179 uint32_t desc, uint32_t nf,
180 uint32_t esz, uint32_t max_elems)
182 uint32_t vta = vext_vta(desc);
183 int k;
185 if (vta == 0) {
186 return;
189 for (k = 0; k < nf; ++k) {
190 vext_set_elems_1s(vd, vta, (k * max_elems + vl) * esz,
191 (k * max_elems + max_elems) * esz);
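/*
 * Tail handling: when vta == 0 (tail undisturbed) the tail elements are
 * left unchanged; when vta == 1 (tail agnostic) QEMU writes all 1s to the
 * tail of each of the nf destination fields, which is one of the values
 * the specification permits for agnostic elements.
 */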
196  * stride: access vector elements from strided memory
198 static void
199 vext_ldst_stride(void *vd, void *v0, target_ulong base,
200 target_ulong stride, CPURISCVState *env,
201 uint32_t desc, uint32_t vm,
202 vext_ldst_elem_fn *ldst_elem,
203 uint32_t log2_esz, uintptr_t ra)
205 uint32_t i, k;
206 uint32_t nf = vext_nf(desc);
207 uint32_t max_elems = vext_max_elems(desc, log2_esz);
208 uint32_t esz = 1 << log2_esz;
209 uint32_t vma = vext_vma(desc);
211 VSTART_CHECK_EARLY_EXIT(env);
213 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
214 k = 0;
215 while (k < nf) {
216 if (!vm && !vext_elem_mask(v0, i)) {
217 /* set masked-off elements to 1s */
218 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
219 (i + k * max_elems + 1) * esz);
220 k++;
221 continue;
223 target_ulong addr = base + stride * i + (k << log2_esz);
224 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
225 k++;
228 env->vstart = 0;
230 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
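/*
 * Strided addressing example: with nf = 2, esz = 4 and stride = 16,
 * element i = 3, field k = 1 is accessed at base + 16 * 3 + 4 = base + 52
 * and mapped to register element index 3 + 1 * max_elems.
 */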
233 #define GEN_VEXT_LD_STRIDE(NAME, ETYPE, LOAD_FN) \
234 void HELPER(NAME)(void *vd, void * v0, target_ulong base, \
235 target_ulong stride, CPURISCVState *env, \
236 uint32_t desc) \
238 uint32_t vm = vext_vm(desc); \
239 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, LOAD_FN, \
240 ctzl(sizeof(ETYPE)), GETPC()); \
243 GEN_VEXT_LD_STRIDE(vlse8_v, int8_t, lde_b)
244 GEN_VEXT_LD_STRIDE(vlse16_v, int16_t, lde_h)
245 GEN_VEXT_LD_STRIDE(vlse32_v, int32_t, lde_w)
246 GEN_VEXT_LD_STRIDE(vlse64_v, int64_t, lde_d)
248 #define GEN_VEXT_ST_STRIDE(NAME, ETYPE, STORE_FN) \
249 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
250 target_ulong stride, CPURISCVState *env, \
251 uint32_t desc) \
253 uint32_t vm = vext_vm(desc); \
254 vext_ldst_stride(vd, v0, base, stride, env, desc, vm, STORE_FN, \
255 ctzl(sizeof(ETYPE)), GETPC()); \
258 GEN_VEXT_ST_STRIDE(vsse8_v, int8_t, ste_b)
259 GEN_VEXT_ST_STRIDE(vsse16_v, int16_t, ste_h)
260 GEN_VEXT_ST_STRIDE(vsse32_v, int32_t, ste_w)
261 GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
264 * unit-stride: access elements stored contiguously in memory
267 /* unmasked unit-stride load and store operation */
268 static void
269 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
270 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
271 uintptr_t ra)
273 uint32_t i, k;
274 uint32_t nf = vext_nf(desc);
275 uint32_t max_elems = vext_max_elems(desc, log2_esz);
276 uint32_t esz = 1 << log2_esz;
278 VSTART_CHECK_EARLY_EXIT(env);
280 /* load bytes from guest memory */
281 for (i = env->vstart; i < evl; env->vstart = ++i) {
282 k = 0;
283 while (k < nf) {
284 target_ulong addr = base + ((i * nf + k) << log2_esz);
285 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
286 k++;
289 env->vstart = 0;
291 vext_set_tail_elems_1s(evl, vd, desc, nf, esz, max_elems);
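/*
 * Unit-stride addressing example: with nf = 3 and esz = 2, element i = 4
 * accesses its three segment fields at base + 24, base + 26 and base + 28
 * ((i * nf + k) * esz) and maps them to register element indices
 * 4, 4 + max_elems and 4 + 2 * max_elems.
 */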
295  * A masked unit-stride load or store operation is handled as a special case
296  * of the strided operation, with stride = NF * sizeof(ETYPE)
299 #define GEN_VEXT_LD_US(NAME, ETYPE, LOAD_FN) \
300 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
301 CPURISCVState *env, uint32_t desc) \
303 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
304 vext_ldst_stride(vd, v0, base, stride, env, desc, false, LOAD_FN, \
305 ctzl(sizeof(ETYPE)), GETPC()); \
308 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
309 CPURISCVState *env, uint32_t desc) \
311 vext_ldst_us(vd, base, env, desc, LOAD_FN, \
312 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
315 GEN_VEXT_LD_US(vle8_v, int8_t, lde_b)
316 GEN_VEXT_LD_US(vle16_v, int16_t, lde_h)
317 GEN_VEXT_LD_US(vle32_v, int32_t, lde_w)
318 GEN_VEXT_LD_US(vle64_v, int64_t, lde_d)
320 #define GEN_VEXT_ST_US(NAME, ETYPE, STORE_FN) \
321 void HELPER(NAME##_mask)(void *vd, void *v0, target_ulong base, \
322 CPURISCVState *env, uint32_t desc) \
324 uint32_t stride = vext_nf(desc) << ctzl(sizeof(ETYPE)); \
325 vext_ldst_stride(vd, v0, base, stride, env, desc, false, STORE_FN, \
326 ctzl(sizeof(ETYPE)), GETPC()); \
329 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
330 CPURISCVState *env, uint32_t desc) \
332 vext_ldst_us(vd, base, env, desc, STORE_FN, \
333 ctzl(sizeof(ETYPE)), env->vl, GETPC()); \
336 GEN_VEXT_ST_US(vse8_v, int8_t, ste_b)
337 GEN_VEXT_ST_US(vse16_v, int16_t, ste_h)
338 GEN_VEXT_ST_US(vse32_v, int32_t, ste_w)
339 GEN_VEXT_ST_US(vse64_v, int64_t, ste_d)
342  * unit-stride mask load and store, EEW = 1
344 void HELPER(vlm_v)(void *vd, void *v0, target_ulong base,
345 CPURISCVState *env, uint32_t desc)
347 /* evl = ceil(vl/8) */
348 uint8_t evl = (env->vl + 7) >> 3;
349 vext_ldst_us(vd, base, env, desc, lde_b,
350 0, evl, GETPC());
353 void HELPER(vsm_v)(void *vd, void *v0, target_ulong base,
354 CPURISCVState *env, uint32_t desc)
356 /* evl = ceil(vl/8) */
357 uint8_t evl = (env->vl + 7) >> 3;
358 vext_ldst_us(vd, base, env, desc, ste_b,
359 0, evl, GETPC());
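/*
 * Example: for vl = 17 the mask occupies ceil(17 / 8) = 3 bytes, so
 * evl = (17 + 7) >> 3 = 3 and only those 3 bytes are transferred.
 */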
363  * index: access vector elements from indexed memory
365 typedef target_ulong vext_get_index_addr(target_ulong base,
366 uint32_t idx, void *vs2);
368 #define GEN_VEXT_GET_INDEX_ADDR(NAME, ETYPE, H) \
369 static target_ulong NAME(target_ulong base, \
370 uint32_t idx, void *vs2) \
372 return (base + *((ETYPE *)vs2 + H(idx))); \
375 GEN_VEXT_GET_INDEX_ADDR(idx_b, uint8_t, H1)
376 GEN_VEXT_GET_INDEX_ADDR(idx_h, uint16_t, H2)
377 GEN_VEXT_GET_INDEX_ADDR(idx_w, uint32_t, H4)
378 GEN_VEXT_GET_INDEX_ADDR(idx_d, uint64_t, H8)
380 static inline void
381 vext_ldst_index(void *vd, void *v0, target_ulong base,
382 void *vs2, CPURISCVState *env, uint32_t desc,
383 vext_get_index_addr get_index_addr,
384 vext_ldst_elem_fn *ldst_elem,
385 uint32_t log2_esz, uintptr_t ra)
387 uint32_t i, k;
388 uint32_t nf = vext_nf(desc);
389 uint32_t vm = vext_vm(desc);
390 uint32_t max_elems = vext_max_elems(desc, log2_esz);
391 uint32_t esz = 1 << log2_esz;
392 uint32_t vma = vext_vma(desc);
394 VSTART_CHECK_EARLY_EXIT(env);
396 /* load bytes from guest memory */
397 for (i = env->vstart; i < env->vl; env->vstart = ++i) {
398 k = 0;
399 while (k < nf) {
400 if (!vm && !vext_elem_mask(v0, i)) {
401 /* set masked-off elements to 1s */
402 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
403 (i + k * max_elems + 1) * esz);
404 k++;
405 continue;
407 abi_ptr addr = get_index_addr(base, i, vs2) + (k << log2_esz);
408 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
409 k++;
412 env->vstart = 0;
414 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
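/*
 * Indexed addressing example: for vlxei32_64_v with vs2[i] = 0x40 (a byte
 * offset), element i of field k = 1 with esz = 8 is accessed at
 * base + 0x40 + (1 << 3) = base + 0x48.
 */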
417 #define GEN_VEXT_LD_INDEX(NAME, ETYPE, INDEX_FN, LOAD_FN) \
418 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
419 void *vs2, CPURISCVState *env, uint32_t desc) \
421 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
422 LOAD_FN, ctzl(sizeof(ETYPE)), GETPC()); \
425 GEN_VEXT_LD_INDEX(vlxei8_8_v, int8_t, idx_b, lde_b)
426 GEN_VEXT_LD_INDEX(vlxei8_16_v, int16_t, idx_b, lde_h)
427 GEN_VEXT_LD_INDEX(vlxei8_32_v, int32_t, idx_b, lde_w)
428 GEN_VEXT_LD_INDEX(vlxei8_64_v, int64_t, idx_b, lde_d)
429 GEN_VEXT_LD_INDEX(vlxei16_8_v, int8_t, idx_h, lde_b)
430 GEN_VEXT_LD_INDEX(vlxei16_16_v, int16_t, idx_h, lde_h)
431 GEN_VEXT_LD_INDEX(vlxei16_32_v, int32_t, idx_h, lde_w)
432 GEN_VEXT_LD_INDEX(vlxei16_64_v, int64_t, idx_h, lde_d)
433 GEN_VEXT_LD_INDEX(vlxei32_8_v, int8_t, idx_w, lde_b)
434 GEN_VEXT_LD_INDEX(vlxei32_16_v, int16_t, idx_w, lde_h)
435 GEN_VEXT_LD_INDEX(vlxei32_32_v, int32_t, idx_w, lde_w)
436 GEN_VEXT_LD_INDEX(vlxei32_64_v, int64_t, idx_w, lde_d)
437 GEN_VEXT_LD_INDEX(vlxei64_8_v, int8_t, idx_d, lde_b)
438 GEN_VEXT_LD_INDEX(vlxei64_16_v, int16_t, idx_d, lde_h)
439 GEN_VEXT_LD_INDEX(vlxei64_32_v, int32_t, idx_d, lde_w)
440 GEN_VEXT_LD_INDEX(vlxei64_64_v, int64_t, idx_d, lde_d)
442 #define GEN_VEXT_ST_INDEX(NAME, ETYPE, INDEX_FN, STORE_FN) \
443 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
444 void *vs2, CPURISCVState *env, uint32_t desc) \
446 vext_ldst_index(vd, v0, base, vs2, env, desc, INDEX_FN, \
447 STORE_FN, ctzl(sizeof(ETYPE)), \
448 GETPC()); \
451 GEN_VEXT_ST_INDEX(vsxei8_8_v, int8_t, idx_b, ste_b)
452 GEN_VEXT_ST_INDEX(vsxei8_16_v, int16_t, idx_b, ste_h)
453 GEN_VEXT_ST_INDEX(vsxei8_32_v, int32_t, idx_b, ste_w)
454 GEN_VEXT_ST_INDEX(vsxei8_64_v, int64_t, idx_b, ste_d)
455 GEN_VEXT_ST_INDEX(vsxei16_8_v, int8_t, idx_h, ste_b)
456 GEN_VEXT_ST_INDEX(vsxei16_16_v, int16_t, idx_h, ste_h)
457 GEN_VEXT_ST_INDEX(vsxei16_32_v, int32_t, idx_h, ste_w)
458 GEN_VEXT_ST_INDEX(vsxei16_64_v, int64_t, idx_h, ste_d)
459 GEN_VEXT_ST_INDEX(vsxei32_8_v, int8_t, idx_w, ste_b)
460 GEN_VEXT_ST_INDEX(vsxei32_16_v, int16_t, idx_w, ste_h)
461 GEN_VEXT_ST_INDEX(vsxei32_32_v, int32_t, idx_w, ste_w)
462 GEN_VEXT_ST_INDEX(vsxei32_64_v, int64_t, idx_w, ste_d)
463 GEN_VEXT_ST_INDEX(vsxei64_8_v, int8_t, idx_d, ste_b)
464 GEN_VEXT_ST_INDEX(vsxei64_16_v, int16_t, idx_d, ste_h)
465 GEN_VEXT_ST_INDEX(vsxei64_32_v, int32_t, idx_d, ste_w)
466 GEN_VEXT_ST_INDEX(vsxei64_64_v, int64_t, idx_d, ste_d)
469  * unit-stride fault-only-first load instructions
471 static inline void
472 vext_ldff(void *vd, void *v0, target_ulong base,
473 CPURISCVState *env, uint32_t desc,
474 vext_ldst_elem_fn *ldst_elem,
475 uint32_t log2_esz, uintptr_t ra)
477 void *host;
478 uint32_t i, k, vl = 0;
479 uint32_t nf = vext_nf(desc);
480 uint32_t vm = vext_vm(desc);
481 uint32_t max_elems = vext_max_elems(desc, log2_esz);
482 uint32_t esz = 1 << log2_esz;
483 uint32_t vma = vext_vma(desc);
484 target_ulong addr, offset, remain;
485 int mmu_index = riscv_env_mmu_index(env, false);
487 VSTART_CHECK_EARLY_EXIT(env);
489 /* probe every access */
490 for (i = env->vstart; i < env->vl; i++) {
491 if (!vm && !vext_elem_mask(v0, i)) {
492 continue;
494 addr = adjust_addr(env, base + i * (nf << log2_esz));
495 if (i == 0) {
496 probe_pages(env, addr, nf << log2_esz, ra, MMU_DATA_LOAD);
497 } else {
498 /* if it triggers an exception, no need to check watchpoint */
499 remain = nf << log2_esz;
500 while (remain > 0) {
501 offset = -(addr | TARGET_PAGE_MASK);
502 host = tlb_vaddr_to_host(env, addr, MMU_DATA_LOAD, mmu_index);
503 if (host) {
504 #ifdef CONFIG_USER_ONLY
505 if (!page_check_range(addr, offset, PAGE_READ)) {
506 vl = i;
507 goto ProbeSuccess;
509 #else
510 probe_pages(env, addr, offset, ra, MMU_DATA_LOAD);
511 #endif
512 } else {
513 vl = i;
514 goto ProbeSuccess;
516 if (remain <= offset) {
517 break;
519 remain -= offset;
520 addr = adjust_addr(env, addr + offset);
524 ProbeSuccess:
525 /* load bytes from guest memory */
526 if (vl != 0) {
527 env->vl = vl;
529 for (i = env->vstart; i < env->vl; i++) {
530 k = 0;
531 while (k < nf) {
532 if (!vm && !vext_elem_mask(v0, i)) {
533 /* set masked-off elements to 1s */
534 vext_set_elems_1s(vd, vma, (i + k * max_elems) * esz,
535 (i + k * max_elems + 1) * esz);
536 k++;
537 continue;
539 addr = base + ((i * nf + k) << log2_esz);
540 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
541 k++;
544 env->vstart = 0;
546 vext_set_tail_elems_1s(env->vl, vd, desc, nf, esz, max_elems);
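/*
 * Fault-only-first semantics: element 0 is always probed with
 * probe_pages() and may trap.  For any later element whose page cannot be
 * accessed, the probe loop above truncates vl to that element index
 * instead of trapping, so only the elements before the faulting one are
 * loaded.
 */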
549 #define GEN_VEXT_LDFF(NAME, ETYPE, LOAD_FN) \
550 void HELPER(NAME)(void *vd, void *v0, target_ulong base, \
551 CPURISCVState *env, uint32_t desc) \
553 vext_ldff(vd, v0, base, env, desc, LOAD_FN, \
554 ctzl(sizeof(ETYPE)), GETPC()); \
557 GEN_VEXT_LDFF(vle8ff_v, int8_t, lde_b)
558 GEN_VEXT_LDFF(vle16ff_v, int16_t, lde_h)
559 GEN_VEXT_LDFF(vle32ff_v, int32_t, lde_w)
560 GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d)
562 #define DO_SWAP(N, M) (M)
563 #define DO_AND(N, M) (N & M)
564 #define DO_XOR(N, M) (N ^ M)
565 #define DO_OR(N, M) (N | M)
566 #define DO_ADD(N, M) (N + M)
568 /* Signed min/max */
569 #define DO_MAX(N, M) ((N) >= (M) ? (N) : (M))
570 #define DO_MIN(N, M) ((N) >= (M) ? (M) : (N))
573 * load and store whole register instructions
575 static void
576 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
577 vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uintptr_t ra)
579 uint32_t i, k, off, pos;
580 uint32_t nf = vext_nf(desc);
581 uint32_t vlenb = riscv_cpu_cfg(env)->vlenb;
582 uint32_t max_elems = vlenb >> log2_esz;
584 if (env->vstart >= ((vlenb * nf) >> log2_esz)) {
585 env->vstart = 0;
586 return;
589 k = env->vstart / max_elems;
590 off = env->vstart % max_elems;
592 if (off) {
593         /* load/store the remaining elements of the segment pointed to by vstart */
594 for (pos = off; pos < max_elems; pos++, env->vstart++) {
595 target_ulong addr = base + ((pos + k * max_elems) << log2_esz);
596 ldst_elem(env, adjust_addr(env, addr), pos + k * max_elems, vd,
597 ra);
599 k++;
602     /* load/store elements of the remaining segments */
603 for (; k < nf; k++) {
604 for (i = 0; i < max_elems; i++, env->vstart++) {
605 target_ulong addr = base + ((i + k * max_elems) << log2_esz);
606 ldst_elem(env, adjust_addr(env, addr), i + k * max_elems, vd, ra);
610 env->vstart = 0;
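/*
 * Example: vl8re32_v with vlenb = 16 transfers nf = 8 registers of
 * max_elems = 16 >> 2 = 4 elements each.  Resuming with vstart = 6
 * restarts in register k = 6 / 4 = 1 at element offset 6 % 4 = 2.
 */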
613 #define GEN_VEXT_LD_WHOLE(NAME, ETYPE, LOAD_FN) \
614 void HELPER(NAME)(void *vd, target_ulong base, \
615 CPURISCVState *env, uint32_t desc) \
617 vext_ldst_whole(vd, base, env, desc, LOAD_FN, \
618 ctzl(sizeof(ETYPE)), GETPC()); \
621 GEN_VEXT_LD_WHOLE(vl1re8_v, int8_t, lde_b)
622 GEN_VEXT_LD_WHOLE(vl1re16_v, int16_t, lde_h)
623 GEN_VEXT_LD_WHOLE(vl1re32_v, int32_t, lde_w)
624 GEN_VEXT_LD_WHOLE(vl1re64_v, int64_t, lde_d)
625 GEN_VEXT_LD_WHOLE(vl2re8_v, int8_t, lde_b)
626 GEN_VEXT_LD_WHOLE(vl2re16_v, int16_t, lde_h)
627 GEN_VEXT_LD_WHOLE(vl2re32_v, int32_t, lde_w)
628 GEN_VEXT_LD_WHOLE(vl2re64_v, int64_t, lde_d)
629 GEN_VEXT_LD_WHOLE(vl4re8_v, int8_t, lde_b)
630 GEN_VEXT_LD_WHOLE(vl4re16_v, int16_t, lde_h)
631 GEN_VEXT_LD_WHOLE(vl4re32_v, int32_t, lde_w)
632 GEN_VEXT_LD_WHOLE(vl4re64_v, int64_t, lde_d)
633 GEN_VEXT_LD_WHOLE(vl8re8_v, int8_t, lde_b)
634 GEN_VEXT_LD_WHOLE(vl8re16_v, int16_t, lde_h)
635 GEN_VEXT_LD_WHOLE(vl8re32_v, int32_t, lde_w)
636 GEN_VEXT_LD_WHOLE(vl8re64_v, int64_t, lde_d)
638 #define GEN_VEXT_ST_WHOLE(NAME, ETYPE, STORE_FN) \
639 void HELPER(NAME)(void *vd, target_ulong base, \
640 CPURISCVState *env, uint32_t desc) \
642 vext_ldst_whole(vd, base, env, desc, STORE_FN, \
643 ctzl(sizeof(ETYPE)), GETPC()); \
646 GEN_VEXT_ST_WHOLE(vs1r_v, int8_t, ste_b)
647 GEN_VEXT_ST_WHOLE(vs2r_v, int8_t, ste_b)
648 GEN_VEXT_ST_WHOLE(vs4r_v, int8_t, ste_b)
649 GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b)
652 * Vector Integer Arithmetic Instructions
655 /* (TD, T1, T2, TX1, TX2) */
656 #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t
657 #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t
658 #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t
659 #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t
660 #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t
661 #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t
662 #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t
663 #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t
664 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
665 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
666 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
667 #define WOP_SUS_B int16_t, uint8_t, int8_t, uint16_t, int16_t
668 #define WOP_SUS_H int32_t, uint16_t, int16_t, uint32_t, int32_t
669 #define WOP_SUS_W int64_t, uint32_t, int32_t, uint64_t, int64_t
670 #define WOP_SSU_B int16_t, int8_t, uint8_t, int16_t, uint16_t
671 #define WOP_SSU_H int32_t, int16_t, uint16_t, int32_t, uint32_t
672 #define WOP_SSU_W int64_t, int32_t, uint32_t, int64_t, uint64_t
673 #define NOP_SSS_B int8_t, int8_t, int16_t, int8_t, int16_t
674 #define NOP_SSS_H int16_t, int16_t, int32_t, int16_t, int32_t
675 #define NOP_SSS_W int32_t, int32_t, int64_t, int32_t, int64_t
676 #define NOP_UUU_B uint8_t, uint8_t, uint16_t, uint8_t, uint16_t
677 #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t
678 #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t
680 #define DO_SUB(N, M) (N - M)
681 #define DO_RSUB(N, M) (M - N)
683 RVVCALL(OPIVV2, vadd_vv_b, OP_SSS_B, H1, H1, H1, DO_ADD)
684 RVVCALL(OPIVV2, vadd_vv_h, OP_SSS_H, H2, H2, H2, DO_ADD)
685 RVVCALL(OPIVV2, vadd_vv_w, OP_SSS_W, H4, H4, H4, DO_ADD)
686 RVVCALL(OPIVV2, vadd_vv_d, OP_SSS_D, H8, H8, H8, DO_ADD)
687 RVVCALL(OPIVV2, vsub_vv_b, OP_SSS_B, H1, H1, H1, DO_SUB)
688 RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB)
689 RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB)
690 RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB)
692 GEN_VEXT_VV(vadd_vv_b, 1)
693 GEN_VEXT_VV(vadd_vv_h, 2)
694 GEN_VEXT_VV(vadd_vv_w, 4)
695 GEN_VEXT_VV(vadd_vv_d, 8)
696 GEN_VEXT_VV(vsub_vv_b, 1)
697 GEN_VEXT_VV(vsub_vv_h, 2)
698 GEN_VEXT_VV(vsub_vv_w, 4)
699 GEN_VEXT_VV(vsub_vv_d, 8)
702 RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD)
703 RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD)
704 RVVCALL(OPIVX2, vadd_vx_w, OP_SSS_W, H4, H4, DO_ADD)
705 RVVCALL(OPIVX2, vadd_vx_d, OP_SSS_D, H8, H8, DO_ADD)
706 RVVCALL(OPIVX2, vsub_vx_b, OP_SSS_B, H1, H1, DO_SUB)
707 RVVCALL(OPIVX2, vsub_vx_h, OP_SSS_H, H2, H2, DO_SUB)
708 RVVCALL(OPIVX2, vsub_vx_w, OP_SSS_W, H4, H4, DO_SUB)
709 RVVCALL(OPIVX2, vsub_vx_d, OP_SSS_D, H8, H8, DO_SUB)
710 RVVCALL(OPIVX2, vrsub_vx_b, OP_SSS_B, H1, H1, DO_RSUB)
711 RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB)
712 RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB)
713 RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB)
715 GEN_VEXT_VX(vadd_vx_b, 1)
716 GEN_VEXT_VX(vadd_vx_h, 2)
717 GEN_VEXT_VX(vadd_vx_w, 4)
718 GEN_VEXT_VX(vadd_vx_d, 8)
719 GEN_VEXT_VX(vsub_vx_b, 1)
720 GEN_VEXT_VX(vsub_vx_h, 2)
721 GEN_VEXT_VX(vsub_vx_w, 4)
722 GEN_VEXT_VX(vsub_vx_d, 8)
723 GEN_VEXT_VX(vrsub_vx_b, 1)
724 GEN_VEXT_VX(vrsub_vx_h, 2)
725 GEN_VEXT_VX(vrsub_vx_w, 4)
726 GEN_VEXT_VX(vrsub_vx_d, 8)
728 void HELPER(vec_rsubs8)(void *d, void *a, uint64_t b, uint32_t desc)
730 intptr_t oprsz = simd_oprsz(desc);
731 intptr_t i;
733 for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
734 *(uint8_t *)(d + i) = (uint8_t)b - *(uint8_t *)(a + i);
738 void HELPER(vec_rsubs16)(void *d, void *a, uint64_t b, uint32_t desc)
740 intptr_t oprsz = simd_oprsz(desc);
741 intptr_t i;
743 for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
744 *(uint16_t *)(d + i) = (uint16_t)b - *(uint16_t *)(a + i);
748 void HELPER(vec_rsubs32)(void *d, void *a, uint64_t b, uint32_t desc)
750 intptr_t oprsz = simd_oprsz(desc);
751 intptr_t i;
753 for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
754 *(uint32_t *)(d + i) = (uint32_t)b - *(uint32_t *)(a + i);
758 void HELPER(vec_rsubs64)(void *d, void *a, uint64_t b, uint32_t desc)
760 intptr_t oprsz = simd_oprsz(desc);
761 intptr_t i;
763 for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
764 *(uint64_t *)(d + i) = b - *(uint64_t *)(a + i);
768 /* Vector Widening Integer Add/Subtract */
769 #define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t
770 #define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t
771 #define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t
772 #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t
773 #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t
774 #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t
775 #define WOP_WUUU_B uint16_t, uint8_t, uint16_t, uint16_t, uint16_t
776 #define WOP_WUUU_H uint32_t, uint16_t, uint32_t, uint32_t, uint32_t
777 #define WOP_WUUU_W uint64_t, uint32_t, uint64_t, uint64_t, uint64_t
778 #define WOP_WSSS_B int16_t, int8_t, int16_t, int16_t, int16_t
779 #define WOP_WSSS_H int32_t, int16_t, int32_t, int32_t, int32_t
780 #define WOP_WSSS_W int64_t, int32_t, int64_t, int64_t, int64_t
781 RVVCALL(OPIVV2, vwaddu_vv_b, WOP_UUU_B, H2, H1, H1, DO_ADD)
782 RVVCALL(OPIVV2, vwaddu_vv_h, WOP_UUU_H, H4, H2, H2, DO_ADD)
783 RVVCALL(OPIVV2, vwaddu_vv_w, WOP_UUU_W, H8, H4, H4, DO_ADD)
784 RVVCALL(OPIVV2, vwsubu_vv_b, WOP_UUU_B, H2, H1, H1, DO_SUB)
785 RVVCALL(OPIVV2, vwsubu_vv_h, WOP_UUU_H, H4, H2, H2, DO_SUB)
786 RVVCALL(OPIVV2, vwsubu_vv_w, WOP_UUU_W, H8, H4, H4, DO_SUB)
787 RVVCALL(OPIVV2, vwadd_vv_b, WOP_SSS_B, H2, H1, H1, DO_ADD)
788 RVVCALL(OPIVV2, vwadd_vv_h, WOP_SSS_H, H4, H2, H2, DO_ADD)
789 RVVCALL(OPIVV2, vwadd_vv_w, WOP_SSS_W, H8, H4, H4, DO_ADD)
790 RVVCALL(OPIVV2, vwsub_vv_b, WOP_SSS_B, H2, H1, H1, DO_SUB)
791 RVVCALL(OPIVV2, vwsub_vv_h, WOP_SSS_H, H4, H2, H2, DO_SUB)
792 RVVCALL(OPIVV2, vwsub_vv_w, WOP_SSS_W, H8, H4, H4, DO_SUB)
793 RVVCALL(OPIVV2, vwaddu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_ADD)
794 RVVCALL(OPIVV2, vwaddu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_ADD)
795 RVVCALL(OPIVV2, vwaddu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_ADD)
796 RVVCALL(OPIVV2, vwsubu_wv_b, WOP_WUUU_B, H2, H1, H1, DO_SUB)
797 RVVCALL(OPIVV2, vwsubu_wv_h, WOP_WUUU_H, H4, H2, H2, DO_SUB)
798 RVVCALL(OPIVV2, vwsubu_wv_w, WOP_WUUU_W, H8, H4, H4, DO_SUB)
799 RVVCALL(OPIVV2, vwadd_wv_b, WOP_WSSS_B, H2, H1, H1, DO_ADD)
800 RVVCALL(OPIVV2, vwadd_wv_h, WOP_WSSS_H, H4, H2, H2, DO_ADD)
801 RVVCALL(OPIVV2, vwadd_wv_w, WOP_WSSS_W, H8, H4, H4, DO_ADD)
802 RVVCALL(OPIVV2, vwsub_wv_b, WOP_WSSS_B, H2, H1, H1, DO_SUB)
803 RVVCALL(OPIVV2, vwsub_wv_h, WOP_WSSS_H, H4, H2, H2, DO_SUB)
804 RVVCALL(OPIVV2, vwsub_wv_w, WOP_WSSS_W, H8, H4, H4, DO_SUB)
805 GEN_VEXT_VV(vwaddu_vv_b, 2)
806 GEN_VEXT_VV(vwaddu_vv_h, 4)
807 GEN_VEXT_VV(vwaddu_vv_w, 8)
808 GEN_VEXT_VV(vwsubu_vv_b, 2)
809 GEN_VEXT_VV(vwsubu_vv_h, 4)
810 GEN_VEXT_VV(vwsubu_vv_w, 8)
811 GEN_VEXT_VV(vwadd_vv_b, 2)
812 GEN_VEXT_VV(vwadd_vv_h, 4)
813 GEN_VEXT_VV(vwadd_vv_w, 8)
814 GEN_VEXT_VV(vwsub_vv_b, 2)
815 GEN_VEXT_VV(vwsub_vv_h, 4)
816 GEN_VEXT_VV(vwsub_vv_w, 8)
817 GEN_VEXT_VV(vwaddu_wv_b, 2)
818 GEN_VEXT_VV(vwaddu_wv_h, 4)
819 GEN_VEXT_VV(vwaddu_wv_w, 8)
820 GEN_VEXT_VV(vwsubu_wv_b, 2)
821 GEN_VEXT_VV(vwsubu_wv_h, 4)
822 GEN_VEXT_VV(vwsubu_wv_w, 8)
823 GEN_VEXT_VV(vwadd_wv_b, 2)
824 GEN_VEXT_VV(vwadd_wv_h, 4)
825 GEN_VEXT_VV(vwadd_wv_w, 8)
826 GEN_VEXT_VV(vwsub_wv_b, 2)
827 GEN_VEXT_VV(vwsub_wv_h, 4)
828 GEN_VEXT_VV(vwsub_wv_w, 8)
830 RVVCALL(OPIVX2, vwaddu_vx_b, WOP_UUU_B, H2, H1, DO_ADD)
831 RVVCALL(OPIVX2, vwaddu_vx_h, WOP_UUU_H, H4, H2, DO_ADD)
832 RVVCALL(OPIVX2, vwaddu_vx_w, WOP_UUU_W, H8, H4, DO_ADD)
833 RVVCALL(OPIVX2, vwsubu_vx_b, WOP_UUU_B, H2, H1, DO_SUB)
834 RVVCALL(OPIVX2, vwsubu_vx_h, WOP_UUU_H, H4, H2, DO_SUB)
835 RVVCALL(OPIVX2, vwsubu_vx_w, WOP_UUU_W, H8, H4, DO_SUB)
836 RVVCALL(OPIVX2, vwadd_vx_b, WOP_SSS_B, H2, H1, DO_ADD)
837 RVVCALL(OPIVX2, vwadd_vx_h, WOP_SSS_H, H4, H2, DO_ADD)
838 RVVCALL(OPIVX2, vwadd_vx_w, WOP_SSS_W, H8, H4, DO_ADD)
839 RVVCALL(OPIVX2, vwsub_vx_b, WOP_SSS_B, H2, H1, DO_SUB)
840 RVVCALL(OPIVX2, vwsub_vx_h, WOP_SSS_H, H4, H2, DO_SUB)
841 RVVCALL(OPIVX2, vwsub_vx_w, WOP_SSS_W, H8, H4, DO_SUB)
842 RVVCALL(OPIVX2, vwaddu_wx_b, WOP_WUUU_B, H2, H1, DO_ADD)
843 RVVCALL(OPIVX2, vwaddu_wx_h, WOP_WUUU_H, H4, H2, DO_ADD)
844 RVVCALL(OPIVX2, vwaddu_wx_w, WOP_WUUU_W, H8, H4, DO_ADD)
845 RVVCALL(OPIVX2, vwsubu_wx_b, WOP_WUUU_B, H2, H1, DO_SUB)
846 RVVCALL(OPIVX2, vwsubu_wx_h, WOP_WUUU_H, H4, H2, DO_SUB)
847 RVVCALL(OPIVX2, vwsubu_wx_w, WOP_WUUU_W, H8, H4, DO_SUB)
848 RVVCALL(OPIVX2, vwadd_wx_b, WOP_WSSS_B, H2, H1, DO_ADD)
849 RVVCALL(OPIVX2, vwadd_wx_h, WOP_WSSS_H, H4, H2, DO_ADD)
850 RVVCALL(OPIVX2, vwadd_wx_w, WOP_WSSS_W, H8, H4, DO_ADD)
851 RVVCALL(OPIVX2, vwsub_wx_b, WOP_WSSS_B, H2, H1, DO_SUB)
852 RVVCALL(OPIVX2, vwsub_wx_h, WOP_WSSS_H, H4, H2, DO_SUB)
853 RVVCALL(OPIVX2, vwsub_wx_w, WOP_WSSS_W, H8, H4, DO_SUB)
854 GEN_VEXT_VX(vwaddu_vx_b, 2)
855 GEN_VEXT_VX(vwaddu_vx_h, 4)
856 GEN_VEXT_VX(vwaddu_vx_w, 8)
857 GEN_VEXT_VX(vwsubu_vx_b, 2)
858 GEN_VEXT_VX(vwsubu_vx_h, 4)
859 GEN_VEXT_VX(vwsubu_vx_w, 8)
860 GEN_VEXT_VX(vwadd_vx_b, 2)
861 GEN_VEXT_VX(vwadd_vx_h, 4)
862 GEN_VEXT_VX(vwadd_vx_w, 8)
863 GEN_VEXT_VX(vwsub_vx_b, 2)
864 GEN_VEXT_VX(vwsub_vx_h, 4)
865 GEN_VEXT_VX(vwsub_vx_w, 8)
866 GEN_VEXT_VX(vwaddu_wx_b, 2)
867 GEN_VEXT_VX(vwaddu_wx_h, 4)
868 GEN_VEXT_VX(vwaddu_wx_w, 8)
869 GEN_VEXT_VX(vwsubu_wx_b, 2)
870 GEN_VEXT_VX(vwsubu_wx_h, 4)
871 GEN_VEXT_VX(vwsubu_wx_w, 8)
872 GEN_VEXT_VX(vwadd_wx_b, 2)
873 GEN_VEXT_VX(vwadd_wx_h, 4)
874 GEN_VEXT_VX(vwadd_wx_w, 8)
875 GEN_VEXT_VX(vwsub_wx_b, 2)
876 GEN_VEXT_VX(vwsub_wx_h, 4)
877 GEN_VEXT_VX(vwsub_wx_w, 8)
879 /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */
880 #define DO_VADC(N, M, C) (N + M + C)
881 #define DO_VSBC(N, M, C) (N - M - C)
883 #define GEN_VEXT_VADC_VVM(NAME, ETYPE, H, DO_OP) \
884 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
885 CPURISCVState *env, uint32_t desc) \
887 uint32_t vl = env->vl; \
888 uint32_t esz = sizeof(ETYPE); \
889 uint32_t total_elems = \
890 vext_get_total_elems(env, desc, esz); \
891 uint32_t vta = vext_vta(desc); \
892 uint32_t i; \
894 VSTART_CHECK_EARLY_EXIT(env); \
896 for (i = env->vstart; i < vl; i++) { \
897 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
898 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
899 ETYPE carry = vext_elem_mask(v0, i); \
901 *((ETYPE *)vd + H(i)) = DO_OP(s2, s1, carry); \
903 env->vstart = 0; \
904 /* set tail elements to 1s */ \
905 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
908 GEN_VEXT_VADC_VVM(vadc_vvm_b, uint8_t, H1, DO_VADC)
909 GEN_VEXT_VADC_VVM(vadc_vvm_h, uint16_t, H2, DO_VADC)
910 GEN_VEXT_VADC_VVM(vadc_vvm_w, uint32_t, H4, DO_VADC)
911 GEN_VEXT_VADC_VVM(vadc_vvm_d, uint64_t, H8, DO_VADC)
913 GEN_VEXT_VADC_VVM(vsbc_vvm_b, uint8_t, H1, DO_VSBC)
914 GEN_VEXT_VADC_VVM(vsbc_vvm_h, uint16_t, H2, DO_VSBC)
915 GEN_VEXT_VADC_VVM(vsbc_vvm_w, uint32_t, H4, DO_VSBC)
916 GEN_VEXT_VADC_VVM(vsbc_vvm_d, uint64_t, H8, DO_VSBC)
918 #define GEN_VEXT_VADC_VXM(NAME, ETYPE, H, DO_OP) \
919 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
920 CPURISCVState *env, uint32_t desc) \
922 uint32_t vl = env->vl; \
923 uint32_t esz = sizeof(ETYPE); \
924 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
925 uint32_t vta = vext_vta(desc); \
926 uint32_t i; \
928 VSTART_CHECK_EARLY_EXIT(env); \
930 for (i = env->vstart; i < vl; i++) { \
931 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
932 ETYPE carry = vext_elem_mask(v0, i); \
934 *((ETYPE *)vd + H(i)) = DO_OP(s2, (ETYPE)(target_long)s1, carry);\
936 env->vstart = 0; \
937 /* set tail elements to 1s */ \
938 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
941 GEN_VEXT_VADC_VXM(vadc_vxm_b, uint8_t, H1, DO_VADC)
942 GEN_VEXT_VADC_VXM(vadc_vxm_h, uint16_t, H2, DO_VADC)
943 GEN_VEXT_VADC_VXM(vadc_vxm_w, uint32_t, H4, DO_VADC)
944 GEN_VEXT_VADC_VXM(vadc_vxm_d, uint64_t, H8, DO_VADC)
946 GEN_VEXT_VADC_VXM(vsbc_vxm_b, uint8_t, H1, DO_VSBC)
947 GEN_VEXT_VADC_VXM(vsbc_vxm_h, uint16_t, H2, DO_VSBC)
948 GEN_VEXT_VADC_VXM(vsbc_vxm_w, uint32_t, H4, DO_VSBC)
949 GEN_VEXT_VADC_VXM(vsbc_vxm_d, uint64_t, H8, DO_VSBC)
951 #define DO_MADC(N, M, C) (C ? (__typeof(N))(N + M + 1) <= N : \
952 (__typeof(N))(N + M) < N)
953 #define DO_MSBC(N, M, C) (C ? N <= M : N < M)
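/*
 * Carry/borrow-out examples for uint8_t operands:
 *   DO_MADC(200, 100, 0): 200 + 100 wraps to 44 < 200, so carry-out = 1.
 *   DO_MADC(200,  55, 1): 200 + 55 + 1 wraps to 0 <= 200, so carry-out = 1.
 *   DO_MSBC(  5,   9, 0): 5 < 9, so borrow-out = 1.
 */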
955 #define GEN_VEXT_VMADC_VVM(NAME, ETYPE, H, DO_OP) \
956 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
957 CPURISCVState *env, uint32_t desc) \
959 uint32_t vl = env->vl; \
960 uint32_t vm = vext_vm(desc); \
961 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
962 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
963 uint32_t i; \
965 VSTART_CHECK_EARLY_EXIT(env); \
967 for (i = env->vstart; i < vl; i++) { \
968 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
969 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
970 ETYPE carry = !vm && vext_elem_mask(v0, i); \
971 vext_set_elem_mask(vd, i, DO_OP(s2, s1, carry)); \
973 env->vstart = 0; \
975      * The mask destination register is always tail-agnostic;
976      * set the tail elements to 1s
977 */ \
978 if (vta_all_1s) { \
979 for (; i < total_elems; i++) { \
980 vext_set_elem_mask(vd, i, 1); \
985 GEN_VEXT_VMADC_VVM(vmadc_vvm_b, uint8_t, H1, DO_MADC)
986 GEN_VEXT_VMADC_VVM(vmadc_vvm_h, uint16_t, H2, DO_MADC)
987 GEN_VEXT_VMADC_VVM(vmadc_vvm_w, uint32_t, H4, DO_MADC)
988 GEN_VEXT_VMADC_VVM(vmadc_vvm_d, uint64_t, H8, DO_MADC)
990 GEN_VEXT_VMADC_VVM(vmsbc_vvm_b, uint8_t, H1, DO_MSBC)
991 GEN_VEXT_VMADC_VVM(vmsbc_vvm_h, uint16_t, H2, DO_MSBC)
992 GEN_VEXT_VMADC_VVM(vmsbc_vvm_w, uint32_t, H4, DO_MSBC)
993 GEN_VEXT_VMADC_VVM(vmsbc_vvm_d, uint64_t, H8, DO_MSBC)
995 #define GEN_VEXT_VMADC_VXM(NAME, ETYPE, H, DO_OP) \
996 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
997 void *vs2, CPURISCVState *env, uint32_t desc) \
999 uint32_t vl = env->vl; \
1000 uint32_t vm = vext_vm(desc); \
1001 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1002 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1003 uint32_t i; \
1005 VSTART_CHECK_EARLY_EXIT(env); \
1007 for (i = env->vstart; i < vl; i++) { \
1008 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1009 ETYPE carry = !vm && vext_elem_mask(v0, i); \
1010 vext_set_elem_mask(vd, i, \
1011 DO_OP(s2, (ETYPE)(target_long)s1, carry)); \
1013 env->vstart = 0; \
1015      * The mask destination register is always tail-agnostic;
1016      * set the tail elements to 1s
1017 */ \
1018 if (vta_all_1s) { \
1019 for (; i < total_elems; i++) { \
1020 vext_set_elem_mask(vd, i, 1); \
1025 GEN_VEXT_VMADC_VXM(vmadc_vxm_b, uint8_t, H1, DO_MADC)
1026 GEN_VEXT_VMADC_VXM(vmadc_vxm_h, uint16_t, H2, DO_MADC)
1027 GEN_VEXT_VMADC_VXM(vmadc_vxm_w, uint32_t, H4, DO_MADC)
1028 GEN_VEXT_VMADC_VXM(vmadc_vxm_d, uint64_t, H8, DO_MADC)
1030 GEN_VEXT_VMADC_VXM(vmsbc_vxm_b, uint8_t, H1, DO_MSBC)
1031 GEN_VEXT_VMADC_VXM(vmsbc_vxm_h, uint16_t, H2, DO_MSBC)
1032 GEN_VEXT_VMADC_VXM(vmsbc_vxm_w, uint32_t, H4, DO_MSBC)
1033 GEN_VEXT_VMADC_VXM(vmsbc_vxm_d, uint64_t, H8, DO_MSBC)
1035 /* Vector Bitwise Logical Instructions */
1036 RVVCALL(OPIVV2, vand_vv_b, OP_SSS_B, H1, H1, H1, DO_AND)
1037 RVVCALL(OPIVV2, vand_vv_h, OP_SSS_H, H2, H2, H2, DO_AND)
1038 RVVCALL(OPIVV2, vand_vv_w, OP_SSS_W, H4, H4, H4, DO_AND)
1039 RVVCALL(OPIVV2, vand_vv_d, OP_SSS_D, H8, H8, H8, DO_AND)
1040 RVVCALL(OPIVV2, vor_vv_b, OP_SSS_B, H1, H1, H1, DO_OR)
1041 RVVCALL(OPIVV2, vor_vv_h, OP_SSS_H, H2, H2, H2, DO_OR)
1042 RVVCALL(OPIVV2, vor_vv_w, OP_SSS_W, H4, H4, H4, DO_OR)
1043 RVVCALL(OPIVV2, vor_vv_d, OP_SSS_D, H8, H8, H8, DO_OR)
1044 RVVCALL(OPIVV2, vxor_vv_b, OP_SSS_B, H1, H1, H1, DO_XOR)
1045 RVVCALL(OPIVV2, vxor_vv_h, OP_SSS_H, H2, H2, H2, DO_XOR)
1046 RVVCALL(OPIVV2, vxor_vv_w, OP_SSS_W, H4, H4, H4, DO_XOR)
1047 RVVCALL(OPIVV2, vxor_vv_d, OP_SSS_D, H8, H8, H8, DO_XOR)
1048 GEN_VEXT_VV(vand_vv_b, 1)
1049 GEN_VEXT_VV(vand_vv_h, 2)
1050 GEN_VEXT_VV(vand_vv_w, 4)
1051 GEN_VEXT_VV(vand_vv_d, 8)
1052 GEN_VEXT_VV(vor_vv_b, 1)
1053 GEN_VEXT_VV(vor_vv_h, 2)
1054 GEN_VEXT_VV(vor_vv_w, 4)
1055 GEN_VEXT_VV(vor_vv_d, 8)
1056 GEN_VEXT_VV(vxor_vv_b, 1)
1057 GEN_VEXT_VV(vxor_vv_h, 2)
1058 GEN_VEXT_VV(vxor_vv_w, 4)
1059 GEN_VEXT_VV(vxor_vv_d, 8)
1061 RVVCALL(OPIVX2, vand_vx_b, OP_SSS_B, H1, H1, DO_AND)
1062 RVVCALL(OPIVX2, vand_vx_h, OP_SSS_H, H2, H2, DO_AND)
1063 RVVCALL(OPIVX2, vand_vx_w, OP_SSS_W, H4, H4, DO_AND)
1064 RVVCALL(OPIVX2, vand_vx_d, OP_SSS_D, H8, H8, DO_AND)
1065 RVVCALL(OPIVX2, vor_vx_b, OP_SSS_B, H1, H1, DO_OR)
1066 RVVCALL(OPIVX2, vor_vx_h, OP_SSS_H, H2, H2, DO_OR)
1067 RVVCALL(OPIVX2, vor_vx_w, OP_SSS_W, H4, H4, DO_OR)
1068 RVVCALL(OPIVX2, vor_vx_d, OP_SSS_D, H8, H8, DO_OR)
1069 RVVCALL(OPIVX2, vxor_vx_b, OP_SSS_B, H1, H1, DO_XOR)
1070 RVVCALL(OPIVX2, vxor_vx_h, OP_SSS_H, H2, H2, DO_XOR)
1071 RVVCALL(OPIVX2, vxor_vx_w, OP_SSS_W, H4, H4, DO_XOR)
1072 RVVCALL(OPIVX2, vxor_vx_d, OP_SSS_D, H8, H8, DO_XOR)
1073 GEN_VEXT_VX(vand_vx_b, 1)
1074 GEN_VEXT_VX(vand_vx_h, 2)
1075 GEN_VEXT_VX(vand_vx_w, 4)
1076 GEN_VEXT_VX(vand_vx_d, 8)
1077 GEN_VEXT_VX(vor_vx_b, 1)
1078 GEN_VEXT_VX(vor_vx_h, 2)
1079 GEN_VEXT_VX(vor_vx_w, 4)
1080 GEN_VEXT_VX(vor_vx_d, 8)
1081 GEN_VEXT_VX(vxor_vx_b, 1)
1082 GEN_VEXT_VX(vxor_vx_h, 2)
1083 GEN_VEXT_VX(vxor_vx_w, 4)
1084 GEN_VEXT_VX(vxor_vx_d, 8)
1086 /* Vector Single-Width Bit Shift Instructions */
1087 #define DO_SLL(N, M) (N << (M))
1088 #define DO_SRL(N, M) (N >> (M))
1090 /* generate the helpers for shift instructions with two vector operands */
1091 #define GEN_VEXT_SHIFT_VV(NAME, TS1, TS2, HS1, HS2, OP, MASK) \
1092 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
1093 void *vs2, CPURISCVState *env, uint32_t desc) \
1095 uint32_t vm = vext_vm(desc); \
1096 uint32_t vl = env->vl; \
1097 uint32_t esz = sizeof(TS1); \
1098 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1099 uint32_t vta = vext_vta(desc); \
1100 uint32_t vma = vext_vma(desc); \
1101 uint32_t i; \
1103 VSTART_CHECK_EARLY_EXIT(env); \
1105 for (i = env->vstart; i < vl; i++) { \
1106 if (!vm && !vext_elem_mask(v0, i)) { \
1107 /* set masked-off elements to 1s */ \
1108 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
1109 continue; \
1111 TS1 s1 = *((TS1 *)vs1 + HS1(i)); \
1112 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1113 *((TS1 *)vd + HS1(i)) = OP(s2, s1 & MASK); \
1115 env->vstart = 0; \
1116 /* set tail elements to 1s */ \
1117 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1120 GEN_VEXT_SHIFT_VV(vsll_vv_b, uint8_t, uint8_t, H1, H1, DO_SLL, 0x7)
1121 GEN_VEXT_SHIFT_VV(vsll_vv_h, uint16_t, uint16_t, H2, H2, DO_SLL, 0xf)
1122 GEN_VEXT_SHIFT_VV(vsll_vv_w, uint32_t, uint32_t, H4, H4, DO_SLL, 0x1f)
1123 GEN_VEXT_SHIFT_VV(vsll_vv_d, uint64_t, uint64_t, H8, H8, DO_SLL, 0x3f)
1125 GEN_VEXT_SHIFT_VV(vsrl_vv_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1126 GEN_VEXT_SHIFT_VV(vsrl_vv_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1127 GEN_VEXT_SHIFT_VV(vsrl_vv_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1128 GEN_VEXT_SHIFT_VV(vsrl_vv_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1130 GEN_VEXT_SHIFT_VV(vsra_vv_b, uint8_t, int8_t, H1, H1, DO_SRL, 0x7)
1131 GEN_VEXT_SHIFT_VV(vsra_vv_h, uint16_t, int16_t, H2, H2, DO_SRL, 0xf)
1132 GEN_VEXT_SHIFT_VV(vsra_vv_w, uint32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1133 GEN_VEXT_SHIFT_VV(vsra_vv_d, uint64_t, int64_t, H8, H8, DO_SRL, 0x3f)
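/*
 * Note: vsra reuses DO_SRL, but because TS2 is a signed type the right
 * shift of s2 sign-extends, e.g. ((int8_t)-16) >> 2 yields -4.
 */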
1136 * generate the helpers for shift instructions with one vector and one scalar
1138 #define GEN_VEXT_SHIFT_VX(NAME, TD, TS2, HD, HS2, OP, MASK) \
1139 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1140 void *vs2, CPURISCVState *env, \
1141 uint32_t desc) \
1143 uint32_t vm = vext_vm(desc); \
1144 uint32_t vl = env->vl; \
1145 uint32_t esz = sizeof(TD); \
1146 uint32_t total_elems = \
1147 vext_get_total_elems(env, desc, esz); \
1148 uint32_t vta = vext_vta(desc); \
1149 uint32_t vma = vext_vma(desc); \
1150 uint32_t i; \
1152 VSTART_CHECK_EARLY_EXIT(env); \
1154 for (i = env->vstart; i < vl; i++) { \
1155 if (!vm && !vext_elem_mask(v0, i)) { \
1156 /* set masked-off elements to 1s */ \
1157 vext_set_elems_1s(vd, vma, i * esz, \
1158 (i + 1) * esz); \
1159 continue; \
1161 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
1162 *((TD *)vd + HD(i)) = OP(s2, s1 & MASK); \
1164 env->vstart = 0; \
1165 /* set tail elements to 1s */ \
1166 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);\
1169 GEN_VEXT_SHIFT_VX(vsll_vx_b, uint8_t, int8_t, H1, H1, DO_SLL, 0x7)
1170 GEN_VEXT_SHIFT_VX(vsll_vx_h, uint16_t, int16_t, H2, H2, DO_SLL, 0xf)
1171 GEN_VEXT_SHIFT_VX(vsll_vx_w, uint32_t, int32_t, H4, H4, DO_SLL, 0x1f)
1172 GEN_VEXT_SHIFT_VX(vsll_vx_d, uint64_t, int64_t, H8, H8, DO_SLL, 0x3f)
1174 GEN_VEXT_SHIFT_VX(vsrl_vx_b, uint8_t, uint8_t, H1, H1, DO_SRL, 0x7)
1175 GEN_VEXT_SHIFT_VX(vsrl_vx_h, uint16_t, uint16_t, H2, H2, DO_SRL, 0xf)
1176 GEN_VEXT_SHIFT_VX(vsrl_vx_w, uint32_t, uint32_t, H4, H4, DO_SRL, 0x1f)
1177 GEN_VEXT_SHIFT_VX(vsrl_vx_d, uint64_t, uint64_t, H8, H8, DO_SRL, 0x3f)
1179 GEN_VEXT_SHIFT_VX(vsra_vx_b, int8_t, int8_t, H1, H1, DO_SRL, 0x7)
1180 GEN_VEXT_SHIFT_VX(vsra_vx_h, int16_t, int16_t, H2, H2, DO_SRL, 0xf)
1181 GEN_VEXT_SHIFT_VX(vsra_vx_w, int32_t, int32_t, H4, H4, DO_SRL, 0x1f)
1182 GEN_VEXT_SHIFT_VX(vsra_vx_d, int64_t, int64_t, H8, H8, DO_SRL, 0x3f)
1184 /* Vector Narrowing Integer Right Shift Instructions */
1185 GEN_VEXT_SHIFT_VV(vnsrl_wv_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1186 GEN_VEXT_SHIFT_VV(vnsrl_wv_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1187 GEN_VEXT_SHIFT_VV(vnsrl_wv_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1188 GEN_VEXT_SHIFT_VV(vnsra_wv_b, uint8_t, int16_t, H1, H2, DO_SRL, 0xf)
1189 GEN_VEXT_SHIFT_VV(vnsra_wv_h, uint16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1190 GEN_VEXT_SHIFT_VV(vnsra_wv_w, uint32_t, int64_t, H4, H8, DO_SRL, 0x3f)
1191 GEN_VEXT_SHIFT_VX(vnsrl_wx_b, uint8_t, uint16_t, H1, H2, DO_SRL, 0xf)
1192 GEN_VEXT_SHIFT_VX(vnsrl_wx_h, uint16_t, uint32_t, H2, H4, DO_SRL, 0x1f)
1193 GEN_VEXT_SHIFT_VX(vnsrl_wx_w, uint32_t, uint64_t, H4, H8, DO_SRL, 0x3f)
1194 GEN_VEXT_SHIFT_VX(vnsra_wx_b, int8_t, int16_t, H1, H2, DO_SRL, 0xf)
1195 GEN_VEXT_SHIFT_VX(vnsra_wx_h, int16_t, int32_t, H2, H4, DO_SRL, 0x1f)
1196 GEN_VEXT_SHIFT_VX(vnsra_wx_w, int32_t, int64_t, H4, H8, DO_SRL, 0x3f)
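/*
 * For the narrowing shifts the source operand is 2*SEW wide, so the shift
 * amount is masked to 2*SEW - 1: e.g. vnsrl_wv_b shifts a 16-bit source
 * and uses MASK = 0xf.
 */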
1198 /* Vector Integer Comparison Instructions */
1199 #define DO_MSEQ(N, M) (N == M)
1200 #define DO_MSNE(N, M) (N != M)
1201 #define DO_MSLT(N, M) (N < M)
1202 #define DO_MSLE(N, M) (N <= M)
1203 #define DO_MSGT(N, M) (N > M)
1205 #define GEN_VEXT_CMP_VV(NAME, ETYPE, H, DO_OP) \
1206 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1207 CPURISCVState *env, uint32_t desc) \
1209 uint32_t vm = vext_vm(desc); \
1210 uint32_t vl = env->vl; \
1211 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1212 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1213 uint32_t vma = vext_vma(desc); \
1214 uint32_t i; \
1216 VSTART_CHECK_EARLY_EXIT(env); \
1218 for (i = env->vstart; i < vl; i++) { \
1219 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1220 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1221 if (!vm && !vext_elem_mask(v0, i)) { \
1222 /* set masked-off elements to 1s */ \
1223 if (vma) { \
1224 vext_set_elem_mask(vd, i, 1); \
1226 continue; \
1228 vext_set_elem_mask(vd, i, DO_OP(s2, s1)); \
1230 env->vstart = 0; \
1232      * The mask destination register is always tail-agnostic;
1233      * set the tail elements to 1s
1234 */ \
1235 if (vta_all_1s) { \
1236 for (; i < total_elems; i++) { \
1237 vext_set_elem_mask(vd, i, 1); \
1242 GEN_VEXT_CMP_VV(vmseq_vv_b, uint8_t, H1, DO_MSEQ)
1243 GEN_VEXT_CMP_VV(vmseq_vv_h, uint16_t, H2, DO_MSEQ)
1244 GEN_VEXT_CMP_VV(vmseq_vv_w, uint32_t, H4, DO_MSEQ)
1245 GEN_VEXT_CMP_VV(vmseq_vv_d, uint64_t, H8, DO_MSEQ)
1247 GEN_VEXT_CMP_VV(vmsne_vv_b, uint8_t, H1, DO_MSNE)
1248 GEN_VEXT_CMP_VV(vmsne_vv_h, uint16_t, H2, DO_MSNE)
1249 GEN_VEXT_CMP_VV(vmsne_vv_w, uint32_t, H4, DO_MSNE)
1250 GEN_VEXT_CMP_VV(vmsne_vv_d, uint64_t, H8, DO_MSNE)
1252 GEN_VEXT_CMP_VV(vmsltu_vv_b, uint8_t, H1, DO_MSLT)
1253 GEN_VEXT_CMP_VV(vmsltu_vv_h, uint16_t, H2, DO_MSLT)
1254 GEN_VEXT_CMP_VV(vmsltu_vv_w, uint32_t, H4, DO_MSLT)
1255 GEN_VEXT_CMP_VV(vmsltu_vv_d, uint64_t, H8, DO_MSLT)
1257 GEN_VEXT_CMP_VV(vmslt_vv_b, int8_t, H1, DO_MSLT)
1258 GEN_VEXT_CMP_VV(vmslt_vv_h, int16_t, H2, DO_MSLT)
1259 GEN_VEXT_CMP_VV(vmslt_vv_w, int32_t, H4, DO_MSLT)
1260 GEN_VEXT_CMP_VV(vmslt_vv_d, int64_t, H8, DO_MSLT)
1262 GEN_VEXT_CMP_VV(vmsleu_vv_b, uint8_t, H1, DO_MSLE)
1263 GEN_VEXT_CMP_VV(vmsleu_vv_h, uint16_t, H2, DO_MSLE)
1264 GEN_VEXT_CMP_VV(vmsleu_vv_w, uint32_t, H4, DO_MSLE)
1265 GEN_VEXT_CMP_VV(vmsleu_vv_d, uint64_t, H8, DO_MSLE)
1267 GEN_VEXT_CMP_VV(vmsle_vv_b, int8_t, H1, DO_MSLE)
1268 GEN_VEXT_CMP_VV(vmsle_vv_h, int16_t, H2, DO_MSLE)
1269 GEN_VEXT_CMP_VV(vmsle_vv_w, int32_t, H4, DO_MSLE)
1270 GEN_VEXT_CMP_VV(vmsle_vv_d, int64_t, H8, DO_MSLE)
1272 #define GEN_VEXT_CMP_VX(NAME, ETYPE, H, DO_OP) \
1273 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
1274 CPURISCVState *env, uint32_t desc) \
1276 uint32_t vm = vext_vm(desc); \
1277 uint32_t vl = env->vl; \
1278 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
1279 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
1280 uint32_t vma = vext_vma(desc); \
1281 uint32_t i; \
1283 VSTART_CHECK_EARLY_EXIT(env); \
1285 for (i = env->vstart; i < vl; i++) { \
1286 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1287 if (!vm && !vext_elem_mask(v0, i)) { \
1288 /* set masked-off elements to 1s */ \
1289 if (vma) { \
1290 vext_set_elem_mask(vd, i, 1); \
1292 continue; \
1294 vext_set_elem_mask(vd, i, \
1295 DO_OP(s2, (ETYPE)(target_long)s1)); \
1297 env->vstart = 0; \
1299      * The mask destination register is always tail-agnostic;
1300      * set the tail elements to 1s
1301 */ \
1302 if (vta_all_1s) { \
1303 for (; i < total_elems; i++) { \
1304 vext_set_elem_mask(vd, i, 1); \
1309 GEN_VEXT_CMP_VX(vmseq_vx_b, uint8_t, H1, DO_MSEQ)
1310 GEN_VEXT_CMP_VX(vmseq_vx_h, uint16_t, H2, DO_MSEQ)
1311 GEN_VEXT_CMP_VX(vmseq_vx_w, uint32_t, H4, DO_MSEQ)
1312 GEN_VEXT_CMP_VX(vmseq_vx_d, uint64_t, H8, DO_MSEQ)
1314 GEN_VEXT_CMP_VX(vmsne_vx_b, uint8_t, H1, DO_MSNE)
1315 GEN_VEXT_CMP_VX(vmsne_vx_h, uint16_t, H2, DO_MSNE)
1316 GEN_VEXT_CMP_VX(vmsne_vx_w, uint32_t, H4, DO_MSNE)
1317 GEN_VEXT_CMP_VX(vmsne_vx_d, uint64_t, H8, DO_MSNE)
1319 GEN_VEXT_CMP_VX(vmsltu_vx_b, uint8_t, H1, DO_MSLT)
1320 GEN_VEXT_CMP_VX(vmsltu_vx_h, uint16_t, H2, DO_MSLT)
1321 GEN_VEXT_CMP_VX(vmsltu_vx_w, uint32_t, H4, DO_MSLT)
1322 GEN_VEXT_CMP_VX(vmsltu_vx_d, uint64_t, H8, DO_MSLT)
1324 GEN_VEXT_CMP_VX(vmslt_vx_b, int8_t, H1, DO_MSLT)
1325 GEN_VEXT_CMP_VX(vmslt_vx_h, int16_t, H2, DO_MSLT)
1326 GEN_VEXT_CMP_VX(vmslt_vx_w, int32_t, H4, DO_MSLT)
1327 GEN_VEXT_CMP_VX(vmslt_vx_d, int64_t, H8, DO_MSLT)
1329 GEN_VEXT_CMP_VX(vmsleu_vx_b, uint8_t, H1, DO_MSLE)
1330 GEN_VEXT_CMP_VX(vmsleu_vx_h, uint16_t, H2, DO_MSLE)
1331 GEN_VEXT_CMP_VX(vmsleu_vx_w, uint32_t, H4, DO_MSLE)
1332 GEN_VEXT_CMP_VX(vmsleu_vx_d, uint64_t, H8, DO_MSLE)
1334 GEN_VEXT_CMP_VX(vmsle_vx_b, int8_t, H1, DO_MSLE)
1335 GEN_VEXT_CMP_VX(vmsle_vx_h, int16_t, H2, DO_MSLE)
1336 GEN_VEXT_CMP_VX(vmsle_vx_w, int32_t, H4, DO_MSLE)
1337 GEN_VEXT_CMP_VX(vmsle_vx_d, int64_t, H8, DO_MSLE)
1339 GEN_VEXT_CMP_VX(vmsgtu_vx_b, uint8_t, H1, DO_MSGT)
1340 GEN_VEXT_CMP_VX(vmsgtu_vx_h, uint16_t, H2, DO_MSGT)
1341 GEN_VEXT_CMP_VX(vmsgtu_vx_w, uint32_t, H4, DO_MSGT)
1342 GEN_VEXT_CMP_VX(vmsgtu_vx_d, uint64_t, H8, DO_MSGT)
1344 GEN_VEXT_CMP_VX(vmsgt_vx_b, int8_t, H1, DO_MSGT)
1345 GEN_VEXT_CMP_VX(vmsgt_vx_h, int16_t, H2, DO_MSGT)
1346 GEN_VEXT_CMP_VX(vmsgt_vx_w, int32_t, H4, DO_MSGT)
1347 GEN_VEXT_CMP_VX(vmsgt_vx_d, int64_t, H8, DO_MSGT)
1349 /* Vector Integer Min/Max Instructions */
1350 RVVCALL(OPIVV2, vminu_vv_b, OP_UUU_B, H1, H1, H1, DO_MIN)
1351 RVVCALL(OPIVV2, vminu_vv_h, OP_UUU_H, H2, H2, H2, DO_MIN)
1352 RVVCALL(OPIVV2, vminu_vv_w, OP_UUU_W, H4, H4, H4, DO_MIN)
1353 RVVCALL(OPIVV2, vminu_vv_d, OP_UUU_D, H8, H8, H8, DO_MIN)
1354 RVVCALL(OPIVV2, vmin_vv_b, OP_SSS_B, H1, H1, H1, DO_MIN)
1355 RVVCALL(OPIVV2, vmin_vv_h, OP_SSS_H, H2, H2, H2, DO_MIN)
1356 RVVCALL(OPIVV2, vmin_vv_w, OP_SSS_W, H4, H4, H4, DO_MIN)
1357 RVVCALL(OPIVV2, vmin_vv_d, OP_SSS_D, H8, H8, H8, DO_MIN)
1358 RVVCALL(OPIVV2, vmaxu_vv_b, OP_UUU_B, H1, H1, H1, DO_MAX)
1359 RVVCALL(OPIVV2, vmaxu_vv_h, OP_UUU_H, H2, H2, H2, DO_MAX)
1360 RVVCALL(OPIVV2, vmaxu_vv_w, OP_UUU_W, H4, H4, H4, DO_MAX)
1361 RVVCALL(OPIVV2, vmaxu_vv_d, OP_UUU_D, H8, H8, H8, DO_MAX)
1362 RVVCALL(OPIVV2, vmax_vv_b, OP_SSS_B, H1, H1, H1, DO_MAX)
1363 RVVCALL(OPIVV2, vmax_vv_h, OP_SSS_H, H2, H2, H2, DO_MAX)
1364 RVVCALL(OPIVV2, vmax_vv_w, OP_SSS_W, H4, H4, H4, DO_MAX)
1365 RVVCALL(OPIVV2, vmax_vv_d, OP_SSS_D, H8, H8, H8, DO_MAX)
1366 GEN_VEXT_VV(vminu_vv_b, 1)
1367 GEN_VEXT_VV(vminu_vv_h, 2)
1368 GEN_VEXT_VV(vminu_vv_w, 4)
1369 GEN_VEXT_VV(vminu_vv_d, 8)
1370 GEN_VEXT_VV(vmin_vv_b, 1)
1371 GEN_VEXT_VV(vmin_vv_h, 2)
1372 GEN_VEXT_VV(vmin_vv_w, 4)
1373 GEN_VEXT_VV(vmin_vv_d, 8)
1374 GEN_VEXT_VV(vmaxu_vv_b, 1)
1375 GEN_VEXT_VV(vmaxu_vv_h, 2)
1376 GEN_VEXT_VV(vmaxu_vv_w, 4)
1377 GEN_VEXT_VV(vmaxu_vv_d, 8)
1378 GEN_VEXT_VV(vmax_vv_b, 1)
1379 GEN_VEXT_VV(vmax_vv_h, 2)
1380 GEN_VEXT_VV(vmax_vv_w, 4)
1381 GEN_VEXT_VV(vmax_vv_d, 8)
1383 RVVCALL(OPIVX2, vminu_vx_b, OP_UUU_B, H1, H1, DO_MIN)
1384 RVVCALL(OPIVX2, vminu_vx_h, OP_UUU_H, H2, H2, DO_MIN)
1385 RVVCALL(OPIVX2, vminu_vx_w, OP_UUU_W, H4, H4, DO_MIN)
1386 RVVCALL(OPIVX2, vminu_vx_d, OP_UUU_D, H8, H8, DO_MIN)
1387 RVVCALL(OPIVX2, vmin_vx_b, OP_SSS_B, H1, H1, DO_MIN)
1388 RVVCALL(OPIVX2, vmin_vx_h, OP_SSS_H, H2, H2, DO_MIN)
1389 RVVCALL(OPIVX2, vmin_vx_w, OP_SSS_W, H4, H4, DO_MIN)
1390 RVVCALL(OPIVX2, vmin_vx_d, OP_SSS_D, H8, H8, DO_MIN)
1391 RVVCALL(OPIVX2, vmaxu_vx_b, OP_UUU_B, H1, H1, DO_MAX)
1392 RVVCALL(OPIVX2, vmaxu_vx_h, OP_UUU_H, H2, H2, DO_MAX)
1393 RVVCALL(OPIVX2, vmaxu_vx_w, OP_UUU_W, H4, H4, DO_MAX)
1394 RVVCALL(OPIVX2, vmaxu_vx_d, OP_UUU_D, H8, H8, DO_MAX)
1395 RVVCALL(OPIVX2, vmax_vx_b, OP_SSS_B, H1, H1, DO_MAX)
1396 RVVCALL(OPIVX2, vmax_vx_h, OP_SSS_H, H2, H2, DO_MAX)
1397 RVVCALL(OPIVX2, vmax_vx_w, OP_SSS_W, H4, H4, DO_MAX)
1398 RVVCALL(OPIVX2, vmax_vx_d, OP_SSS_D, H8, H8, DO_MAX)
1399 GEN_VEXT_VX(vminu_vx_b, 1)
1400 GEN_VEXT_VX(vminu_vx_h, 2)
1401 GEN_VEXT_VX(vminu_vx_w, 4)
1402 GEN_VEXT_VX(vminu_vx_d, 8)
1403 GEN_VEXT_VX(vmin_vx_b, 1)
1404 GEN_VEXT_VX(vmin_vx_h, 2)
1405 GEN_VEXT_VX(vmin_vx_w, 4)
1406 GEN_VEXT_VX(vmin_vx_d, 8)
1407 GEN_VEXT_VX(vmaxu_vx_b, 1)
1408 GEN_VEXT_VX(vmaxu_vx_h, 2)
1409 GEN_VEXT_VX(vmaxu_vx_w, 4)
1410 GEN_VEXT_VX(vmaxu_vx_d, 8)
1411 GEN_VEXT_VX(vmax_vx_b, 1)
1412 GEN_VEXT_VX(vmax_vx_h, 2)
1413 GEN_VEXT_VX(vmax_vx_w, 4)
1414 GEN_VEXT_VX(vmax_vx_d, 8)
1416 /* Vector Single-Width Integer Multiply Instructions */
1417 #define DO_MUL(N, M) (N * M)
1418 RVVCALL(OPIVV2, vmul_vv_b, OP_SSS_B, H1, H1, H1, DO_MUL)
1419 RVVCALL(OPIVV2, vmul_vv_h, OP_SSS_H, H2, H2, H2, DO_MUL)
1420 RVVCALL(OPIVV2, vmul_vv_w, OP_SSS_W, H4, H4, H4, DO_MUL)
1421 RVVCALL(OPIVV2, vmul_vv_d, OP_SSS_D, H8, H8, H8, DO_MUL)
1422 GEN_VEXT_VV(vmul_vv_b, 1)
1423 GEN_VEXT_VV(vmul_vv_h, 2)
1424 GEN_VEXT_VV(vmul_vv_w, 4)
1425 GEN_VEXT_VV(vmul_vv_d, 8)
1427 static int8_t do_mulh_b(int8_t s2, int8_t s1)
1429 return (int16_t)s2 * (int16_t)s1 >> 8;
1432 static int16_t do_mulh_h(int16_t s2, int16_t s1)
1434 return (int32_t)s2 * (int32_t)s1 >> 16;
1437 static int32_t do_mulh_w(int32_t s2, int32_t s1)
1439 return (int64_t)s2 * (int64_t)s1 >> 32;
1442 static int64_t do_mulh_d(int64_t s2, int64_t s1)
1444 uint64_t hi_64, lo_64;
1446 muls64(&lo_64, &hi_64, s1, s2);
1447 return hi_64;
1450 static uint8_t do_mulhu_b(uint8_t s2, uint8_t s1)
1452 return (uint16_t)s2 * (uint16_t)s1 >> 8;
1455 static uint16_t do_mulhu_h(uint16_t s2, uint16_t s1)
1457 return (uint32_t)s2 * (uint32_t)s1 >> 16;
1460 static uint32_t do_mulhu_w(uint32_t s2, uint32_t s1)
1462 return (uint64_t)s2 * (uint64_t)s1 >> 32;
1465 static uint64_t do_mulhu_d(uint64_t s2, uint64_t s1)
1467 uint64_t hi_64, lo_64;
1469 mulu64(&lo_64, &hi_64, s2, s1);
1470 return hi_64;
1473 static int8_t do_mulhsu_b(int8_t s2, uint8_t s1)
1475 return (int16_t)s2 * (uint16_t)s1 >> 8;
1478 static int16_t do_mulhsu_h(int16_t s2, uint16_t s1)
1480 return (int32_t)s2 * (uint32_t)s1 >> 16;
1483 static int32_t do_mulhsu_w(int32_t s2, uint32_t s1)
1485 return (int64_t)s2 * (uint64_t)s1 >> 32;
1489 * Let A = signed operand,
1490 * B = unsigned operand
1491 * P = mulu64(A, B), unsigned product
1493 * LET X = 2 ** 64 - A, 2's complement of A
1494 * SP = signed product
1495 * THEN
1496 * IF A < 0
1497 * SP = -X * B
1498 * = -(2 ** 64 - A) * B
1499 * = A * B - 2 ** 64 * B
1500 * = P - 2 ** 64 * B
1501 * ELSE
1502 * SP = P
1503 * THEN
1504 * HI_P -= (A < 0 ? B : 0)
1507 static int64_t do_mulhsu_d(int64_t s2, uint64_t s1)
1509 uint64_t hi_64, lo_64;
1511 mulu64(&lo_64, &hi_64, s2, s1);
1513 hi_64 -= s2 < 0 ? s1 : 0;
1514 return hi_64;
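/*
 * Worked example at 8-bit width: A = -3 (0xfd), B = 10.  The unsigned
 * product is 253 * 10 = 2530 = 0x09e2, so HI_P = 9.  Applying the
 * correction HI_P -= B gives 9 - 10 = -1, the high byte of the true
 * signed product -30 = 0xffe2.
 */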
1517 RVVCALL(OPIVV2, vmulh_vv_b, OP_SSS_B, H1, H1, H1, do_mulh_b)
1518 RVVCALL(OPIVV2, vmulh_vv_h, OP_SSS_H, H2, H2, H2, do_mulh_h)
1519 RVVCALL(OPIVV2, vmulh_vv_w, OP_SSS_W, H4, H4, H4, do_mulh_w)
1520 RVVCALL(OPIVV2, vmulh_vv_d, OP_SSS_D, H8, H8, H8, do_mulh_d)
1521 RVVCALL(OPIVV2, vmulhu_vv_b, OP_UUU_B, H1, H1, H1, do_mulhu_b)
1522 RVVCALL(OPIVV2, vmulhu_vv_h, OP_UUU_H, H2, H2, H2, do_mulhu_h)
1523 RVVCALL(OPIVV2, vmulhu_vv_w, OP_UUU_W, H4, H4, H4, do_mulhu_w)
1524 RVVCALL(OPIVV2, vmulhu_vv_d, OP_UUU_D, H8, H8, H8, do_mulhu_d)
1525 RVVCALL(OPIVV2, vmulhsu_vv_b, OP_SUS_B, H1, H1, H1, do_mulhsu_b)
1526 RVVCALL(OPIVV2, vmulhsu_vv_h, OP_SUS_H, H2, H2, H2, do_mulhsu_h)
1527 RVVCALL(OPIVV2, vmulhsu_vv_w, OP_SUS_W, H4, H4, H4, do_mulhsu_w)
1528 RVVCALL(OPIVV2, vmulhsu_vv_d, OP_SUS_D, H8, H8, H8, do_mulhsu_d)
1529 GEN_VEXT_VV(vmulh_vv_b, 1)
1530 GEN_VEXT_VV(vmulh_vv_h, 2)
1531 GEN_VEXT_VV(vmulh_vv_w, 4)
1532 GEN_VEXT_VV(vmulh_vv_d, 8)
1533 GEN_VEXT_VV(vmulhu_vv_b, 1)
1534 GEN_VEXT_VV(vmulhu_vv_h, 2)
1535 GEN_VEXT_VV(vmulhu_vv_w, 4)
1536 GEN_VEXT_VV(vmulhu_vv_d, 8)
1537 GEN_VEXT_VV(vmulhsu_vv_b, 1)
1538 GEN_VEXT_VV(vmulhsu_vv_h, 2)
1539 GEN_VEXT_VV(vmulhsu_vv_w, 4)
1540 GEN_VEXT_VV(vmulhsu_vv_d, 8)
1542 RVVCALL(OPIVX2, vmul_vx_b, OP_SSS_B, H1, H1, DO_MUL)
1543 RVVCALL(OPIVX2, vmul_vx_h, OP_SSS_H, H2, H2, DO_MUL)
1544 RVVCALL(OPIVX2, vmul_vx_w, OP_SSS_W, H4, H4, DO_MUL)
1545 RVVCALL(OPIVX2, vmul_vx_d, OP_SSS_D, H8, H8, DO_MUL)
1546 RVVCALL(OPIVX2, vmulh_vx_b, OP_SSS_B, H1, H1, do_mulh_b)
1547 RVVCALL(OPIVX2, vmulh_vx_h, OP_SSS_H, H2, H2, do_mulh_h)
1548 RVVCALL(OPIVX2, vmulh_vx_w, OP_SSS_W, H4, H4, do_mulh_w)
1549 RVVCALL(OPIVX2, vmulh_vx_d, OP_SSS_D, H8, H8, do_mulh_d)
1550 RVVCALL(OPIVX2, vmulhu_vx_b, OP_UUU_B, H1, H1, do_mulhu_b)
1551 RVVCALL(OPIVX2, vmulhu_vx_h, OP_UUU_H, H2, H2, do_mulhu_h)
1552 RVVCALL(OPIVX2, vmulhu_vx_w, OP_UUU_W, H4, H4, do_mulhu_w)
1553 RVVCALL(OPIVX2, vmulhu_vx_d, OP_UUU_D, H8, H8, do_mulhu_d)
1554 RVVCALL(OPIVX2, vmulhsu_vx_b, OP_SUS_B, H1, H1, do_mulhsu_b)
1555 RVVCALL(OPIVX2, vmulhsu_vx_h, OP_SUS_H, H2, H2, do_mulhsu_h)
1556 RVVCALL(OPIVX2, vmulhsu_vx_w, OP_SUS_W, H4, H4, do_mulhsu_w)
1557 RVVCALL(OPIVX2, vmulhsu_vx_d, OP_SUS_D, H8, H8, do_mulhsu_d)
1558 GEN_VEXT_VX(vmul_vx_b, 1)
1559 GEN_VEXT_VX(vmul_vx_h, 2)
1560 GEN_VEXT_VX(vmul_vx_w, 4)
1561 GEN_VEXT_VX(vmul_vx_d, 8)
1562 GEN_VEXT_VX(vmulh_vx_b, 1)
1563 GEN_VEXT_VX(vmulh_vx_h, 2)
1564 GEN_VEXT_VX(vmulh_vx_w, 4)
1565 GEN_VEXT_VX(vmulh_vx_d, 8)
1566 GEN_VEXT_VX(vmulhu_vx_b, 1)
1567 GEN_VEXT_VX(vmulhu_vx_h, 2)
1568 GEN_VEXT_VX(vmulhu_vx_w, 4)
1569 GEN_VEXT_VX(vmulhu_vx_d, 8)
1570 GEN_VEXT_VX(vmulhsu_vx_b, 1)
1571 GEN_VEXT_VX(vmulhsu_vx_h, 2)
1572 GEN_VEXT_VX(vmulhsu_vx_w, 4)
1573 GEN_VEXT_VX(vmulhsu_vx_d, 8)
1575 /* Vector Integer Divide Instructions */
1576 #define DO_DIVU(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : N / M)
1577 #define DO_REMU(N, M) (unlikely(M == 0) ? N : N % M)
1578 #define DO_DIV(N, M) (unlikely(M == 0) ? (__typeof(N))(-1) : \
1579 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? N : N / M)
1580 #define DO_REM(N, M) (unlikely(M == 0) ? N : \
1581 unlikely((N == -N) && (M == (__typeof(N))(-1))) ? 0 : N % M)
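/*
 * These follow the RISC-V division semantics: division by zero yields all
 * ones (-1) for div[u] and the dividend for rem[u]; signed overflow
 * (e.g. int8_t -128 / -1) yields the dividend for DO_DIV and 0 for DO_REM.
 * The (N == -N) test identifies the most negative value, whose negation
 * wraps back to itself in two's complement.
 */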
1583 RVVCALL(OPIVV2, vdivu_vv_b, OP_UUU_B, H1, H1, H1, DO_DIVU)
1584 RVVCALL(OPIVV2, vdivu_vv_h, OP_UUU_H, H2, H2, H2, DO_DIVU)
1585 RVVCALL(OPIVV2, vdivu_vv_w, OP_UUU_W, H4, H4, H4, DO_DIVU)
1586 RVVCALL(OPIVV2, vdivu_vv_d, OP_UUU_D, H8, H8, H8, DO_DIVU)
1587 RVVCALL(OPIVV2, vdiv_vv_b, OP_SSS_B, H1, H1, H1, DO_DIV)
1588 RVVCALL(OPIVV2, vdiv_vv_h, OP_SSS_H, H2, H2, H2, DO_DIV)
1589 RVVCALL(OPIVV2, vdiv_vv_w, OP_SSS_W, H4, H4, H4, DO_DIV)
1590 RVVCALL(OPIVV2, vdiv_vv_d, OP_SSS_D, H8, H8, H8, DO_DIV)
1591 RVVCALL(OPIVV2, vremu_vv_b, OP_UUU_B, H1, H1, H1, DO_REMU)
1592 RVVCALL(OPIVV2, vremu_vv_h, OP_UUU_H, H2, H2, H2, DO_REMU)
1593 RVVCALL(OPIVV2, vremu_vv_w, OP_UUU_W, H4, H4, H4, DO_REMU)
1594 RVVCALL(OPIVV2, vremu_vv_d, OP_UUU_D, H8, H8, H8, DO_REMU)
1595 RVVCALL(OPIVV2, vrem_vv_b, OP_SSS_B, H1, H1, H1, DO_REM)
1596 RVVCALL(OPIVV2, vrem_vv_h, OP_SSS_H, H2, H2, H2, DO_REM)
1597 RVVCALL(OPIVV2, vrem_vv_w, OP_SSS_W, H4, H4, H4, DO_REM)
1598 RVVCALL(OPIVV2, vrem_vv_d, OP_SSS_D, H8, H8, H8, DO_REM)
1599 GEN_VEXT_VV(vdivu_vv_b, 1)
1600 GEN_VEXT_VV(vdivu_vv_h, 2)
1601 GEN_VEXT_VV(vdivu_vv_w, 4)
1602 GEN_VEXT_VV(vdivu_vv_d, 8)
1603 GEN_VEXT_VV(vdiv_vv_b, 1)
1604 GEN_VEXT_VV(vdiv_vv_h, 2)
1605 GEN_VEXT_VV(vdiv_vv_w, 4)
1606 GEN_VEXT_VV(vdiv_vv_d, 8)
1607 GEN_VEXT_VV(vremu_vv_b, 1)
1608 GEN_VEXT_VV(vremu_vv_h, 2)
1609 GEN_VEXT_VV(vremu_vv_w, 4)
1610 GEN_VEXT_VV(vremu_vv_d, 8)
1611 GEN_VEXT_VV(vrem_vv_b, 1)
1612 GEN_VEXT_VV(vrem_vv_h, 2)
1613 GEN_VEXT_VV(vrem_vv_w, 4)
1614 GEN_VEXT_VV(vrem_vv_d, 8)
1616 RVVCALL(OPIVX2, vdivu_vx_b, OP_UUU_B, H1, H1, DO_DIVU)
1617 RVVCALL(OPIVX2, vdivu_vx_h, OP_UUU_H, H2, H2, DO_DIVU)
1618 RVVCALL(OPIVX2, vdivu_vx_w, OP_UUU_W, H4, H4, DO_DIVU)
1619 RVVCALL(OPIVX2, vdivu_vx_d, OP_UUU_D, H8, H8, DO_DIVU)
1620 RVVCALL(OPIVX2, vdiv_vx_b, OP_SSS_B, H1, H1, DO_DIV)
1621 RVVCALL(OPIVX2, vdiv_vx_h, OP_SSS_H, H2, H2, DO_DIV)
1622 RVVCALL(OPIVX2, vdiv_vx_w, OP_SSS_W, H4, H4, DO_DIV)
1623 RVVCALL(OPIVX2, vdiv_vx_d, OP_SSS_D, H8, H8, DO_DIV)
1624 RVVCALL(OPIVX2, vremu_vx_b, OP_UUU_B, H1, H1, DO_REMU)
1625 RVVCALL(OPIVX2, vremu_vx_h, OP_UUU_H, H2, H2, DO_REMU)
1626 RVVCALL(OPIVX2, vremu_vx_w, OP_UUU_W, H4, H4, DO_REMU)
1627 RVVCALL(OPIVX2, vremu_vx_d, OP_UUU_D, H8, H8, DO_REMU)
1628 RVVCALL(OPIVX2, vrem_vx_b, OP_SSS_B, H1, H1, DO_REM)
1629 RVVCALL(OPIVX2, vrem_vx_h, OP_SSS_H, H2, H2, DO_REM)
1630 RVVCALL(OPIVX2, vrem_vx_w, OP_SSS_W, H4, H4, DO_REM)
1631 RVVCALL(OPIVX2, vrem_vx_d, OP_SSS_D, H8, H8, DO_REM)
1632 GEN_VEXT_VX(vdivu_vx_b, 1)
1633 GEN_VEXT_VX(vdivu_vx_h, 2)
1634 GEN_VEXT_VX(vdivu_vx_w, 4)
1635 GEN_VEXT_VX(vdivu_vx_d, 8)
1636 GEN_VEXT_VX(vdiv_vx_b, 1)
1637 GEN_VEXT_VX(vdiv_vx_h, 2)
1638 GEN_VEXT_VX(vdiv_vx_w, 4)
1639 GEN_VEXT_VX(vdiv_vx_d, 8)
1640 GEN_VEXT_VX(vremu_vx_b, 1)
1641 GEN_VEXT_VX(vremu_vx_h, 2)
1642 GEN_VEXT_VX(vremu_vx_w, 4)
1643 GEN_VEXT_VX(vremu_vx_d, 8)
1644 GEN_VEXT_VX(vrem_vx_b, 1)
1645 GEN_VEXT_VX(vrem_vx_h, 2)
1646 GEN_VEXT_VX(vrem_vx_w, 4)
1647 GEN_VEXT_VX(vrem_vx_d, 8)
1649 /* Vector Widening Integer Multiply Instructions */
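/*
 * Widening multiplies write 2*SEW-wide destination elements: the WOP_*
 * type lists and the wider destination index (e.g. H2 destination vs H1
 * sources for the byte forms) reflect the doubled element size, which is
 * also why the GEN_VEXT_VV/GEN_VEXT_VX instantiations below use the
 * destination element size in bytes.
 */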
1650 RVVCALL(OPIVV2, vwmul_vv_b, WOP_SSS_B, H2, H1, H1, DO_MUL)
1651 RVVCALL(OPIVV2, vwmul_vv_h, WOP_SSS_H, H4, H2, H2, DO_MUL)
1652 RVVCALL(OPIVV2, vwmul_vv_w, WOP_SSS_W, H8, H4, H4, DO_MUL)
1653 RVVCALL(OPIVV2, vwmulu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MUL)
1654 RVVCALL(OPIVV2, vwmulu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MUL)
1655 RVVCALL(OPIVV2, vwmulu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MUL)
1656 RVVCALL(OPIVV2, vwmulsu_vv_b, WOP_SUS_B, H2, H1, H1, DO_MUL)
1657 RVVCALL(OPIVV2, vwmulsu_vv_h, WOP_SUS_H, H4, H2, H2, DO_MUL)
1658 RVVCALL(OPIVV2, vwmulsu_vv_w, WOP_SUS_W, H8, H4, H4, DO_MUL)
1659 GEN_VEXT_VV(vwmul_vv_b, 2)
1660 GEN_VEXT_VV(vwmul_vv_h, 4)
1661 GEN_VEXT_VV(vwmul_vv_w, 8)
1662 GEN_VEXT_VV(vwmulu_vv_b, 2)
1663 GEN_VEXT_VV(vwmulu_vv_h, 4)
1664 GEN_VEXT_VV(vwmulu_vv_w, 8)
1665 GEN_VEXT_VV(vwmulsu_vv_b, 2)
1666 GEN_VEXT_VV(vwmulsu_vv_h, 4)
1667 GEN_VEXT_VV(vwmulsu_vv_w, 8)
1669 RVVCALL(OPIVX2, vwmul_vx_b, WOP_SSS_B, H2, H1, DO_MUL)
1670 RVVCALL(OPIVX2, vwmul_vx_h, WOP_SSS_H, H4, H2, DO_MUL)
1671 RVVCALL(OPIVX2, vwmul_vx_w, WOP_SSS_W, H8, H4, DO_MUL)
1672 RVVCALL(OPIVX2, vwmulu_vx_b, WOP_UUU_B, H2, H1, DO_MUL)
1673 RVVCALL(OPIVX2, vwmulu_vx_h, WOP_UUU_H, H4, H2, DO_MUL)
1674 RVVCALL(OPIVX2, vwmulu_vx_w, WOP_UUU_W, H8, H4, DO_MUL)
1675 RVVCALL(OPIVX2, vwmulsu_vx_b, WOP_SUS_B, H2, H1, DO_MUL)
1676 RVVCALL(OPIVX2, vwmulsu_vx_h, WOP_SUS_H, H4, H2, DO_MUL)
1677 RVVCALL(OPIVX2, vwmulsu_vx_w, WOP_SUS_W, H8, H4, DO_MUL)
1678 GEN_VEXT_VX(vwmul_vx_b, 2)
1679 GEN_VEXT_VX(vwmul_vx_h, 4)
1680 GEN_VEXT_VX(vwmul_vx_w, 8)
1681 GEN_VEXT_VX(vwmulu_vx_b, 2)
1682 GEN_VEXT_VX(vwmulu_vx_h, 4)
1683 GEN_VEXT_VX(vwmulu_vx_w, 8)
1684 GEN_VEXT_VX(vwmulsu_vx_b, 2)
1685 GEN_VEXT_VX(vwmulsu_vx_h, 4)
1686 GEN_VEXT_VX(vwmulsu_vx_w, 8)
1688 /* Vector Single-Width Integer Multiply-Add Instructions */
1689 #define OPIVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1690 static void do_##NAME(void *vd, void *vs1, void *vs2, int i)  \
1691 {                                                             \
1692     TX1 s1 = *((T1 *)vs1 + HS1(i));                           \
1693     TX2 s2 = *((T2 *)vs2 + HS2(i));                           \
1694     TD d = *((TD *)vd + HD(i));                               \
1695     *((TD *)vd + HD(i)) = OP(s2, s1, d);                      \
1696 }
1698 #define DO_MACC(N, M, D) (M * N + D)
1699 #define DO_NMSAC(N, M, D) (-(M * N) + D)
1700 #define DO_MADD(N, M, D) (M * D + N)
1701 #define DO_NMSUB(N, M, D) (-(M * D) + N)
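/*
 * OPIVV3 (and OPIVX3 below) pass (vs2, vs1 or rs1, vd) as (N, M, D), so
 * the macros above expand to vd = vs1 * vs2 + vd (vmacc),
 * vd = -(vs1 * vs2) + vd (vnmsac), vd = vs1 * vd + vs2 (vmadd) and
 * vd = -(vs1 * vd) + vs2 (vnmsub), matching the RVV multiply-add forms.
 */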
1702 RVVCALL(OPIVV3, vmacc_vv_b, OP_SSS_B, H1, H1, H1, DO_MACC)
1703 RVVCALL(OPIVV3, vmacc_vv_h, OP_SSS_H, H2, H2, H2, DO_MACC)
1704 RVVCALL(OPIVV3, vmacc_vv_w, OP_SSS_W, H4, H4, H4, DO_MACC)
1705 RVVCALL(OPIVV3, vmacc_vv_d, OP_SSS_D, H8, H8, H8, DO_MACC)
1706 RVVCALL(OPIVV3, vnmsac_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSAC)
1707 RVVCALL(OPIVV3, vnmsac_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSAC)
1708 RVVCALL(OPIVV3, vnmsac_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSAC)
1709 RVVCALL(OPIVV3, vnmsac_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSAC)
1710 RVVCALL(OPIVV3, vmadd_vv_b, OP_SSS_B, H1, H1, H1, DO_MADD)
1711 RVVCALL(OPIVV3, vmadd_vv_h, OP_SSS_H, H2, H2, H2, DO_MADD)
1712 RVVCALL(OPIVV3, vmadd_vv_w, OP_SSS_W, H4, H4, H4, DO_MADD)
1713 RVVCALL(OPIVV3, vmadd_vv_d, OP_SSS_D, H8, H8, H8, DO_MADD)
1714 RVVCALL(OPIVV3, vnmsub_vv_b, OP_SSS_B, H1, H1, H1, DO_NMSUB)
1715 RVVCALL(OPIVV3, vnmsub_vv_h, OP_SSS_H, H2, H2, H2, DO_NMSUB)
1716 RVVCALL(OPIVV3, vnmsub_vv_w, OP_SSS_W, H4, H4, H4, DO_NMSUB)
1717 RVVCALL(OPIVV3, vnmsub_vv_d, OP_SSS_D, H8, H8, H8, DO_NMSUB)
1718 GEN_VEXT_VV(vmacc_vv_b, 1)
1719 GEN_VEXT_VV(vmacc_vv_h, 2)
1720 GEN_VEXT_VV(vmacc_vv_w, 4)
1721 GEN_VEXT_VV(vmacc_vv_d, 8)
1722 GEN_VEXT_VV(vnmsac_vv_b, 1)
1723 GEN_VEXT_VV(vnmsac_vv_h, 2)
1724 GEN_VEXT_VV(vnmsac_vv_w, 4)
1725 GEN_VEXT_VV(vnmsac_vv_d, 8)
1726 GEN_VEXT_VV(vmadd_vv_b, 1)
1727 GEN_VEXT_VV(vmadd_vv_h, 2)
1728 GEN_VEXT_VV(vmadd_vv_w, 4)
1729 GEN_VEXT_VV(vmadd_vv_d, 8)
1730 GEN_VEXT_VV(vnmsub_vv_b, 1)
1731 GEN_VEXT_VV(vnmsub_vv_h, 2)
1732 GEN_VEXT_VV(vnmsub_vv_w, 4)
1733 GEN_VEXT_VV(vnmsub_vv_d, 8)
1735 #define OPIVX3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)           \
1736 static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \
1737 {                                                                  \
1738     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
1739     TD d = *((TD *)vd + HD(i));                                    \
1740     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d);                  \
1741 }
1743 RVVCALL(OPIVX3, vmacc_vx_b, OP_SSS_B, H1, H1, DO_MACC)
1744 RVVCALL(OPIVX3, vmacc_vx_h, OP_SSS_H, H2, H2, DO_MACC)
1745 RVVCALL(OPIVX3, vmacc_vx_w, OP_SSS_W, H4, H4, DO_MACC)
1746 RVVCALL(OPIVX3, vmacc_vx_d, OP_SSS_D, H8, H8, DO_MACC)
1747 RVVCALL(OPIVX3, vnmsac_vx_b, OP_SSS_B, H1, H1, DO_NMSAC)
1748 RVVCALL(OPIVX3, vnmsac_vx_h, OP_SSS_H, H2, H2, DO_NMSAC)
1749 RVVCALL(OPIVX3, vnmsac_vx_w, OP_SSS_W, H4, H4, DO_NMSAC)
1750 RVVCALL(OPIVX3, vnmsac_vx_d, OP_SSS_D, H8, H8, DO_NMSAC)
1751 RVVCALL(OPIVX3, vmadd_vx_b, OP_SSS_B, H1, H1, DO_MADD)
1752 RVVCALL(OPIVX3, vmadd_vx_h, OP_SSS_H, H2, H2, DO_MADD)
1753 RVVCALL(OPIVX3, vmadd_vx_w, OP_SSS_W, H4, H4, DO_MADD)
1754 RVVCALL(OPIVX3, vmadd_vx_d, OP_SSS_D, H8, H8, DO_MADD)
1755 RVVCALL(OPIVX3, vnmsub_vx_b, OP_SSS_B, H1, H1, DO_NMSUB)
1756 RVVCALL(OPIVX3, vnmsub_vx_h, OP_SSS_H, H2, H2, DO_NMSUB)
1757 RVVCALL(OPIVX3, vnmsub_vx_w, OP_SSS_W, H4, H4, DO_NMSUB)
1758 RVVCALL(OPIVX3, vnmsub_vx_d, OP_SSS_D, H8, H8, DO_NMSUB)
1759 GEN_VEXT_VX(vmacc_vx_b, 1)
1760 GEN_VEXT_VX(vmacc_vx_h, 2)
1761 GEN_VEXT_VX(vmacc_vx_w, 4)
1762 GEN_VEXT_VX(vmacc_vx_d, 8)
1763 GEN_VEXT_VX(vnmsac_vx_b, 1)
1764 GEN_VEXT_VX(vnmsac_vx_h, 2)
1765 GEN_VEXT_VX(vnmsac_vx_w, 4)
1766 GEN_VEXT_VX(vnmsac_vx_d, 8)
1767 GEN_VEXT_VX(vmadd_vx_b, 1)
1768 GEN_VEXT_VX(vmadd_vx_h, 2)
1769 GEN_VEXT_VX(vmadd_vx_w, 4)
1770 GEN_VEXT_VX(vmadd_vx_d, 8)
1771 GEN_VEXT_VX(vnmsub_vx_b, 1)
1772 GEN_VEXT_VX(vnmsub_vx_h, 2)
1773 GEN_VEXT_VX(vnmsub_vx_w, 4)
1774 GEN_VEXT_VX(vnmsub_vx_d, 8)
1776 /* Vector Widening Integer Multiply-Add Instructions */
1777 RVVCALL(OPIVV3, vwmaccu_vv_b, WOP_UUU_B, H2, H1, H1, DO_MACC)
1778 RVVCALL(OPIVV3, vwmaccu_vv_h, WOP_UUU_H, H4, H2, H2, DO_MACC)
1779 RVVCALL(OPIVV3, vwmaccu_vv_w, WOP_UUU_W, H8, H4, H4, DO_MACC)
1780 RVVCALL(OPIVV3, vwmacc_vv_b, WOP_SSS_B, H2, H1, H1, DO_MACC)
1781 RVVCALL(OPIVV3, vwmacc_vv_h, WOP_SSS_H, H4, H2, H2, DO_MACC)
1782 RVVCALL(OPIVV3, vwmacc_vv_w, WOP_SSS_W, H8, H4, H4, DO_MACC)
1783 RVVCALL(OPIVV3, vwmaccsu_vv_b, WOP_SSU_B, H2, H1, H1, DO_MACC)
1784 RVVCALL(OPIVV3, vwmaccsu_vv_h, WOP_SSU_H, H4, H2, H2, DO_MACC)
1785 RVVCALL(OPIVV3, vwmaccsu_vv_w, WOP_SSU_W, H8, H4, H4, DO_MACC)
1786 GEN_VEXT_VV(vwmaccu_vv_b, 2)
1787 GEN_VEXT_VV(vwmaccu_vv_h, 4)
1788 GEN_VEXT_VV(vwmaccu_vv_w, 8)
1789 GEN_VEXT_VV(vwmacc_vv_b, 2)
1790 GEN_VEXT_VV(vwmacc_vv_h, 4)
1791 GEN_VEXT_VV(vwmacc_vv_w, 8)
1792 GEN_VEXT_VV(vwmaccsu_vv_b, 2)
1793 GEN_VEXT_VV(vwmaccsu_vv_h, 4)
1794 GEN_VEXT_VV(vwmaccsu_vv_w, 8)
1796 RVVCALL(OPIVX3, vwmaccu_vx_b, WOP_UUU_B, H2, H1, DO_MACC)
1797 RVVCALL(OPIVX3, vwmaccu_vx_h, WOP_UUU_H, H4, H2, DO_MACC)
1798 RVVCALL(OPIVX3, vwmaccu_vx_w, WOP_UUU_W, H8, H4, DO_MACC)
1799 RVVCALL(OPIVX3, vwmacc_vx_b, WOP_SSS_B, H2, H1, DO_MACC)
1800 RVVCALL(OPIVX3, vwmacc_vx_h, WOP_SSS_H, H4, H2, DO_MACC)
1801 RVVCALL(OPIVX3, vwmacc_vx_w, WOP_SSS_W, H8, H4, DO_MACC)
1802 RVVCALL(OPIVX3, vwmaccsu_vx_b, WOP_SSU_B, H2, H1, DO_MACC)
1803 RVVCALL(OPIVX3, vwmaccsu_vx_h, WOP_SSU_H, H4, H2, DO_MACC)
1804 RVVCALL(OPIVX3, vwmaccsu_vx_w, WOP_SSU_W, H8, H4, DO_MACC)
1805 RVVCALL(OPIVX3, vwmaccus_vx_b, WOP_SUS_B, H2, H1, DO_MACC)
1806 RVVCALL(OPIVX3, vwmaccus_vx_h, WOP_SUS_H, H4, H2, DO_MACC)
1807 RVVCALL(OPIVX3, vwmaccus_vx_w, WOP_SUS_W, H8, H4, DO_MACC)
1808 GEN_VEXT_VX(vwmaccu_vx_b, 2)
1809 GEN_VEXT_VX(vwmaccu_vx_h, 4)
1810 GEN_VEXT_VX(vwmaccu_vx_w, 8)
1811 GEN_VEXT_VX(vwmacc_vx_b, 2)
1812 GEN_VEXT_VX(vwmacc_vx_h, 4)
1813 GEN_VEXT_VX(vwmacc_vx_w, 8)
1814 GEN_VEXT_VX(vwmaccsu_vx_b, 2)
1815 GEN_VEXT_VX(vwmaccsu_vx_h, 4)
1816 GEN_VEXT_VX(vwmaccsu_vx_w, 8)
1817 GEN_VEXT_VX(vwmaccus_vx_b, 2)
1818 GEN_VEXT_VX(vwmaccus_vx_h, 4)
1819 GEN_VEXT_VX(vwmaccus_vx_w, 8)
1821 /* Vector Integer Merge and Move Instructions */
1822 #define GEN_VEXT_VMV_VV(NAME, ETYPE, H) \
1823 void HELPER(NAME)(void *vd, void *vs1, CPURISCVState *env, \
1824 uint32_t desc) \
1826 uint32_t vl = env->vl; \
1827 uint32_t esz = sizeof(ETYPE); \
1828 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1829 uint32_t vta = vext_vta(desc); \
1830 uint32_t i; \
1832 VSTART_CHECK_EARLY_EXIT(env); \
1834 for (i = env->vstart; i < vl; i++) { \
1835 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
1836 *((ETYPE *)vd + H(i)) = s1; \
1838 env->vstart = 0; \
1839 /* set tail elements to 1s */ \
1840 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1843 GEN_VEXT_VMV_VV(vmv_v_v_b, int8_t, H1)
1844 GEN_VEXT_VMV_VV(vmv_v_v_h, int16_t, H2)
1845 GEN_VEXT_VMV_VV(vmv_v_v_w, int32_t, H4)
1846 GEN_VEXT_VMV_VV(vmv_v_v_d, int64_t, H8)
1848 #define GEN_VEXT_VMV_VX(NAME, ETYPE, H) \
1849 void HELPER(NAME)(void *vd, uint64_t s1, CPURISCVState *env, \
1850 uint32_t desc) \
1852 uint32_t vl = env->vl; \
1853 uint32_t esz = sizeof(ETYPE); \
1854 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1855 uint32_t vta = vext_vta(desc); \
1856 uint32_t i; \
1858 VSTART_CHECK_EARLY_EXIT(env); \
1860 for (i = env->vstart; i < vl; i++) { \
1861 *((ETYPE *)vd + H(i)) = (ETYPE)s1; \
1863 env->vstart = 0; \
1864 /* set tail elements to 1s */ \
1865 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1868 GEN_VEXT_VMV_VX(vmv_v_x_b, int8_t, H1)
1869 GEN_VEXT_VMV_VX(vmv_v_x_h, int16_t, H2)
1870 GEN_VEXT_VMV_VX(vmv_v_x_w, int32_t, H4)
1871 GEN_VEXT_VMV_VX(vmv_v_x_d, int64_t, H8)
1873 #define GEN_VEXT_VMERGE_VV(NAME, ETYPE, H) \
1874 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
1875 CPURISCVState *env, uint32_t desc) \
1877 uint32_t vl = env->vl; \
1878 uint32_t esz = sizeof(ETYPE); \
1879 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1880 uint32_t vta = vext_vta(desc); \
1881 uint32_t i; \
1883 VSTART_CHECK_EARLY_EXIT(env); \
1885 for (i = env->vstart; i < vl; i++) { \
1886 ETYPE *vt = (!vext_elem_mask(v0, i) ? vs2 : vs1); \
1887 *((ETYPE *)vd + H(i)) = *(vt + H(i)); \
1889 env->vstart = 0; \
1890 /* set tail elements to 1s */ \
1891 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1894 GEN_VEXT_VMERGE_VV(vmerge_vvm_b, int8_t, H1)
1895 GEN_VEXT_VMERGE_VV(vmerge_vvm_h, int16_t, H2)
1896 GEN_VEXT_VMERGE_VV(vmerge_vvm_w, int32_t, H4)
1897 GEN_VEXT_VMERGE_VV(vmerge_vvm_d, int64_t, H8)
1899 #define GEN_VEXT_VMERGE_VX(NAME, ETYPE, H) \
1900 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
1901 void *vs2, CPURISCVState *env, uint32_t desc) \
1903 uint32_t vl = env->vl; \
1904 uint32_t esz = sizeof(ETYPE); \
1905 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
1906 uint32_t vta = vext_vta(desc); \
1907 uint32_t i; \
1909 VSTART_CHECK_EARLY_EXIT(env); \
1911 for (i = env->vstart; i < vl; i++) { \
1912 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
1913 ETYPE d = (!vext_elem_mask(v0, i) ? s2 : \
1914 (ETYPE)(target_long)s1); \
1915 *((ETYPE *)vd + H(i)) = d; \
1917 env->vstart = 0; \
1918 /* set tail elements to 1s */ \
1919 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
1922 GEN_VEXT_VMERGE_VX(vmerge_vxm_b, int8_t, H1)
1923 GEN_VEXT_VMERGE_VX(vmerge_vxm_h, int16_t, H2)
1924 GEN_VEXT_VMERGE_VX(vmerge_vxm_w, int32_t, H4)
1925 GEN_VEXT_VMERGE_VX(vmerge_vxm_d, int64_t, H8)
1927 /*
1928  * Vector Fixed-Point Arithmetic Instructions
1929  */
1931 /* Vector Single-Width Saturating Add and Subtract */
1933 /*
1934  * Fixed-point instructions carry a rounding mode and may saturate,
1935  * so define common macros for the fixed-point helpers here.
1936  */
1937 typedef void opivv2_rm_fn(void *vd, void *vs1, void *vs2, int i,
1938 CPURISCVState *env, int vxrm);
1940 #define OPIVV2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
1941 static inline void                                               \
1942 do_##NAME(void *vd, void *vs1, void *vs2, int i,                 \
1943           CPURISCVState *env, int vxrm)                          \
1944 {                                                                \
1945     TX1 s1 = *((T1 *)vs1 + HS1(i));                              \
1946     TX2 s2 = *((T2 *)vs2 + HS2(i));                              \
1947     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, s1);                 \
1948 }
1950 static inline void
1951 vext_vv_rm_1(void *vd, void *v0, void *vs1, void *vs2,
1952 CPURISCVState *env,
1953 uint32_t vl, uint32_t vm, int vxrm,
1954 opivv2_rm_fn *fn, uint32_t vma, uint32_t esz)
1956 VSTART_CHECK_EARLY_EXIT(env);
1958 for (uint32_t i = env->vstart; i < vl; i++) {
1959 if (!vm && !vext_elem_mask(v0, i)) {
1960 /* set masked-off elements to 1s */
1961 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
1962 continue;
1964 fn(vd, vs1, vs2, i, env, vxrm);
1966 env->vstart = 0;
1969 static inline void
1970 vext_vv_rm_2(void *vd, void *v0, void *vs1, void *vs2,
1971 CPURISCVState *env,
1972 uint32_t desc,
1973 opivv2_rm_fn *fn, uint32_t esz)
1975 uint32_t vm = vext_vm(desc);
1976 uint32_t vl = env->vl;
1977 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
1978 uint32_t vta = vext_vta(desc);
1979 uint32_t vma = vext_vma(desc);
1981 switch (env->vxrm) {
1982 case 0: /* rnu */
1983 vext_vv_rm_1(vd, v0, vs1, vs2,
1984 env, vl, vm, 0, fn, vma, esz);
1985 break;
1986 case 1: /* rne */
1987 vext_vv_rm_1(vd, v0, vs1, vs2,
1988 env, vl, vm, 1, fn, vma, esz);
1989 break;
1990 case 2: /* rdn */
1991 vext_vv_rm_1(vd, v0, vs1, vs2,
1992 env, vl, vm, 2, fn, vma, esz);
1993 break;
1994 default: /* rod */
1995 vext_vv_rm_1(vd, v0, vs1, vs2,
1996 env, vl, vm, 3, fn, vma, esz);
1997 break;
1999 /* set tail elements to 1s */
2000 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2003 /* generate helpers for fixed point instructions with OPIVV format */
2004 #define GEN_VEXT_VV_RM(NAME, ESZ) \
2005 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
2006 CPURISCVState *env, uint32_t desc) \
2008 vext_vv_rm_2(vd, v0, vs1, vs2, env, desc, \
2009 do_##NAME, ESZ); \
2012 static inline uint8_t saddu8(CPURISCVState *env, int vxrm, uint8_t a,
2013 uint8_t b)
2015 uint8_t res = a + b;
2016 if (res < a) {
2017 res = UINT8_MAX;
2018 env->vxsat = 0x1;
2020 return res;
2023 static inline uint16_t saddu16(CPURISCVState *env, int vxrm, uint16_t a,
2024 uint16_t b)
2026 uint16_t res = a + b;
2027 if (res < a) {
2028 res = UINT16_MAX;
2029 env->vxsat = 0x1;
2031 return res;
2034 static inline uint32_t saddu32(CPURISCVState *env, int vxrm, uint32_t a,
2035 uint32_t b)
2037 uint32_t res = a + b;
2038 if (res < a) {
2039 res = UINT32_MAX;
2040 env->vxsat = 0x1;
2042 return res;
2045 static inline uint64_t saddu64(CPURISCVState *env, int vxrm, uint64_t a,
2046 uint64_t b)
2048 uint64_t res = a + b;
2049 if (res < a) {
2050 res = UINT64_MAX;
2051 env->vxsat = 0x1;
2053 return res;
2056 RVVCALL(OPIVV2_RM, vsaddu_vv_b, OP_UUU_B, H1, H1, H1, saddu8)
2057 RVVCALL(OPIVV2_RM, vsaddu_vv_h, OP_UUU_H, H2, H2, H2, saddu16)
2058 RVVCALL(OPIVV2_RM, vsaddu_vv_w, OP_UUU_W, H4, H4, H4, saddu32)
2059 RVVCALL(OPIVV2_RM, vsaddu_vv_d, OP_UUU_D, H8, H8, H8, saddu64)
2060 GEN_VEXT_VV_RM(vsaddu_vv_b, 1)
2061 GEN_VEXT_VV_RM(vsaddu_vv_h, 2)
2062 GEN_VEXT_VV_RM(vsaddu_vv_w, 4)
2063 GEN_VEXT_VV_RM(vsaddu_vv_d, 8)
2065 typedef void opivx2_rm_fn(void *vd, target_long s1, void *vs2, int i,
2066 CPURISCVState *env, int vxrm);
2068 #define OPIVX2_RM(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)  \
2069 static inline void                                           \
2070 do_##NAME(void *vd, target_long s1, void *vs2, int i,        \
2071           CPURISCVState *env, int vxrm)                      \
2072 {                                                            \
2073     TX2 s2 = *((T2 *)vs2 + HS2(i));                          \
2074     *((TD *)vd + HD(i)) = OP(env, vxrm, s2, (TX1)(T1)s1);    \
2075 }
2077 static inline void
2078 vext_vx_rm_1(void *vd, void *v0, target_long s1, void *vs2,
2079 CPURISCVState *env,
2080 uint32_t vl, uint32_t vm, int vxrm,
2081 opivx2_rm_fn *fn, uint32_t vma, uint32_t esz)
2083 VSTART_CHECK_EARLY_EXIT(env);
2085 for (uint32_t i = env->vstart; i < vl; i++) {
2086 if (!vm && !vext_elem_mask(v0, i)) {
2087 /* set masked-off elements to 1s */
2088 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz);
2089 continue;
2091 fn(vd, s1, vs2, i, env, vxrm);
2093 env->vstart = 0;
2096 static inline void
2097 vext_vx_rm_2(void *vd, void *v0, target_long s1, void *vs2,
2098 CPURISCVState *env,
2099 uint32_t desc,
2100 opivx2_rm_fn *fn, uint32_t esz)
2102 uint32_t vm = vext_vm(desc);
2103 uint32_t vl = env->vl;
2104 uint32_t total_elems = vext_get_total_elems(env, desc, esz);
2105 uint32_t vta = vext_vta(desc);
2106 uint32_t vma = vext_vma(desc);
2108 switch (env->vxrm) {
2109 case 0: /* rnu */
2110 vext_vx_rm_1(vd, v0, s1, vs2,
2111 env, vl, vm, 0, fn, vma, esz);
2112 break;
2113 case 1: /* rne */
2114 vext_vx_rm_1(vd, v0, s1, vs2,
2115 env, vl, vm, 1, fn, vma, esz);
2116 break;
2117 case 2: /* rdn */
2118 vext_vx_rm_1(vd, v0, s1, vs2,
2119 env, vl, vm, 2, fn, vma, esz);
2120 break;
2121 default: /* rod */
2122 vext_vx_rm_1(vd, v0, s1, vs2,
2123 env, vl, vm, 3, fn, vma, esz);
2124 break;
2126 /* set tail elements to 1s */
2127 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);
2130 /* generate helpers for fixed point instructions with OPIVX format */
2131 #define GEN_VEXT_VX_RM(NAME, ESZ) \
2132 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \
2133 void *vs2, CPURISCVState *env, \
2134 uint32_t desc) \
2136 vext_vx_rm_2(vd, v0, s1, vs2, env, desc, \
2137 do_##NAME, ESZ); \
2140 RVVCALL(OPIVX2_RM, vsaddu_vx_b, OP_UUU_B, H1, H1, saddu8)
2141 RVVCALL(OPIVX2_RM, vsaddu_vx_h, OP_UUU_H, H2, H2, saddu16)
2142 RVVCALL(OPIVX2_RM, vsaddu_vx_w, OP_UUU_W, H4, H4, saddu32)
2143 RVVCALL(OPIVX2_RM, vsaddu_vx_d, OP_UUU_D, H8, H8, saddu64)
2144 GEN_VEXT_VX_RM(vsaddu_vx_b, 1)
2145 GEN_VEXT_VX_RM(vsaddu_vx_h, 2)
2146 GEN_VEXT_VX_RM(vsaddu_vx_w, 4)
2147 GEN_VEXT_VX_RM(vsaddu_vx_d, 8)
2149 static inline int8_t sadd8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2151 int8_t res = a + b;
2152 if ((res ^ a) & (res ^ b) & INT8_MIN) {
2153 res = a > 0 ? INT8_MAX : INT8_MIN;
2154 env->vxsat = 0x1;
2156 return res;
2159 static inline int16_t sadd16(CPURISCVState *env, int vxrm, int16_t a,
2160 int16_t b)
2162 int16_t res = a + b;
2163 if ((res ^ a) & (res ^ b) & INT16_MIN) {
2164 res = a > 0 ? INT16_MAX : INT16_MIN;
2165 env->vxsat = 0x1;
2167 return res;
2170 static inline int32_t sadd32(CPURISCVState *env, int vxrm, int32_t a,
2171 int32_t b)
2173 int32_t res = a + b;
2174 if ((res ^ a) & (res ^ b) & INT32_MIN) {
2175 res = a > 0 ? INT32_MAX : INT32_MIN;
2176 env->vxsat = 0x1;
2178 return res;
2181 static inline int64_t sadd64(CPURISCVState *env, int vxrm, int64_t a,
2182 int64_t b)
2184 int64_t res = a + b;
2185 if ((res ^ a) & (res ^ b) & INT64_MIN) {
2186 res = a > 0 ? INT64_MAX : INT64_MIN;
2187 env->vxsat = 0x1;
2189 return res;
2192 RVVCALL(OPIVV2_RM, vsadd_vv_b, OP_SSS_B, H1, H1, H1, sadd8)
2193 RVVCALL(OPIVV2_RM, vsadd_vv_h, OP_SSS_H, H2, H2, H2, sadd16)
2194 RVVCALL(OPIVV2_RM, vsadd_vv_w, OP_SSS_W, H4, H4, H4, sadd32)
2195 RVVCALL(OPIVV2_RM, vsadd_vv_d, OP_SSS_D, H8, H8, H8, sadd64)
2196 GEN_VEXT_VV_RM(vsadd_vv_b, 1)
2197 GEN_VEXT_VV_RM(vsadd_vv_h, 2)
2198 GEN_VEXT_VV_RM(vsadd_vv_w, 4)
2199 GEN_VEXT_VV_RM(vsadd_vv_d, 8)
2201 RVVCALL(OPIVX2_RM, vsadd_vx_b, OP_SSS_B, H1, H1, sadd8)
2202 RVVCALL(OPIVX2_RM, vsadd_vx_h, OP_SSS_H, H2, H2, sadd16)
2203 RVVCALL(OPIVX2_RM, vsadd_vx_w, OP_SSS_W, H4, H4, sadd32)
2204 RVVCALL(OPIVX2_RM, vsadd_vx_d, OP_SSS_D, H8, H8, sadd64)
2205 GEN_VEXT_VX_RM(vsadd_vx_b, 1)
2206 GEN_VEXT_VX_RM(vsadd_vx_h, 2)
2207 GEN_VEXT_VX_RM(vsadd_vx_w, 4)
2208 GEN_VEXT_VX_RM(vsadd_vx_d, 8)
2210 static inline uint8_t ssubu8(CPURISCVState *env, int vxrm, uint8_t a,
2211 uint8_t b)
2213 uint8_t res = a - b;
2214 if (res > a) {
2215 res = 0;
2216 env->vxsat = 0x1;
2218 return res;
2221 static inline uint16_t ssubu16(CPURISCVState *env, int vxrm, uint16_t a,
2222 uint16_t b)
2224 uint16_t res = a - b;
2225 if (res > a) {
2226 res = 0;
2227 env->vxsat = 0x1;
2229 return res;
2232 static inline uint32_t ssubu32(CPURISCVState *env, int vxrm, uint32_t a,
2233 uint32_t b)
2235 uint32_t res = a - b;
2236 if (res > a) {
2237 res = 0;
2238 env->vxsat = 0x1;
2240 return res;
2243 static inline uint64_t ssubu64(CPURISCVState *env, int vxrm, uint64_t a,
2244 uint64_t b)
2246 uint64_t res = a - b;
2247 if (res > a) {
2248 res = 0;
2249 env->vxsat = 0x1;
2251 return res;
2254 RVVCALL(OPIVV2_RM, vssubu_vv_b, OP_UUU_B, H1, H1, H1, ssubu8)
2255 RVVCALL(OPIVV2_RM, vssubu_vv_h, OP_UUU_H, H2, H2, H2, ssubu16)
2256 RVVCALL(OPIVV2_RM, vssubu_vv_w, OP_UUU_W, H4, H4, H4, ssubu32)
2257 RVVCALL(OPIVV2_RM, vssubu_vv_d, OP_UUU_D, H8, H8, H8, ssubu64)
2258 GEN_VEXT_VV_RM(vssubu_vv_b, 1)
2259 GEN_VEXT_VV_RM(vssubu_vv_h, 2)
2260 GEN_VEXT_VV_RM(vssubu_vv_w, 4)
2261 GEN_VEXT_VV_RM(vssubu_vv_d, 8)
2263 RVVCALL(OPIVX2_RM, vssubu_vx_b, OP_UUU_B, H1, H1, ssubu8)
2264 RVVCALL(OPIVX2_RM, vssubu_vx_h, OP_UUU_H, H2, H2, ssubu16)
2265 RVVCALL(OPIVX2_RM, vssubu_vx_w, OP_UUU_W, H4, H4, ssubu32)
2266 RVVCALL(OPIVX2_RM, vssubu_vx_d, OP_UUU_D, H8, H8, ssubu64)
2267 GEN_VEXT_VX_RM(vssubu_vx_b, 1)
2268 GEN_VEXT_VX_RM(vssubu_vx_h, 2)
2269 GEN_VEXT_VX_RM(vssubu_vx_w, 4)
2270 GEN_VEXT_VX_RM(vssubu_vx_d, 8)
2272 static inline int8_t ssub8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2274 int8_t res = a - b;
2275 if ((res ^ a) & (a ^ b) & INT8_MIN) {
2276 res = a >= 0 ? INT8_MAX : INT8_MIN;
2277 env->vxsat = 0x1;
2279 return res;
2282 static inline int16_t ssub16(CPURISCVState *env, int vxrm, int16_t a,
2283 int16_t b)
2285 int16_t res = a - b;
2286 if ((res ^ a) & (a ^ b) & INT16_MIN) {
2287 res = a >= 0 ? INT16_MAX : INT16_MIN;
2288 env->vxsat = 0x1;
2290 return res;
2293 static inline int32_t ssub32(CPURISCVState *env, int vxrm, int32_t a,
2294 int32_t b)
2296 int32_t res = a - b;
2297 if ((res ^ a) & (a ^ b) & INT32_MIN) {
2298 res = a >= 0 ? INT32_MAX : INT32_MIN;
2299 env->vxsat = 0x1;
2301 return res;
2304 static inline int64_t ssub64(CPURISCVState *env, int vxrm, int64_t a,
2305 int64_t b)
2307 int64_t res = a - b;
2308 if ((res ^ a) & (a ^ b) & INT64_MIN) {
2309 res = a >= 0 ? INT64_MAX : INT64_MIN;
2310 env->vxsat = 0x1;
2312 return res;
2315 RVVCALL(OPIVV2_RM, vssub_vv_b, OP_SSS_B, H1, H1, H1, ssub8)
2316 RVVCALL(OPIVV2_RM, vssub_vv_h, OP_SSS_H, H2, H2, H2, ssub16)
2317 RVVCALL(OPIVV2_RM, vssub_vv_w, OP_SSS_W, H4, H4, H4, ssub32)
2318 RVVCALL(OPIVV2_RM, vssub_vv_d, OP_SSS_D, H8, H8, H8, ssub64)
2319 GEN_VEXT_VV_RM(vssub_vv_b, 1)
2320 GEN_VEXT_VV_RM(vssub_vv_h, 2)
2321 GEN_VEXT_VV_RM(vssub_vv_w, 4)
2322 GEN_VEXT_VV_RM(vssub_vv_d, 8)
2324 RVVCALL(OPIVX2_RM, vssub_vx_b, OP_SSS_B, H1, H1, ssub8)
2325 RVVCALL(OPIVX2_RM, vssub_vx_h, OP_SSS_H, H2, H2, ssub16)
2326 RVVCALL(OPIVX2_RM, vssub_vx_w, OP_SSS_W, H4, H4, ssub32)
2327 RVVCALL(OPIVX2_RM, vssub_vx_d, OP_SSS_D, H8, H8, ssub64)
2328 GEN_VEXT_VX_RM(vssub_vx_b, 1)
2329 GEN_VEXT_VX_RM(vssub_vx_h, 2)
2330 GEN_VEXT_VX_RM(vssub_vx_w, 4)
2331 GEN_VEXT_VX_RM(vssub_vx_d, 8)
2333 /* Vector Single-Width Averaging Add and Subtract */
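/*
 * get_round() below returns the increment to add after shifting v right
 * by 'shift' bits, according to the fixed-point rounding mode vxrm:
 *   rnu (0): v[shift-1]
 *   rne (1): v[shift-1] & ((v[shift-2:0] != 0) | v[shift])
 *   rdn (2): 0
 *   rod (3): !v[shift] & (v[shift-1:0] != 0)
 * For example, with v = 0b1011 and shift = 2 the increment is 1 under
 * rnu, rne and rod, and 0 under rdn.
 */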
2334 static inline uint8_t get_round(int vxrm, uint64_t v, uint8_t shift)
2335 {
2336     uint8_t d = extract64(v, shift, 1);
2337     uint8_t d1;
2338     uint64_t D1, D2;
2339
2340     if (shift == 0 || shift > 64) {
2341         return 0;
2342     }
2343
2344     d1 = extract64(v, shift - 1, 1);
2345     D1 = extract64(v, 0, shift);
2346     if (vxrm == 0) { /* round-to-nearest-up (add +0.5 LSB) */
2347         return d1;
2348     } else if (vxrm == 1) { /* round-to-nearest-even */
2349         if (shift > 1) {
2350             D2 = extract64(v, 0, shift - 1);
2351             return d1 & ((D2 != 0) | d);
2352         } else {
2353             return d1 & d;
2354         }
2355     } else if (vxrm == 3) { /* round-to-odd (OR bits into LSB, aka "jam") */
2356         return !d & (D1 != 0);
2357     }
2358     return 0; /* round-down (truncate) */
2359 }
2361 static inline int32_t aadd32(CPURISCVState *env, int vxrm, int32_t a,
2362 int32_t b)
2364 int64_t res = (int64_t)a + b;
2365 uint8_t round = get_round(vxrm, res, 1);
2367 return (res >> 1) + round;
2370 static inline int64_t aadd64(CPURISCVState *env, int vxrm, int64_t a,
2371 int64_t b)
2373 int64_t res = a + b;
2374 uint8_t round = get_round(vxrm, res, 1);
2375 int64_t over = (res ^ a) & (res ^ b) & INT64_MIN;
2377 /* With signed overflow, bit 64 is inverse of bit 63. */
2378 return ((res >> 1) ^ over) + round;
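/*
 * Worked example for the overflow fix-up in aadd64(): with a = INT64_MAX
 * and b = 1 the 64-bit sum wraps to INT64_MIN and 'over' is set, so
 * (res >> 1) ^ over flips bit 63 back and yields 0x4000000000000000,
 * the correct average before rounding (this relies on the arithmetic
 * right shift of signed values that QEMU assumes).
 */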
2381 RVVCALL(OPIVV2_RM, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd32)
2382 RVVCALL(OPIVV2_RM, vaadd_vv_h, OP_SSS_H, H2, H2, H2, aadd32)
2383 RVVCALL(OPIVV2_RM, vaadd_vv_w, OP_SSS_W, H4, H4, H4, aadd32)
2384 RVVCALL(OPIVV2_RM, vaadd_vv_d, OP_SSS_D, H8, H8, H8, aadd64)
2385 GEN_VEXT_VV_RM(vaadd_vv_b, 1)
2386 GEN_VEXT_VV_RM(vaadd_vv_h, 2)
2387 GEN_VEXT_VV_RM(vaadd_vv_w, 4)
2388 GEN_VEXT_VV_RM(vaadd_vv_d, 8)
2390 RVVCALL(OPIVX2_RM, vaadd_vx_b, OP_SSS_B, H1, H1, aadd32)
2391 RVVCALL(OPIVX2_RM, vaadd_vx_h, OP_SSS_H, H2, H2, aadd32)
2392 RVVCALL(OPIVX2_RM, vaadd_vx_w, OP_SSS_W, H4, H4, aadd32)
2393 RVVCALL(OPIVX2_RM, vaadd_vx_d, OP_SSS_D, H8, H8, aadd64)
2394 GEN_VEXT_VX_RM(vaadd_vx_b, 1)
2395 GEN_VEXT_VX_RM(vaadd_vx_h, 2)
2396 GEN_VEXT_VX_RM(vaadd_vx_w, 4)
2397 GEN_VEXT_VX_RM(vaadd_vx_d, 8)
2399 static inline uint32_t aaddu32(CPURISCVState *env, int vxrm,
2400 uint32_t a, uint32_t b)
2402 uint64_t res = (uint64_t)a + b;
2403 uint8_t round = get_round(vxrm, res, 1);
2405 return (res >> 1) + round;
2408 static inline uint64_t aaddu64(CPURISCVState *env, int vxrm,
2409 uint64_t a, uint64_t b)
2411 uint64_t res = a + b;
2412 uint8_t round = get_round(vxrm, res, 1);
2413 uint64_t over = (uint64_t)(res < a) << 63;
2415 return ((res >> 1) | over) + round;
2418 RVVCALL(OPIVV2_RM, vaaddu_vv_b, OP_UUU_B, H1, H1, H1, aaddu32)
2419 RVVCALL(OPIVV2_RM, vaaddu_vv_h, OP_UUU_H, H2, H2, H2, aaddu32)
2420 RVVCALL(OPIVV2_RM, vaaddu_vv_w, OP_UUU_W, H4, H4, H4, aaddu32)
2421 RVVCALL(OPIVV2_RM, vaaddu_vv_d, OP_UUU_D, H8, H8, H8, aaddu64)
2422 GEN_VEXT_VV_RM(vaaddu_vv_b, 1)
2423 GEN_VEXT_VV_RM(vaaddu_vv_h, 2)
2424 GEN_VEXT_VV_RM(vaaddu_vv_w, 4)
2425 GEN_VEXT_VV_RM(vaaddu_vv_d, 8)
2427 RVVCALL(OPIVX2_RM, vaaddu_vx_b, OP_UUU_B, H1, H1, aaddu32)
2428 RVVCALL(OPIVX2_RM, vaaddu_vx_h, OP_UUU_H, H2, H2, aaddu32)
2429 RVVCALL(OPIVX2_RM, vaaddu_vx_w, OP_UUU_W, H4, H4, aaddu32)
2430 RVVCALL(OPIVX2_RM, vaaddu_vx_d, OP_UUU_D, H8, H8, aaddu64)
2431 GEN_VEXT_VX_RM(vaaddu_vx_b, 1)
2432 GEN_VEXT_VX_RM(vaaddu_vx_h, 2)
2433 GEN_VEXT_VX_RM(vaaddu_vx_w, 4)
2434 GEN_VEXT_VX_RM(vaaddu_vx_d, 8)
2436 static inline int32_t asub32(CPURISCVState *env, int vxrm, int32_t a,
2437 int32_t b)
2439 int64_t res = (int64_t)a - b;
2440 uint8_t round = get_round(vxrm, res, 1);
2442 return (res >> 1) + round;
2445 static inline int64_t asub64(CPURISCVState *env, int vxrm, int64_t a,
2446 int64_t b)
2448 int64_t res = (int64_t)a - b;
2449 uint8_t round = get_round(vxrm, res, 1);
2450 int64_t over = (res ^ a) & (a ^ b) & INT64_MIN;
2452 /* With signed overflow, bit 64 is inverse of bit 63. */
2453 return ((res >> 1) ^ over) + round;
2456 RVVCALL(OPIVV2_RM, vasub_vv_b, OP_SSS_B, H1, H1, H1, asub32)
2457 RVVCALL(OPIVV2_RM, vasub_vv_h, OP_SSS_H, H2, H2, H2, asub32)
2458 RVVCALL(OPIVV2_RM, vasub_vv_w, OP_SSS_W, H4, H4, H4, asub32)
2459 RVVCALL(OPIVV2_RM, vasub_vv_d, OP_SSS_D, H8, H8, H8, asub64)
2460 GEN_VEXT_VV_RM(vasub_vv_b, 1)
2461 GEN_VEXT_VV_RM(vasub_vv_h, 2)
2462 GEN_VEXT_VV_RM(vasub_vv_w, 4)
2463 GEN_VEXT_VV_RM(vasub_vv_d, 8)
2465 RVVCALL(OPIVX2_RM, vasub_vx_b, OP_SSS_B, H1, H1, asub32)
2466 RVVCALL(OPIVX2_RM, vasub_vx_h, OP_SSS_H, H2, H2, asub32)
2467 RVVCALL(OPIVX2_RM, vasub_vx_w, OP_SSS_W, H4, H4, asub32)
2468 RVVCALL(OPIVX2_RM, vasub_vx_d, OP_SSS_D, H8, H8, asub64)
2469 GEN_VEXT_VX_RM(vasub_vx_b, 1)
2470 GEN_VEXT_VX_RM(vasub_vx_h, 2)
2471 GEN_VEXT_VX_RM(vasub_vx_w, 4)
2472 GEN_VEXT_VX_RM(vasub_vx_d, 8)
2474 static inline uint32_t asubu32(CPURISCVState *env, int vxrm,
2475 uint32_t a, uint32_t b)
2477 int64_t res = (int64_t)a - b;
2478 uint8_t round = get_round(vxrm, res, 1);
2480 return (res >> 1) + round;
2483 static inline uint64_t asubu64(CPURISCVState *env, int vxrm,
2484 uint64_t a, uint64_t b)
2486 uint64_t res = (uint64_t)a - b;
2487 uint8_t round = get_round(vxrm, res, 1);
2488 uint64_t over = (uint64_t)(res > a) << 63;
2490 return ((res >> 1) | over) + round;
2493 RVVCALL(OPIVV2_RM, vasubu_vv_b, OP_UUU_B, H1, H1, H1, asubu32)
2494 RVVCALL(OPIVV2_RM, vasubu_vv_h, OP_UUU_H, H2, H2, H2, asubu32)
2495 RVVCALL(OPIVV2_RM, vasubu_vv_w, OP_UUU_W, H4, H4, H4, asubu32)
2496 RVVCALL(OPIVV2_RM, vasubu_vv_d, OP_UUU_D, H8, H8, H8, asubu64)
2497 GEN_VEXT_VV_RM(vasubu_vv_b, 1)
2498 GEN_VEXT_VV_RM(vasubu_vv_h, 2)
2499 GEN_VEXT_VV_RM(vasubu_vv_w, 4)
2500 GEN_VEXT_VV_RM(vasubu_vv_d, 8)
2502 RVVCALL(OPIVX2_RM, vasubu_vx_b, OP_UUU_B, H1, H1, asubu32)
2503 RVVCALL(OPIVX2_RM, vasubu_vx_h, OP_UUU_H, H2, H2, asubu32)
2504 RVVCALL(OPIVX2_RM, vasubu_vx_w, OP_UUU_W, H4, H4, asubu32)
2505 RVVCALL(OPIVX2_RM, vasubu_vx_d, OP_UUU_D, H8, H8, asubu64)
2506 GEN_VEXT_VX_RM(vasubu_vx_b, 1)
2507 GEN_VEXT_VX_RM(vasubu_vx_h, 2)
2508 GEN_VEXT_VX_RM(vasubu_vx_w, 4)
2509 GEN_VEXT_VX_RM(vasubu_vx_d, 8)
2511 /* Vector Single-Width Fractional Multiply with Rounding and Saturation */
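/*
 * vsmul takes the 2*SEW-bit signed product, drops the redundant sign bit
 * by shifting right by SEW-1 with rounding, and saturates to SEW bits.
 * The only product that cannot be represented is INT_MIN * INT_MIN
 * (i.e. -1.0 * -1.0 in the fractional format), which saturates to
 * INT_MAX and sets vxsat; the 64-bit helper checks this case explicitly,
 * the narrower ones catch it through the range check.
 */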
2512 static inline int8_t vsmul8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2514 uint8_t round;
2515 int16_t res;
2517 res = (int16_t)a * (int16_t)b;
2518 round = get_round(vxrm, res, 7);
2519 res = (res >> 7) + round;
2521 if (res > INT8_MAX) {
2522 env->vxsat = 0x1;
2523 return INT8_MAX;
2524 } else if (res < INT8_MIN) {
2525 env->vxsat = 0x1;
2526 return INT8_MIN;
2527 } else {
2528 return res;
2532 static int16_t vsmul16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2534 uint8_t round;
2535 int32_t res;
2537 res = (int32_t)a * (int32_t)b;
2538 round = get_round(vxrm, res, 15);
2539 res = (res >> 15) + round;
2541 if (res > INT16_MAX) {
2542 env->vxsat = 0x1;
2543 return INT16_MAX;
2544 } else if (res < INT16_MIN) {
2545 env->vxsat = 0x1;
2546 return INT16_MIN;
2547 } else {
2548 return res;
2552 static int32_t vsmul32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2554 uint8_t round;
2555 int64_t res;
2557 res = (int64_t)a * (int64_t)b;
2558 round = get_round(vxrm, res, 31);
2559 res = (res >> 31) + round;
2561 if (res > INT32_MAX) {
2562 env->vxsat = 0x1;
2563 return INT32_MAX;
2564 } else if (res < INT32_MIN) {
2565 env->vxsat = 0x1;
2566 return INT32_MIN;
2567 } else {
2568 return res;
2572 static int64_t vsmul64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2574 uint8_t round;
2575 uint64_t hi_64, lo_64;
2576 int64_t res;
2578 if (a == INT64_MIN && b == INT64_MIN) {
2579 env->vxsat = 1;
2580 return INT64_MAX;
2583 muls64(&lo_64, &hi_64, a, b);
2584 round = get_round(vxrm, lo_64, 63);
2585     /*
2586      * Cannot overflow, as there are always
2587      * 2 sign bits after multiply.
2588      */
2589 res = (hi_64 << 1) | (lo_64 >> 63);
2590 if (round) {
2591 if (res == INT64_MAX) {
2592 env->vxsat = 1;
2593 } else {
2594 res += 1;
2597 return res;
2600 RVVCALL(OPIVV2_RM, vsmul_vv_b, OP_SSS_B, H1, H1, H1, vsmul8)
2601 RVVCALL(OPIVV2_RM, vsmul_vv_h, OP_SSS_H, H2, H2, H2, vsmul16)
2602 RVVCALL(OPIVV2_RM, vsmul_vv_w, OP_SSS_W, H4, H4, H4, vsmul32)
2603 RVVCALL(OPIVV2_RM, vsmul_vv_d, OP_SSS_D, H8, H8, H8, vsmul64)
2604 GEN_VEXT_VV_RM(vsmul_vv_b, 1)
2605 GEN_VEXT_VV_RM(vsmul_vv_h, 2)
2606 GEN_VEXT_VV_RM(vsmul_vv_w, 4)
2607 GEN_VEXT_VV_RM(vsmul_vv_d, 8)
2609 RVVCALL(OPIVX2_RM, vsmul_vx_b, OP_SSS_B, H1, H1, vsmul8)
2610 RVVCALL(OPIVX2_RM, vsmul_vx_h, OP_SSS_H, H2, H2, vsmul16)
2611 RVVCALL(OPIVX2_RM, vsmul_vx_w, OP_SSS_W, H4, H4, vsmul32)
2612 RVVCALL(OPIVX2_RM, vsmul_vx_d, OP_SSS_D, H8, H8, vsmul64)
2613 GEN_VEXT_VX_RM(vsmul_vx_b, 1)
2614 GEN_VEXT_VX_RM(vsmul_vx_h, 2)
2615 GEN_VEXT_VX_RM(vsmul_vx_w, 4)
2616 GEN_VEXT_VX_RM(vsmul_vx_d, 8)
2618 /* Vector Single-Width Scaling Shift Instructions */
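/*
 * The scaling shifts reuse get_round(): only the low log2(SEW) bits of
 * the shift operand are used (masks 0x7/0xf/0x1f/0x3f below), and the
 * bits shifted out determine the rounding increment.
 */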
2619 static inline uint8_t
2620 vssrl8(CPURISCVState *env, int vxrm, uint8_t a, uint8_t b)
2622 uint8_t round, shift = b & 0x7;
2623 uint8_t res;
2625 round = get_round(vxrm, a, shift);
2626 res = (a >> shift) + round;
2627 return res;
2629 static inline uint16_t
2630 vssrl16(CPURISCVState *env, int vxrm, uint16_t a, uint16_t b)
2632 uint8_t round, shift = b & 0xf;
2634 round = get_round(vxrm, a, shift);
2635 return (a >> shift) + round;
2637 static inline uint32_t
2638 vssrl32(CPURISCVState *env, int vxrm, uint32_t a, uint32_t b)
2640 uint8_t round, shift = b & 0x1f;
2642 round = get_round(vxrm, a, shift);
2643 return (a >> shift) + round;
2645 static inline uint64_t
2646 vssrl64(CPURISCVState *env, int vxrm, uint64_t a, uint64_t b)
2648 uint8_t round, shift = b & 0x3f;
2650 round = get_round(vxrm, a, shift);
2651 return (a >> shift) + round;
2653 RVVCALL(OPIVV2_RM, vssrl_vv_b, OP_UUU_B, H1, H1, H1, vssrl8)
2654 RVVCALL(OPIVV2_RM, vssrl_vv_h, OP_UUU_H, H2, H2, H2, vssrl16)
2655 RVVCALL(OPIVV2_RM, vssrl_vv_w, OP_UUU_W, H4, H4, H4, vssrl32)
2656 RVVCALL(OPIVV2_RM, vssrl_vv_d, OP_UUU_D, H8, H8, H8, vssrl64)
2657 GEN_VEXT_VV_RM(vssrl_vv_b, 1)
2658 GEN_VEXT_VV_RM(vssrl_vv_h, 2)
2659 GEN_VEXT_VV_RM(vssrl_vv_w, 4)
2660 GEN_VEXT_VV_RM(vssrl_vv_d, 8)
2662 RVVCALL(OPIVX2_RM, vssrl_vx_b, OP_UUU_B, H1, H1, vssrl8)
2663 RVVCALL(OPIVX2_RM, vssrl_vx_h, OP_UUU_H, H2, H2, vssrl16)
2664 RVVCALL(OPIVX2_RM, vssrl_vx_w, OP_UUU_W, H4, H4, vssrl32)
2665 RVVCALL(OPIVX2_RM, vssrl_vx_d, OP_UUU_D, H8, H8, vssrl64)
2666 GEN_VEXT_VX_RM(vssrl_vx_b, 1)
2667 GEN_VEXT_VX_RM(vssrl_vx_h, 2)
2668 GEN_VEXT_VX_RM(vssrl_vx_w, 4)
2669 GEN_VEXT_VX_RM(vssrl_vx_d, 8)
2671 static inline int8_t
2672 vssra8(CPURISCVState *env, int vxrm, int8_t a, int8_t b)
2674 uint8_t round, shift = b & 0x7;
2676 round = get_round(vxrm, a, shift);
2677 return (a >> shift) + round;
2679 static inline int16_t
2680 vssra16(CPURISCVState *env, int vxrm, int16_t a, int16_t b)
2682 uint8_t round, shift = b & 0xf;
2684 round = get_round(vxrm, a, shift);
2685 return (a >> shift) + round;
2687 static inline int32_t
2688 vssra32(CPURISCVState *env, int vxrm, int32_t a, int32_t b)
2690 uint8_t round, shift = b & 0x1f;
2692 round = get_round(vxrm, a, shift);
2693 return (a >> shift) + round;
2695 static inline int64_t
2696 vssra64(CPURISCVState *env, int vxrm, int64_t a, int64_t b)
2698 uint8_t round, shift = b & 0x3f;
2700 round = get_round(vxrm, a, shift);
2701 return (a >> shift) + round;
2704 RVVCALL(OPIVV2_RM, vssra_vv_b, OP_SSS_B, H1, H1, H1, vssra8)
2705 RVVCALL(OPIVV2_RM, vssra_vv_h, OP_SSS_H, H2, H2, H2, vssra16)
2706 RVVCALL(OPIVV2_RM, vssra_vv_w, OP_SSS_W, H4, H4, H4, vssra32)
2707 RVVCALL(OPIVV2_RM, vssra_vv_d, OP_SSS_D, H8, H8, H8, vssra64)
2708 GEN_VEXT_VV_RM(vssra_vv_b, 1)
2709 GEN_VEXT_VV_RM(vssra_vv_h, 2)
2710 GEN_VEXT_VV_RM(vssra_vv_w, 4)
2711 GEN_VEXT_VV_RM(vssra_vv_d, 8)
2713 RVVCALL(OPIVX2_RM, vssra_vx_b, OP_SSS_B, H1, H1, vssra8)
2714 RVVCALL(OPIVX2_RM, vssra_vx_h, OP_SSS_H, H2, H2, vssra16)
2715 RVVCALL(OPIVX2_RM, vssra_vx_w, OP_SSS_W, H4, H4, vssra32)
2716 RVVCALL(OPIVX2_RM, vssra_vx_d, OP_SSS_D, H8, H8, vssra64)
2717 GEN_VEXT_VX_RM(vssra_vx_b, 1)
2718 GEN_VEXT_VX_RM(vssra_vx_h, 2)
2719 GEN_VEXT_VX_RM(vssra_vx_w, 4)
2720 GEN_VEXT_VX_RM(vssra_vx_d, 8)
2722 /* Vector Narrowing Fixed-Point Clip Instructions */
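/*
 * The narrowing clips shift a 2*SEW-wide source right by the low
 * log2(2*SEW) bits of the SEW-wide shift operand, round via get_round(),
 * then saturate the result to SEW bits, setting vxsat when clipping
 * occurs.
 */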
2723 static inline int8_t
2724 vnclip8(CPURISCVState *env, int vxrm, int16_t a, int8_t b)
2726 uint8_t round, shift = b & 0xf;
2727 int16_t res;
2729 round = get_round(vxrm, a, shift);
2730 res = (a >> shift) + round;
2731 if (res > INT8_MAX) {
2732 env->vxsat = 0x1;
2733 return INT8_MAX;
2734 } else if (res < INT8_MIN) {
2735 env->vxsat = 0x1;
2736 return INT8_MIN;
2737 } else {
2738 return res;
2742 static inline int16_t
2743 vnclip16(CPURISCVState *env, int vxrm, int32_t a, int16_t b)
2745 uint8_t round, shift = b & 0x1f;
2746 int32_t res;
2748 round = get_round(vxrm, a, shift);
2749 res = (a >> shift) + round;
2750 if (res > INT16_MAX) {
2751 env->vxsat = 0x1;
2752 return INT16_MAX;
2753 } else if (res < INT16_MIN) {
2754 env->vxsat = 0x1;
2755 return INT16_MIN;
2756 } else {
2757 return res;
2761 static inline int32_t
2762 vnclip32(CPURISCVState *env, int vxrm, int64_t a, int32_t b)
2764 uint8_t round, shift = b & 0x3f;
2765 int64_t res;
2767 round = get_round(vxrm, a, shift);
2768 res = (a >> shift) + round;
2769 if (res > INT32_MAX) {
2770 env->vxsat = 0x1;
2771 return INT32_MAX;
2772 } else if (res < INT32_MIN) {
2773 env->vxsat = 0x1;
2774 return INT32_MIN;
2775 } else {
2776 return res;
2780 RVVCALL(OPIVV2_RM, vnclip_wv_b, NOP_SSS_B, H1, H2, H1, vnclip8)
2781 RVVCALL(OPIVV2_RM, vnclip_wv_h, NOP_SSS_H, H2, H4, H2, vnclip16)
2782 RVVCALL(OPIVV2_RM, vnclip_wv_w, NOP_SSS_W, H4, H8, H4, vnclip32)
2783 GEN_VEXT_VV_RM(vnclip_wv_b, 1)
2784 GEN_VEXT_VV_RM(vnclip_wv_h, 2)
2785 GEN_VEXT_VV_RM(vnclip_wv_w, 4)
2787 RVVCALL(OPIVX2_RM, vnclip_wx_b, NOP_SSS_B, H1, H2, vnclip8)
2788 RVVCALL(OPIVX2_RM, vnclip_wx_h, NOP_SSS_H, H2, H4, vnclip16)
2789 RVVCALL(OPIVX2_RM, vnclip_wx_w, NOP_SSS_W, H4, H8, vnclip32)
2790 GEN_VEXT_VX_RM(vnclip_wx_b, 1)
2791 GEN_VEXT_VX_RM(vnclip_wx_h, 2)
2792 GEN_VEXT_VX_RM(vnclip_wx_w, 4)
2794 static inline uint8_t
2795 vnclipu8(CPURISCVState *env, int vxrm, uint16_t a, uint8_t b)
2797 uint8_t round, shift = b & 0xf;
2798 uint16_t res;
2800 round = get_round(vxrm, a, shift);
2801 res = (a >> shift) + round;
2802 if (res > UINT8_MAX) {
2803 env->vxsat = 0x1;
2804 return UINT8_MAX;
2805 } else {
2806 return res;
2810 static inline uint16_t
2811 vnclipu16(CPURISCVState *env, int vxrm, uint32_t a, uint16_t b)
2813 uint8_t round, shift = b & 0x1f;
2814 uint32_t res;
2816 round = get_round(vxrm, a, shift);
2817 res = (a >> shift) + round;
2818 if (res > UINT16_MAX) {
2819 env->vxsat = 0x1;
2820 return UINT16_MAX;
2821 } else {
2822 return res;
2826 static inline uint32_t
2827 vnclipu32(CPURISCVState *env, int vxrm, uint64_t a, uint32_t b)
2829 uint8_t round, shift = b & 0x3f;
2830 uint64_t res;
2832 round = get_round(vxrm, a, shift);
2833 res = (a >> shift) + round;
2834 if (res > UINT32_MAX) {
2835 env->vxsat = 0x1;
2836 return UINT32_MAX;
2837 } else {
2838 return res;
2842 RVVCALL(OPIVV2_RM, vnclipu_wv_b, NOP_UUU_B, H1, H2, H1, vnclipu8)
2843 RVVCALL(OPIVV2_RM, vnclipu_wv_h, NOP_UUU_H, H2, H4, H2, vnclipu16)
2844 RVVCALL(OPIVV2_RM, vnclipu_wv_w, NOP_UUU_W, H4, H8, H4, vnclipu32)
2845 GEN_VEXT_VV_RM(vnclipu_wv_b, 1)
2846 GEN_VEXT_VV_RM(vnclipu_wv_h, 2)
2847 GEN_VEXT_VV_RM(vnclipu_wv_w, 4)
2849 RVVCALL(OPIVX2_RM, vnclipu_wx_b, NOP_UUU_B, H1, H2, vnclipu8)
2850 RVVCALL(OPIVX2_RM, vnclipu_wx_h, NOP_UUU_H, H2, H4, vnclipu16)
2851 RVVCALL(OPIVX2_RM, vnclipu_wx_w, NOP_UUU_W, H4, H8, vnclipu32)
2852 GEN_VEXT_VX_RM(vnclipu_wx_b, 1)
2853 GEN_VEXT_VX_RM(vnclipu_wx_h, 2)
2854 GEN_VEXT_VX_RM(vnclipu_wx_w, 4)
2856 /*
2857  * Vector Floating-Point Arithmetic Instructions
2858  */
2859 /* Vector Single-Width Floating-Point Add/Subtract Instructions */
2860 #define OPFVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
2861 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
2862                       CPURISCVState *env)                    \
2863 {                                                             \
2864     TX1 s1 = *((T1 *)vs1 + HS1(i));                           \
2865     TX2 s2 = *((T2 *)vs2 + HS2(i));                           \
2866     *((TD *)vd + HD(i)) = OP(s2, s1, &env->fp_status);        \
2867 }
2869 #define GEN_VEXT_VV_ENV(NAME, ESZ) \
2870 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
2871 void *vs2, CPURISCVState *env, \
2872 uint32_t desc) \
2874 uint32_t vm = vext_vm(desc); \
2875 uint32_t vl = env->vl; \
2876 uint32_t total_elems = \
2877 vext_get_total_elems(env, desc, ESZ); \
2878 uint32_t vta = vext_vta(desc); \
2879 uint32_t vma = vext_vma(desc); \
2880 uint32_t i; \
2882 VSTART_CHECK_EARLY_EXIT(env); \
2884 for (i = env->vstart; i < vl; i++) { \
2885 if (!vm && !vext_elem_mask(v0, i)) { \
2886 /* set masked-off elements to 1s */ \
2887 vext_set_elems_1s(vd, vma, i * ESZ, \
2888 (i + 1) * ESZ); \
2889 continue; \
2891 do_##NAME(vd, vs1, vs2, i, env); \
2893 env->vstart = 0; \
2894 /* set tail elements to 1s */ \
2895 vext_set_elems_1s(vd, vta, vl * ESZ, \
2896 total_elems * ESZ); \
2899 RVVCALL(OPFVV2, vfadd_vv_h, OP_UUU_H, H2, H2, H2, float16_add)
2900 RVVCALL(OPFVV2, vfadd_vv_w, OP_UUU_W, H4, H4, H4, float32_add)
2901 RVVCALL(OPFVV2, vfadd_vv_d, OP_UUU_D, H8, H8, H8, float64_add)
2902 GEN_VEXT_VV_ENV(vfadd_vv_h, 2)
2903 GEN_VEXT_VV_ENV(vfadd_vv_w, 4)
2904 GEN_VEXT_VV_ENV(vfadd_vv_d, 8)
2906 #define OPFVF2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)         \
2907 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,  \
2908                       CPURISCVState *env)                       \
2909 {                                                               \
2910     TX2 s2 = *((T2 *)vs2 + HS2(i));                             \
2911     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, &env->fp_status); \
2912 }
2914 #define GEN_VEXT_VF(NAME, ESZ) \
2915 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, \
2916 void *vs2, CPURISCVState *env, \
2917 uint32_t desc) \
2919 uint32_t vm = vext_vm(desc); \
2920 uint32_t vl = env->vl; \
2921 uint32_t total_elems = \
2922 vext_get_total_elems(env, desc, ESZ); \
2923 uint32_t vta = vext_vta(desc); \
2924 uint32_t vma = vext_vma(desc); \
2925 uint32_t i; \
2927 VSTART_CHECK_EARLY_EXIT(env); \
2929 for (i = env->vstart; i < vl; i++) { \
2930 if (!vm && !vext_elem_mask(v0, i)) { \
2931 /* set masked-off elements to 1s */ \
2932 vext_set_elems_1s(vd, vma, i * ESZ, \
2933 (i + 1) * ESZ); \
2934 continue; \
2936 do_##NAME(vd, s1, vs2, i, env); \
2938 env->vstart = 0; \
2939 /* set tail elements to 1s */ \
2940 vext_set_elems_1s(vd, vta, vl * ESZ, \
2941 total_elems * ESZ); \
2944 RVVCALL(OPFVF2, vfadd_vf_h, OP_UUU_H, H2, H2, float16_add)
2945 RVVCALL(OPFVF2, vfadd_vf_w, OP_UUU_W, H4, H4, float32_add)
2946 RVVCALL(OPFVF2, vfadd_vf_d, OP_UUU_D, H8, H8, float64_add)
2947 GEN_VEXT_VF(vfadd_vf_h, 2)
2948 GEN_VEXT_VF(vfadd_vf_w, 4)
2949 GEN_VEXT_VF(vfadd_vf_d, 8)
2951 RVVCALL(OPFVV2, vfsub_vv_h, OP_UUU_H, H2, H2, H2, float16_sub)
2952 RVVCALL(OPFVV2, vfsub_vv_w, OP_UUU_W, H4, H4, H4, float32_sub)
2953 RVVCALL(OPFVV2, vfsub_vv_d, OP_UUU_D, H8, H8, H8, float64_sub)
2954 GEN_VEXT_VV_ENV(vfsub_vv_h, 2)
2955 GEN_VEXT_VV_ENV(vfsub_vv_w, 4)
2956 GEN_VEXT_VV_ENV(vfsub_vv_d, 8)
2957 RVVCALL(OPFVF2, vfsub_vf_h, OP_UUU_H, H2, H2, float16_sub)
2958 RVVCALL(OPFVF2, vfsub_vf_w, OP_UUU_W, H4, H4, float32_sub)
2959 RVVCALL(OPFVF2, vfsub_vf_d, OP_UUU_D, H8, H8, float64_sub)
2960 GEN_VEXT_VF(vfsub_vf_h, 2)
2961 GEN_VEXT_VF(vfsub_vf_w, 4)
2962 GEN_VEXT_VF(vfsub_vf_d, 8)
2964 static uint16_t float16_rsub(uint16_t a, uint16_t b, float_status *s)
2966 return float16_sub(b, a, s);
2969 static uint32_t float32_rsub(uint32_t a, uint32_t b, float_status *s)
2971 return float32_sub(b, a, s);
2974 static uint64_t float64_rsub(uint64_t a, uint64_t b, float_status *s)
2976 return float64_sub(b, a, s);
2979 RVVCALL(OPFVF2, vfrsub_vf_h, OP_UUU_H, H2, H2, float16_rsub)
2980 RVVCALL(OPFVF2, vfrsub_vf_w, OP_UUU_W, H4, H4, float32_rsub)
2981 RVVCALL(OPFVF2, vfrsub_vf_d, OP_UUU_D, H8, H8, float64_rsub)
2982 GEN_VEXT_VF(vfrsub_vf_h, 2)
2983 GEN_VEXT_VF(vfrsub_vf_w, 4)
2984 GEN_VEXT_VF(vfrsub_vf_d, 8)
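/*
 * The float*_rsub wrappers above swap their operands so that OPFVF2,
 * which always passes (vs2[i], rs1), computes rs1 - vs2[i] for
 * vfrsub.vf.
 */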
2986 /* Vector Widening Floating-Point Add/Subtract Instructions */
2987 static uint32_t vfwadd16(uint16_t a, uint16_t b, float_status *s)
2989 return float32_add(float16_to_float32(a, true, s),
2990 float16_to_float32(b, true, s), s);
2993 static uint64_t vfwadd32(uint32_t a, uint32_t b, float_status *s)
2995 return float64_add(float32_to_float64(a, s),
2996 float32_to_float64(b, s), s);
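/*
 * The widening FP add/sub helpers convert both operands to the wider
 * format first and then operate at the wider precision; the 'true'
 * passed to float16_to_float32() selects IEEE half-precision inputs.
 * Since those conversions are exact, the result is rounded only once,
 * in the wider format.
 */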
3000 RVVCALL(OPFVV2, vfwadd_vv_h, WOP_UUU_H, H4, H2, H2, vfwadd16)
3001 RVVCALL(OPFVV2, vfwadd_vv_w, WOP_UUU_W, H8, H4, H4, vfwadd32)
3002 GEN_VEXT_VV_ENV(vfwadd_vv_h, 4)
3003 GEN_VEXT_VV_ENV(vfwadd_vv_w, 8)
3004 RVVCALL(OPFVF2, vfwadd_vf_h, WOP_UUU_H, H4, H2, vfwadd16)
3005 RVVCALL(OPFVF2, vfwadd_vf_w, WOP_UUU_W, H8, H4, vfwadd32)
3006 GEN_VEXT_VF(vfwadd_vf_h, 4)
3007 GEN_VEXT_VF(vfwadd_vf_w, 8)
3009 static uint32_t vfwsub16(uint16_t a, uint16_t b, float_status *s)
3011 return float32_sub(float16_to_float32(a, true, s),
3012 float16_to_float32(b, true, s), s);
3015 static uint64_t vfwsub32(uint32_t a, uint32_t b, float_status *s)
3017 return float64_sub(float32_to_float64(a, s),
3018 float32_to_float64(b, s), s);
3022 RVVCALL(OPFVV2, vfwsub_vv_h, WOP_UUU_H, H4, H2, H2, vfwsub16)
3023 RVVCALL(OPFVV2, vfwsub_vv_w, WOP_UUU_W, H8, H4, H4, vfwsub32)
3024 GEN_VEXT_VV_ENV(vfwsub_vv_h, 4)
3025 GEN_VEXT_VV_ENV(vfwsub_vv_w, 8)
3026 RVVCALL(OPFVF2, vfwsub_vf_h, WOP_UUU_H, H4, H2, vfwsub16)
3027 RVVCALL(OPFVF2, vfwsub_vf_w, WOP_UUU_W, H8, H4, vfwsub32)
3028 GEN_VEXT_VF(vfwsub_vf_h, 4)
3029 GEN_VEXT_VF(vfwsub_vf_w, 8)
3031 static uint32_t vfwaddw16(uint32_t a, uint16_t b, float_status *s)
3033 return float32_add(a, float16_to_float32(b, true, s), s);
3036 static uint64_t vfwaddw32(uint64_t a, uint32_t b, float_status *s)
3038 return float64_add(a, float32_to_float64(b, s), s);
3041 RVVCALL(OPFVV2, vfwadd_wv_h, WOP_WUUU_H, H4, H2, H2, vfwaddw16)
3042 RVVCALL(OPFVV2, vfwadd_wv_w, WOP_WUUU_W, H8, H4, H4, vfwaddw32)
3043 GEN_VEXT_VV_ENV(vfwadd_wv_h, 4)
3044 GEN_VEXT_VV_ENV(vfwadd_wv_w, 8)
3045 RVVCALL(OPFVF2, vfwadd_wf_h, WOP_WUUU_H, H4, H2, vfwaddw16)
3046 RVVCALL(OPFVF2, vfwadd_wf_w, WOP_WUUU_W, H8, H4, vfwaddw32)
3047 GEN_VEXT_VF(vfwadd_wf_h, 4)
3048 GEN_VEXT_VF(vfwadd_wf_w, 8)
3050 static uint32_t vfwsubw16(uint32_t a, uint16_t b, float_status *s)
3052 return float32_sub(a, float16_to_float32(b, true, s), s);
3055 static uint64_t vfwsubw32(uint64_t a, uint32_t b, float_status *s)
3057 return float64_sub(a, float32_to_float64(b, s), s);
3060 RVVCALL(OPFVV2, vfwsub_wv_h, WOP_WUUU_H, H4, H2, H2, vfwsubw16)
3061 RVVCALL(OPFVV2, vfwsub_wv_w, WOP_WUUU_W, H8, H4, H4, vfwsubw32)
3062 GEN_VEXT_VV_ENV(vfwsub_wv_h, 4)
3063 GEN_VEXT_VV_ENV(vfwsub_wv_w, 8)
3064 RVVCALL(OPFVF2, vfwsub_wf_h, WOP_WUUU_H, H4, H2, vfwsubw16)
3065 RVVCALL(OPFVF2, vfwsub_wf_w, WOP_WUUU_W, H8, H4, vfwsubw32)
3066 GEN_VEXT_VF(vfwsub_wf_h, 4)
3067 GEN_VEXT_VF(vfwsub_wf_w, 8)
3069 /* Vector Single-Width Floating-Point Multiply/Divide Instructions */
3070 RVVCALL(OPFVV2, vfmul_vv_h, OP_UUU_H, H2, H2, H2, float16_mul)
3071 RVVCALL(OPFVV2, vfmul_vv_w, OP_UUU_W, H4, H4, H4, float32_mul)
3072 RVVCALL(OPFVV2, vfmul_vv_d, OP_UUU_D, H8, H8, H8, float64_mul)
3073 GEN_VEXT_VV_ENV(vfmul_vv_h, 2)
3074 GEN_VEXT_VV_ENV(vfmul_vv_w, 4)
3075 GEN_VEXT_VV_ENV(vfmul_vv_d, 8)
3076 RVVCALL(OPFVF2, vfmul_vf_h, OP_UUU_H, H2, H2, float16_mul)
3077 RVVCALL(OPFVF2, vfmul_vf_w, OP_UUU_W, H4, H4, float32_mul)
3078 RVVCALL(OPFVF2, vfmul_vf_d, OP_UUU_D, H8, H8, float64_mul)
3079 GEN_VEXT_VF(vfmul_vf_h, 2)
3080 GEN_VEXT_VF(vfmul_vf_w, 4)
3081 GEN_VEXT_VF(vfmul_vf_d, 8)
3083 RVVCALL(OPFVV2, vfdiv_vv_h, OP_UUU_H, H2, H2, H2, float16_div)
3084 RVVCALL(OPFVV2, vfdiv_vv_w, OP_UUU_W, H4, H4, H4, float32_div)
3085 RVVCALL(OPFVV2, vfdiv_vv_d, OP_UUU_D, H8, H8, H8, float64_div)
3086 GEN_VEXT_VV_ENV(vfdiv_vv_h, 2)
3087 GEN_VEXT_VV_ENV(vfdiv_vv_w, 4)
3088 GEN_VEXT_VV_ENV(vfdiv_vv_d, 8)
3089 RVVCALL(OPFVF2, vfdiv_vf_h, OP_UUU_H, H2, H2, float16_div)
3090 RVVCALL(OPFVF2, vfdiv_vf_w, OP_UUU_W, H4, H4, float32_div)
3091 RVVCALL(OPFVF2, vfdiv_vf_d, OP_UUU_D, H8, H8, float64_div)
3092 GEN_VEXT_VF(vfdiv_vf_h, 2)
3093 GEN_VEXT_VF(vfdiv_vf_w, 4)
3094 GEN_VEXT_VF(vfdiv_vf_d, 8)
3096 static uint16_t float16_rdiv(uint16_t a, uint16_t b, float_status *s)
3098 return float16_div(b, a, s);
3101 static uint32_t float32_rdiv(uint32_t a, uint32_t b, float_status *s)
3103 return float32_div(b, a, s);
3106 static uint64_t float64_rdiv(uint64_t a, uint64_t b, float_status *s)
3108 return float64_div(b, a, s);
3111 RVVCALL(OPFVF2, vfrdiv_vf_h, OP_UUU_H, H2, H2, float16_rdiv)
3112 RVVCALL(OPFVF2, vfrdiv_vf_w, OP_UUU_W, H4, H4, float32_rdiv)
3113 RVVCALL(OPFVF2, vfrdiv_vf_d, OP_UUU_D, H8, H8, float64_rdiv)
3114 GEN_VEXT_VF(vfrdiv_vf_h, 2)
3115 GEN_VEXT_VF(vfrdiv_vf_w, 4)
3116 GEN_VEXT_VF(vfrdiv_vf_d, 8)
3118 /* Vector Widening Floating-Point Multiply */
3119 static uint32_t vfwmul16(uint16_t a, uint16_t b, float_status *s)
3121 return float32_mul(float16_to_float32(a, true, s),
3122 float16_to_float32(b, true, s), s);
3125 static uint64_t vfwmul32(uint32_t a, uint32_t b, float_status *s)
3127 return float64_mul(float32_to_float64(a, s),
3128 float32_to_float64(b, s), s);
3131 RVVCALL(OPFVV2, vfwmul_vv_h, WOP_UUU_H, H4, H2, H2, vfwmul16)
3132 RVVCALL(OPFVV2, vfwmul_vv_w, WOP_UUU_W, H8, H4, H4, vfwmul32)
3133 GEN_VEXT_VV_ENV(vfwmul_vv_h, 4)
3134 GEN_VEXT_VV_ENV(vfwmul_vv_w, 8)
3135 RVVCALL(OPFVF2, vfwmul_vf_h, WOP_UUU_H, H4, H2, vfwmul16)
3136 RVVCALL(OPFVF2, vfwmul_vf_w, WOP_UUU_W, H8, H4, vfwmul32)
3137 GEN_VEXT_VF(vfwmul_vf_h, 4)
3138 GEN_VEXT_VF(vfwmul_vf_w, 8)
3140 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
3141 #define OPFVV3(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \
3142 static void do_##NAME(void *vd, void *vs1, void *vs2, int i, \
3143                       CPURISCVState *env)                    \
3144 {                                                             \
3145     TX1 s1 = *((T1 *)vs1 + HS1(i));                           \
3146     TX2 s2 = *((T2 *)vs2 + HS2(i));                           \
3147     TD d = *((TD *)vd + HD(i));                               \
3148     *((TD *)vd + HD(i)) = OP(s2, s1, d, &env->fp_status);     \
3149 }
3151 static uint16_t fmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3153 return float16_muladd(a, b, d, 0, s);
3156 static uint32_t fmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3158 return float32_muladd(a, b, d, 0, s);
3161 static uint64_t fmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3163 return float64_muladd(a, b, d, 0, s);
3166 RVVCALL(OPFVV3, vfmacc_vv_h, OP_UUU_H, H2, H2, H2, fmacc16)
3167 RVVCALL(OPFVV3, vfmacc_vv_w, OP_UUU_W, H4, H4, H4, fmacc32)
3168 RVVCALL(OPFVV3, vfmacc_vv_d, OP_UUU_D, H8, H8, H8, fmacc64)
3169 GEN_VEXT_VV_ENV(vfmacc_vv_h, 2)
3170 GEN_VEXT_VV_ENV(vfmacc_vv_w, 4)
3171 GEN_VEXT_VV_ENV(vfmacc_vv_d, 8)
3173 #define OPFVF3(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)            \
3174 static void do_##NAME(void *vd, uint64_t s1, void *vs2, int i,     \
3175                       CPURISCVState *env)                          \
3176 {                                                                  \
3177     TX2 s2 = *((T2 *)vs2 + HS2(i));                                \
3178     TD d = *((TD *)vd + HD(i));                                    \
3179     *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1, d, &env->fp_status); \
3180 }
3182 RVVCALL(OPFVF3, vfmacc_vf_h, OP_UUU_H, H2, H2, fmacc16)
3183 RVVCALL(OPFVF3, vfmacc_vf_w, OP_UUU_W, H4, H4, fmacc32)
3184 RVVCALL(OPFVF3, vfmacc_vf_d, OP_UUU_D, H8, H8, fmacc64)
3185 GEN_VEXT_VF(vfmacc_vf_h, 2)
3186 GEN_VEXT_VF(vfmacc_vf_w, 4)
3187 GEN_VEXT_VF(vfmacc_vf_d, 8)
3189 static uint16_t fnmacc16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3191 return float16_muladd(a, b, d, float_muladd_negate_c |
3192 float_muladd_negate_product, s);
3195 static uint32_t fnmacc32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3197 return float32_muladd(a, b, d, float_muladd_negate_c |
3198 float_muladd_negate_product, s);
3201 static uint64_t fnmacc64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3203 return float64_muladd(a, b, d, float_muladd_negate_c |
3204 float_muladd_negate_product, s);
3207 RVVCALL(OPFVV3, vfnmacc_vv_h, OP_UUU_H, H2, H2, H2, fnmacc16)
3208 RVVCALL(OPFVV3, vfnmacc_vv_w, OP_UUU_W, H4, H4, H4, fnmacc32)
3209 RVVCALL(OPFVV3, vfnmacc_vv_d, OP_UUU_D, H8, H8, H8, fnmacc64)
3210 GEN_VEXT_VV_ENV(vfnmacc_vv_h, 2)
3211 GEN_VEXT_VV_ENV(vfnmacc_vv_w, 4)
3212 GEN_VEXT_VV_ENV(vfnmacc_vv_d, 8)
3213 RVVCALL(OPFVF3, vfnmacc_vf_h, OP_UUU_H, H2, H2, fnmacc16)
3214 RVVCALL(OPFVF3, vfnmacc_vf_w, OP_UUU_W, H4, H4, fnmacc32)
3215 RVVCALL(OPFVF3, vfnmacc_vf_d, OP_UUU_D, H8, H8, fnmacc64)
3216 GEN_VEXT_VF(vfnmacc_vf_h, 2)
3217 GEN_VEXT_VF(vfnmacc_vf_w, 4)
3218 GEN_VEXT_VF(vfnmacc_vf_d, 8)
3220 static uint16_t fmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3222 return float16_muladd(a, b, d, float_muladd_negate_c, s);
3225 static uint32_t fmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3227 return float32_muladd(a, b, d, float_muladd_negate_c, s);
3230 static uint64_t fmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3232 return float64_muladd(a, b, d, float_muladd_negate_c, s);
3235 RVVCALL(OPFVV3, vfmsac_vv_h, OP_UUU_H, H2, H2, H2, fmsac16)
3236 RVVCALL(OPFVV3, vfmsac_vv_w, OP_UUU_W, H4, H4, H4, fmsac32)
3237 RVVCALL(OPFVV3, vfmsac_vv_d, OP_UUU_D, H8, H8, H8, fmsac64)
3238 GEN_VEXT_VV_ENV(vfmsac_vv_h, 2)
3239 GEN_VEXT_VV_ENV(vfmsac_vv_w, 4)
3240 GEN_VEXT_VV_ENV(vfmsac_vv_d, 8)
3241 RVVCALL(OPFVF3, vfmsac_vf_h, OP_UUU_H, H2, H2, fmsac16)
3242 RVVCALL(OPFVF3, vfmsac_vf_w, OP_UUU_W, H4, H4, fmsac32)
3243 RVVCALL(OPFVF3, vfmsac_vf_d, OP_UUU_D, H8, H8, fmsac64)
3244 GEN_VEXT_VF(vfmsac_vf_h, 2)
3245 GEN_VEXT_VF(vfmsac_vf_w, 4)
3246 GEN_VEXT_VF(vfmsac_vf_d, 8)
3248 static uint16_t fnmsac16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3250 return float16_muladd(a, b, d, float_muladd_negate_product, s);
3253 static uint32_t fnmsac32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3255 return float32_muladd(a, b, d, float_muladd_negate_product, s);
3258 static uint64_t fnmsac64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3260 return float64_muladd(a, b, d, float_muladd_negate_product, s);
3263 RVVCALL(OPFVV3, vfnmsac_vv_h, OP_UUU_H, H2, H2, H2, fnmsac16)
3264 RVVCALL(OPFVV3, vfnmsac_vv_w, OP_UUU_W, H4, H4, H4, fnmsac32)
3265 RVVCALL(OPFVV3, vfnmsac_vv_d, OP_UUU_D, H8, H8, H8, fnmsac64)
3266 GEN_VEXT_VV_ENV(vfnmsac_vv_h, 2)
3267 GEN_VEXT_VV_ENV(vfnmsac_vv_w, 4)
3268 GEN_VEXT_VV_ENV(vfnmsac_vv_d, 8)
3269 RVVCALL(OPFVF3, vfnmsac_vf_h, OP_UUU_H, H2, H2, fnmsac16)
3270 RVVCALL(OPFVF3, vfnmsac_vf_w, OP_UUU_W, H4, H4, fnmsac32)
3271 RVVCALL(OPFVF3, vfnmsac_vf_d, OP_UUU_D, H8, H8, fnmsac64)
3272 GEN_VEXT_VF(vfnmsac_vf_h, 2)
3273 GEN_VEXT_VF(vfnmsac_vf_w, 4)
3274 GEN_VEXT_VF(vfnmsac_vf_d, 8)
3276 static uint16_t fmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3278 return float16_muladd(d, b, a, 0, s);
3281 static uint32_t fmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3283 return float32_muladd(d, b, a, 0, s);
3286 static uint64_t fmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3288 return float64_muladd(d, b, a, 0, s);
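/*
 * OPFVV3/OPFVF3 pass (vs2, vs1 or rs1, vd), so fmacc* above compute
 * vs1 * vs2 + vd, while fmadd* swap the addend and one multiplicand to
 * compute vd * vs1 + vs2, matching vfmacc and vfmadd; the negated
 * variants below follow the same pattern via the float_muladd_negate_*
 * flags.
 */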
3291 RVVCALL(OPFVV3, vfmadd_vv_h, OP_UUU_H, H2, H2, H2, fmadd16)
3292 RVVCALL(OPFVV3, vfmadd_vv_w, OP_UUU_W, H4, H4, H4, fmadd32)
3293 RVVCALL(OPFVV3, vfmadd_vv_d, OP_UUU_D, H8, H8, H8, fmadd64)
3294 GEN_VEXT_VV_ENV(vfmadd_vv_h, 2)
3295 GEN_VEXT_VV_ENV(vfmadd_vv_w, 4)
3296 GEN_VEXT_VV_ENV(vfmadd_vv_d, 8)
3297 RVVCALL(OPFVF3, vfmadd_vf_h, OP_UUU_H, H2, H2, fmadd16)
3298 RVVCALL(OPFVF3, vfmadd_vf_w, OP_UUU_W, H4, H4, fmadd32)
3299 RVVCALL(OPFVF3, vfmadd_vf_d, OP_UUU_D, H8, H8, fmadd64)
3300 GEN_VEXT_VF(vfmadd_vf_h, 2)
3301 GEN_VEXT_VF(vfmadd_vf_w, 4)
3302 GEN_VEXT_VF(vfmadd_vf_d, 8)
3304 static uint16_t fnmadd16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3306 return float16_muladd(d, b, a, float_muladd_negate_c |
3307 float_muladd_negate_product, s);
3310 static uint32_t fnmadd32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3312 return float32_muladd(d, b, a, float_muladd_negate_c |
3313 float_muladd_negate_product, s);
3316 static uint64_t fnmadd64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3318 return float64_muladd(d, b, a, float_muladd_negate_c |
3319 float_muladd_negate_product, s);
3322 RVVCALL(OPFVV3, vfnmadd_vv_h, OP_UUU_H, H2, H2, H2, fnmadd16)
3323 RVVCALL(OPFVV3, vfnmadd_vv_w, OP_UUU_W, H4, H4, H4, fnmadd32)
3324 RVVCALL(OPFVV3, vfnmadd_vv_d, OP_UUU_D, H8, H8, H8, fnmadd64)
3325 GEN_VEXT_VV_ENV(vfnmadd_vv_h, 2)
3326 GEN_VEXT_VV_ENV(vfnmadd_vv_w, 4)
3327 GEN_VEXT_VV_ENV(vfnmadd_vv_d, 8)
3328 RVVCALL(OPFVF3, vfnmadd_vf_h, OP_UUU_H, H2, H2, fnmadd16)
3329 RVVCALL(OPFVF3, vfnmadd_vf_w, OP_UUU_W, H4, H4, fnmadd32)
3330 RVVCALL(OPFVF3, vfnmadd_vf_d, OP_UUU_D, H8, H8, fnmadd64)
3331 GEN_VEXT_VF(vfnmadd_vf_h, 2)
3332 GEN_VEXT_VF(vfnmadd_vf_w, 4)
3333 GEN_VEXT_VF(vfnmadd_vf_d, 8)
3335 static uint16_t fmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3337 return float16_muladd(d, b, a, float_muladd_negate_c, s);
3340 static uint32_t fmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3342 return float32_muladd(d, b, a, float_muladd_negate_c, s);
3345 static uint64_t fmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3347 return float64_muladd(d, b, a, float_muladd_negate_c, s);
3350 RVVCALL(OPFVV3, vfmsub_vv_h, OP_UUU_H, H2, H2, H2, fmsub16)
3351 RVVCALL(OPFVV3, vfmsub_vv_w, OP_UUU_W, H4, H4, H4, fmsub32)
3352 RVVCALL(OPFVV3, vfmsub_vv_d, OP_UUU_D, H8, H8, H8, fmsub64)
3353 GEN_VEXT_VV_ENV(vfmsub_vv_h, 2)
3354 GEN_VEXT_VV_ENV(vfmsub_vv_w, 4)
3355 GEN_VEXT_VV_ENV(vfmsub_vv_d, 8)
3356 RVVCALL(OPFVF3, vfmsub_vf_h, OP_UUU_H, H2, H2, fmsub16)
3357 RVVCALL(OPFVF3, vfmsub_vf_w, OP_UUU_W, H4, H4, fmsub32)
3358 RVVCALL(OPFVF3, vfmsub_vf_d, OP_UUU_D, H8, H8, fmsub64)
3359 GEN_VEXT_VF(vfmsub_vf_h, 2)
3360 GEN_VEXT_VF(vfmsub_vf_w, 4)
3361 GEN_VEXT_VF(vfmsub_vf_d, 8)
3363 static uint16_t fnmsub16(uint16_t a, uint16_t b, uint16_t d, float_status *s)
3365 return float16_muladd(d, b, a, float_muladd_negate_product, s);
3368 static uint32_t fnmsub32(uint32_t a, uint32_t b, uint32_t d, float_status *s)
3370 return float32_muladd(d, b, a, float_muladd_negate_product, s);
3373 static uint64_t fnmsub64(uint64_t a, uint64_t b, uint64_t d, float_status *s)
3375 return float64_muladd(d, b, a, float_muladd_negate_product, s);
3378 RVVCALL(OPFVV3, vfnmsub_vv_h, OP_UUU_H, H2, H2, H2, fnmsub16)
3379 RVVCALL(OPFVV3, vfnmsub_vv_w, OP_UUU_W, H4, H4, H4, fnmsub32)
3380 RVVCALL(OPFVV3, vfnmsub_vv_d, OP_UUU_D, H8, H8, H8, fnmsub64)
3381 GEN_VEXT_VV_ENV(vfnmsub_vv_h, 2)
3382 GEN_VEXT_VV_ENV(vfnmsub_vv_w, 4)
3383 GEN_VEXT_VV_ENV(vfnmsub_vv_d, 8)
3384 RVVCALL(OPFVF3, vfnmsub_vf_h, OP_UUU_H, H2, H2, fnmsub16)
3385 RVVCALL(OPFVF3, vfnmsub_vf_w, OP_UUU_W, H4, H4, fnmsub32)
3386 RVVCALL(OPFVF3, vfnmsub_vf_d, OP_UUU_D, H8, H8, fnmsub64)
3387 GEN_VEXT_VF(vfnmsub_vf_h, 2)
3388 GEN_VEXT_VF(vfnmsub_vf_w, 4)
3389 GEN_VEXT_VF(vfnmsub_vf_d, 8)
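/*
 * Reading aid for the single-width FMA helpers above (element-wise
 * semantics as defined by the RVV spec; the .vf forms use the scalar
 * f[rs1] in place of vs1).  In the OPFVV3/OPFVF3 expansion earlier in this
 * file the callback arguments (a, b, d) receive the vs2 element, the vs1
 * element (or scalar) and the old vd element respectively, so:
 *
 *   vfmacc:  vd = +(vs1 * vs2) + vd   muladd(a, b, d, 0)
 *   vfnmacc: vd = -(vs1 * vs2) - vd   muladd(a, b, d, negate_c | negate_product)
 *   vfmsac:  vd = +(vs1 * vs2) - vd   muladd(a, b, d, negate_c)
 *   vfnmsac: vd = -(vs1 * vs2) + vd   muladd(a, b, d, negate_product)
 *   vfmadd:  vd = +(vs1 * vd) + vs2   muladd(d, b, a, 0)
 *   vfnmadd: vd = -(vs1 * vd) - vs2   muladd(d, b, a, negate_c | negate_product)
 *   vfmsub:  vd = +(vs1 * vd) - vs2   muladd(d, b, a, negate_c)
 *   vfnmsub: vd = -(vs1 * vd) + vs2   muladd(d, b, a, negate_product)
 */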
3391 /* Vector Widening Floating-Point Fused Multiply-Add Instructions */
3392 static uint32_t fwmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3394 return float32_muladd(float16_to_float32(a, true, s),
3395 float16_to_float32(b, true, s), d, 0, s);
3398 static uint64_t fwmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3400 return float64_muladd(float32_to_float64(a, s),
3401 float32_to_float64(b, s), d, 0, s);
3404 RVVCALL(OPFVV3, vfwmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwmacc16)
3405 RVVCALL(OPFVV3, vfwmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwmacc32)
3406 GEN_VEXT_VV_ENV(vfwmacc_vv_h, 4)
3407 GEN_VEXT_VV_ENV(vfwmacc_vv_w, 8)
3408 RVVCALL(OPFVF3, vfwmacc_vf_h, WOP_UUU_H, H4, H2, fwmacc16)
3409 RVVCALL(OPFVF3, vfwmacc_vf_w, WOP_UUU_W, H8, H4, fwmacc32)
3410 GEN_VEXT_VF(vfwmacc_vf_h, 4)
3411 GEN_VEXT_VF(vfwmacc_vf_w, 8)
3413 static uint32_t fwmaccbf16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3415 return float32_muladd(bfloat16_to_float32(a, s),
3416 bfloat16_to_float32(b, s), d, 0, s);
3419 RVVCALL(OPFVV3, vfwmaccbf16_vv, WOP_UUU_H, H4, H2, H2, fwmaccbf16)
3420 GEN_VEXT_VV_ENV(vfwmaccbf16_vv, 4)
3421 RVVCALL(OPFVF3, vfwmaccbf16_vf, WOP_UUU_H, H4, H2, fwmaccbf16)
3422 GEN_VEXT_VF(vfwmaccbf16_vf, 4)
3424 static uint32_t fwnmacc16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3426 return float32_muladd(float16_to_float32(a, true, s),
3427 float16_to_float32(b, true, s), d,
3428 float_muladd_negate_c | float_muladd_negate_product,
3429 s);
3432 static uint64_t fwnmacc32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3434 return float64_muladd(float32_to_float64(a, s), float32_to_float64(b, s),
3435 d, float_muladd_negate_c |
3436 float_muladd_negate_product, s);
3439 RVVCALL(OPFVV3, vfwnmacc_vv_h, WOP_UUU_H, H4, H2, H2, fwnmacc16)
3440 RVVCALL(OPFVV3, vfwnmacc_vv_w, WOP_UUU_W, H8, H4, H4, fwnmacc32)
3441 GEN_VEXT_VV_ENV(vfwnmacc_vv_h, 4)
3442 GEN_VEXT_VV_ENV(vfwnmacc_vv_w, 8)
3443 RVVCALL(OPFVF3, vfwnmacc_vf_h, WOP_UUU_H, H4, H2, fwnmacc16)
3444 RVVCALL(OPFVF3, vfwnmacc_vf_w, WOP_UUU_W, H8, H4, fwnmacc32)
3445 GEN_VEXT_VF(vfwnmacc_vf_h, 4)
3446 GEN_VEXT_VF(vfwnmacc_vf_w, 8)
3448 static uint32_t fwmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3450 return float32_muladd(float16_to_float32(a, true, s),
3451 float16_to_float32(b, true, s), d,
3452 float_muladd_negate_c, s);
3455 static uint64_t fwmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3457 return float64_muladd(float32_to_float64(a, s),
3458 float32_to_float64(b, s), d,
3459 float_muladd_negate_c, s);
3462 RVVCALL(OPFVV3, vfwmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwmsac16)
3463 RVVCALL(OPFVV3, vfwmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwmsac32)
3464 GEN_VEXT_VV_ENV(vfwmsac_vv_h, 4)
3465 GEN_VEXT_VV_ENV(vfwmsac_vv_w, 8)
3466 RVVCALL(OPFVF3, vfwmsac_vf_h, WOP_UUU_H, H4, H2, fwmsac16)
3467 RVVCALL(OPFVF3, vfwmsac_vf_w, WOP_UUU_W, H8, H4, fwmsac32)
3468 GEN_VEXT_VF(vfwmsac_vf_h, 4)
3469 GEN_VEXT_VF(vfwmsac_vf_w, 8)
3471 static uint32_t fwnmsac16(uint16_t a, uint16_t b, uint32_t d, float_status *s)
3473 return float32_muladd(float16_to_float32(a, true, s),
3474 float16_to_float32(b, true, s), d,
3475 float_muladd_negate_product, s);
3478 static uint64_t fwnmsac32(uint32_t a, uint32_t b, uint64_t d, float_status *s)
3480 return float64_muladd(float32_to_float64(a, s),
3481 float32_to_float64(b, s), d,
3482 float_muladd_negate_product, s);
3485 RVVCALL(OPFVV3, vfwnmsac_vv_h, WOP_UUU_H, H4, H2, H2, fwnmsac16)
3486 RVVCALL(OPFVV3, vfwnmsac_vv_w, WOP_UUU_W, H8, H4, H4, fwnmsac32)
3487 GEN_VEXT_VV_ENV(vfwnmsac_vv_h, 4)
3488 GEN_VEXT_VV_ENV(vfwnmsac_vv_w, 8)
3489 RVVCALL(OPFVF3, vfwnmsac_vf_h, WOP_UUU_H, H4, H2, fwnmsac16)
3490 RVVCALL(OPFVF3, vfwnmsac_vf_w, WOP_UUU_W, H8, H4, fwnmsac32)
3491 GEN_VEXT_VF(vfwnmsac_vf_h, 4)
3492 GEN_VEXT_VF(vfwnmsac_vf_w, 8)
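/*
 * Note on the widening FMA helpers above: both SEW-wide sources are first
 * converted up to 2*SEW (an exact conversion, since every binary16 or
 * binary32 value is representable at the wider width; the 'true' passed to
 * float16_to_float32() selects IEEE half precision rather than the
 * alternative format), and a single fused muladd then accumulates into the
 * 2*SEW destination element, so each element sees exactly one rounding.
 * The bfloat16 variant (vfwmaccbf16) follows the same pattern through
 * bfloat16_to_float32().
 */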
3494 /* Vector Floating-Point Square-Root Instruction */
3495 #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \
3496 static void do_##NAME(void *vd, void *vs2, int i, \
3497 CPURISCVState *env) \
3499 TX2 s2 = *((T2 *)vs2 + HS2(i)); \
3500 *((TD *)vd + HD(i)) = OP(s2, &env->fp_status); \
3503 #define GEN_VEXT_V_ENV(NAME, ESZ) \
3504 void HELPER(NAME)(void *vd, void *v0, void *vs2, \
3505 CPURISCVState *env, uint32_t desc) \
3507 uint32_t vm = vext_vm(desc); \
3508 uint32_t vl = env->vl; \
3509 uint32_t total_elems = \
3510 vext_get_total_elems(env, desc, ESZ); \
3511 uint32_t vta = vext_vta(desc); \
3512 uint32_t vma = vext_vma(desc); \
3513 uint32_t i; \
3515 VSTART_CHECK_EARLY_EXIT(env); \
3517 if (vl == 0) { \
3518 return; \
3520 for (i = env->vstart; i < vl; i++) { \
3521 if (!vm && !vext_elem_mask(v0, i)) { \
3522 /* set masked-off elements to 1s */ \
3523 vext_set_elems_1s(vd, vma, i * ESZ, \
3524 (i + 1) * ESZ); \
3525 continue; \
3527 do_##NAME(vd, vs2, i, env); \
3529 env->vstart = 0; \
3530 vext_set_elems_1s(vd, vta, vl * ESZ, \
3531 total_elems * ESZ); \
3534 RVVCALL(OPFVV1, vfsqrt_v_h, OP_UU_H, H2, H2, float16_sqrt)
3535 RVVCALL(OPFVV1, vfsqrt_v_w, OP_UU_W, H4, H4, float32_sqrt)
3536 RVVCALL(OPFVV1, vfsqrt_v_d, OP_UU_D, H8, H8, float64_sqrt)
3537 GEN_VEXT_V_ENV(vfsqrt_v_h, 2)
3538 GEN_VEXT_V_ENV(vfsqrt_v_w, 4)
3539 GEN_VEXT_V_ENV(vfsqrt_v_d, 8)
3542 * Vector Floating-Point Reciprocal Square-Root Estimate Instruction
3544 * Adapted from riscv-v-spec recip.c:
3545 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3547 static uint64_t frsqrt7(uint64_t f, int exp_size, int frac_size)
3549 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3550 uint64_t exp = extract64(f, frac_size, exp_size);
3551 uint64_t frac = extract64(f, 0, frac_size);
3553 const uint8_t lookup_table[] = {
3554 52, 51, 50, 48, 47, 46, 44, 43,
3555 42, 41, 40, 39, 38, 36, 35, 34,
3556 33, 32, 31, 30, 30, 29, 28, 27,
3557 26, 25, 24, 23, 23, 22, 21, 20,
3558 19, 19, 18, 17, 16, 16, 15, 14,
3559 14, 13, 12, 12, 11, 10, 10, 9,
3560 9, 8, 7, 7, 6, 6, 5, 4,
3561 4, 3, 3, 2, 2, 1, 1, 0,
3562 127, 125, 123, 121, 119, 118, 116, 114,
3563 113, 111, 109, 108, 106, 105, 103, 102,
3564 100, 99, 97, 96, 95, 93, 92, 91,
3565 90, 88, 87, 86, 85, 84, 83, 82,
3566 80, 79, 78, 77, 76, 75, 74, 73,
3567 72, 71, 70, 70, 69, 68, 67, 66,
3568 65, 64, 63, 63, 62, 61, 60, 59,
3569 59, 58, 57, 56, 56, 55, 54, 53
3571 const int precision = 7;
3573 if (exp == 0 && frac != 0) { /* subnormal */
3574 /* Normalize the subnormal. */
3575 while (extract64(frac, frac_size - 1, 1) == 0) {
3576 exp--;
3577 frac <<= 1;
3580 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3583 int idx = ((exp & 1) << (precision - 1)) |
3584 (frac >> (frac_size - precision + 1));
3585 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3586 (frac_size - precision);
3587 uint64_t out_exp = (3 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp) / 2;
3589 uint64_t val = 0;
3590 val = deposit64(val, 0, frac_size, out_frac);
3591 val = deposit64(val, frac_size, exp_size, out_exp);
3592 val = deposit64(val, frac_size + exp_size, 1, sign);
3593 return val;
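/*
 * Worked example for the binary32 case (exp_size = 8, frac_size = 23) with
 * f = 1.0f, i.e. sign = 0, exp = 127, frac = 0:
 *
 *   idx      = ((127 & 1) << 6) | 0     = 64
 *   out_frac = lookup_table[64] << 16   = 127 << 16
 *   out_exp  = (3 * 127 + ~127) / 2     = 253 / 2 = 126
 *
 * (~exp is the two's complement -exp - 1, so out_exp is
 * (3 * bias - 1 - exp) / 2.)  The estimate is therefore
 * 1.9921875 * 2^(126 - 127) = 0.99609375, within the roughly 2^-7 relative
 * error expected of this 7-bit estimate of 1/sqrt(1.0) = 1.0.
 */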
3596 static float16 frsqrt7_h(float16 f, float_status *s)
3598 int exp_size = 5, frac_size = 10;
3599 bool sign = float16_is_neg(f);
3602 * frsqrt7(sNaN) = canonical NaN
3603 * frsqrt7(-inf) = canonical NaN
3604 * frsqrt7(-normal) = canonical NaN
3605 * frsqrt7(-subnormal) = canonical NaN
3607 if (float16_is_signaling_nan(f, s) ||
3608 (float16_is_infinity(f) && sign) ||
3609 (float16_is_normal(f) && sign) ||
3610 (float16_is_zero_or_denormal(f) && !float16_is_zero(f) && sign)) {
3611 s->float_exception_flags |= float_flag_invalid;
3612 return float16_default_nan(s);
3615 /* frsqrt7(qNaN) = canonical NaN */
3616 if (float16_is_quiet_nan(f, s)) {
3617 return float16_default_nan(s);
3620 /* frsqrt7(+-0) = +-inf */
3621 if (float16_is_zero(f)) {
3622 s->float_exception_flags |= float_flag_divbyzero;
3623 return float16_set_sign(float16_infinity, sign);
3626 /* frsqrt7(+inf) = +0 */
3627 if (float16_is_infinity(f) && !sign) {
3628 return float16_set_sign(float16_zero, sign);
3631 /* +normal, +subnormal */
3632 uint64_t val = frsqrt7(f, exp_size, frac_size);
3633 return make_float16(val);
3636 static float32 frsqrt7_s(float32 f, float_status *s)
3638 int exp_size = 8, frac_size = 23;
3639 bool sign = float32_is_neg(f);
3642 * frsqrt7(sNaN) = canonical NaN
3643 * frsqrt7(-inf) = canonical NaN
3644 * frsqrt7(-normal) = canonical NaN
3645 * frsqrt7(-subnormal) = canonical NaN
3647 if (float32_is_signaling_nan(f, s) ||
3648 (float32_is_infinity(f) && sign) ||
3649 (float32_is_normal(f) && sign) ||
3650 (float32_is_zero_or_denormal(f) && !float32_is_zero(f) && sign)) {
3651 s->float_exception_flags |= float_flag_invalid;
3652 return float32_default_nan(s);
3655 /* frsqrt7(qNaN) = canonical NaN */
3656 if (float32_is_quiet_nan(f, s)) {
3657 return float32_default_nan(s);
3660 /* frsqrt7(+-0) = +-inf */
3661 if (float32_is_zero(f)) {
3662 s->float_exception_flags |= float_flag_divbyzero;
3663 return float32_set_sign(float32_infinity, sign);
3666 /* frsqrt7(+inf) = +0 */
3667 if (float32_is_infinity(f) && !sign) {
3668 return float32_set_sign(float32_zero, sign);
3671 /* +normal, +subnormal */
3672 uint64_t val = frsqrt7(f, exp_size, frac_size);
3673 return make_float32(val);
3676 static float64 frsqrt7_d(float64 f, float_status *s)
3678 int exp_size = 11, frac_size = 52;
3679 bool sign = float64_is_neg(f);
3682 * frsqrt7(sNaN) = canonical NaN
3683 * frsqrt7(-inf) = canonical NaN
3684 * frsqrt7(-normal) = canonical NaN
3685 * frsqrt7(-subnormal) = canonical NaN
3687 if (float64_is_signaling_nan(f, s) ||
3688 (float64_is_infinity(f) && sign) ||
3689 (float64_is_normal(f) && sign) ||
3690 (float64_is_zero_or_denormal(f) && !float64_is_zero(f) && sign)) {
3691 s->float_exception_flags |= float_flag_invalid;
3692 return float64_default_nan(s);
3695 /* frsqrt7(qNaN) = canonical NaN */
3696 if (float64_is_quiet_nan(f, s)) {
3697 return float64_default_nan(s);
3700 /* frsqrt7(+-0) = +-inf */
3701 if (float64_is_zero(f)) {
3702 s->float_exception_flags |= float_flag_divbyzero;
3703 return float64_set_sign(float64_infinity, sign);
3706 /* frsqrt7(+inf) = +0 */
3707 if (float64_is_infinity(f) && !sign) {
3708 return float64_set_sign(float64_zero, sign);
3711 /* +normal, +subnormal */
3712 uint64_t val = frsqrt7(f, exp_size, frac_size);
3713 return make_float64(val);
3716 RVVCALL(OPFVV1, vfrsqrt7_v_h, OP_UU_H, H2, H2, frsqrt7_h)
3717 RVVCALL(OPFVV1, vfrsqrt7_v_w, OP_UU_W, H4, H4, frsqrt7_s)
3718 RVVCALL(OPFVV1, vfrsqrt7_v_d, OP_UU_D, H8, H8, frsqrt7_d)
3719 GEN_VEXT_V_ENV(vfrsqrt7_v_h, 2)
3720 GEN_VEXT_V_ENV(vfrsqrt7_v_w, 4)
3721 GEN_VEXT_V_ENV(vfrsqrt7_v_d, 8)
3724 * Vector Floating-Point Reciprocal Estimate Instruction
3726 * Adapted from riscv-v-spec recip.c:
3727 * https://github.com/riscv/riscv-v-spec/blob/master/recip.c
3729 static uint64_t frec7(uint64_t f, int exp_size, int frac_size,
3730 float_status *s)
3732 uint64_t sign = extract64(f, frac_size + exp_size, 1);
3733 uint64_t exp = extract64(f, frac_size, exp_size);
3734 uint64_t frac = extract64(f, 0, frac_size);
3736 const uint8_t lookup_table[] = {
3737 127, 125, 123, 121, 119, 117, 116, 114,
3738 112, 110, 109, 107, 105, 104, 102, 100,
3739 99, 97, 96, 94, 93, 91, 90, 88,
3740 87, 85, 84, 83, 81, 80, 79, 77,
3741 76, 75, 74, 72, 71, 70, 69, 68,
3742 66, 65, 64, 63, 62, 61, 60, 59,
3743 58, 57, 56, 55, 54, 53, 52, 51,
3744 50, 49, 48, 47, 46, 45, 44, 43,
3745 42, 41, 40, 40, 39, 38, 37, 36,
3746 35, 35, 34, 33, 32, 31, 31, 30,
3747 29, 28, 28, 27, 26, 25, 25, 24,
3748 23, 23, 22, 21, 21, 20, 19, 19,
3749 18, 17, 17, 16, 15, 15, 14, 14,
3750 13, 12, 12, 11, 11, 10, 9, 9,
3751 8, 8, 7, 7, 6, 5, 5, 4,
3752 4, 3, 3, 2, 2, 1, 1, 0
3754 const int precision = 7;
3756 if (exp == 0 && frac != 0) { /* subnormal */
3757 /* Normalize the subnormal. */
3758 while (extract64(frac, frac_size - 1, 1) == 0) {
3759 exp--;
3760 frac <<= 1;
3763 frac = (frac << 1) & MAKE_64BIT_MASK(0, frac_size);
3765 if (exp != 0 && exp != UINT64_MAX) {
3767 * Overflow to inf or max value of same sign,
3768 * depending on sign and rounding mode.
3770 s->float_exception_flags |= (float_flag_inexact |
3771 float_flag_overflow);
3773 if ((s->float_rounding_mode == float_round_to_zero) ||
3774 ((s->float_rounding_mode == float_round_down) && !sign) ||
3775 ((s->float_rounding_mode == float_round_up) && sign)) {
3776 /* Return greatest/negative finite value. */
3777 return (sign << (exp_size + frac_size)) |
3778 (MAKE_64BIT_MASK(frac_size, exp_size) - 1);
3779 } else {
3780 /* Return +-inf. */
3781 return (sign << (exp_size + frac_size)) |
3782 MAKE_64BIT_MASK(frac_size, exp_size);
3787 int idx = frac >> (frac_size - precision);
3788 uint64_t out_frac = (uint64_t)(lookup_table[idx]) <<
3789 (frac_size - precision);
3790 uint64_t out_exp = 2 * MAKE_64BIT_MASK(0, exp_size - 1) + ~exp;
3792 if (out_exp == 0 || out_exp == UINT64_MAX) {
3794 * The result is subnormal, but don't raise the underflow exception,
3795 * because there's no additional loss of precision.
3797 out_frac = (out_frac >> 1) | MAKE_64BIT_MASK(frac_size - 1, 1);
3798 if (out_exp == UINT64_MAX) {
3799 out_frac >>= 1;
3800 out_exp = 0;
3804 uint64_t val = 0;
3805 val = deposit64(val, 0, frac_size, out_frac);
3806 val = deposit64(val, frac_size, exp_size, out_exp);
3807 val = deposit64(val, frac_size + exp_size, 1, sign);
3808 return val;
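/*
 * Worked example for the binary32 case (exp_size = 8, frac_size = 23) with
 * f = 2.0f, i.e. sign = 0, exp = 128, frac = 0:
 *
 *   idx      = 0
 *   out_frac = lookup_table[0] << 16    = 127 << 16
 *   out_exp  = 2 * 127 + ~128           = 254 - 129 = 125
 *
 * giving 1.9921875 * 2^(125 - 127) = 0.498046875, a 7-bit estimate of
 * 1/2.0 = 0.5.  When out_exp lands on 0 or wraps to all-ones, the branch
 * above re-encodes the result as a subnormal instead.
 */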
3811 static float16 frec7_h(float16 f, float_status *s)
3813 int exp_size = 5, frac_size = 10;
3814 bool sign = float16_is_neg(f);
3816 /* frec7(+-inf) = +-0 */
3817 if (float16_is_infinity(f)) {
3818 return float16_set_sign(float16_zero, sign);
3821 /* frec7(+-0) = +-inf */
3822 if (float16_is_zero(f)) {
3823 s->float_exception_flags |= float_flag_divbyzero;
3824 return float16_set_sign(float16_infinity, sign);
3827 /* frec7(sNaN) = canonical NaN */
3828 if (float16_is_signaling_nan(f, s)) {
3829 s->float_exception_flags |= float_flag_invalid;
3830 return float16_default_nan(s);
3833 /* frec7(qNaN) = canonical NaN */
3834 if (float16_is_quiet_nan(f, s)) {
3835 return float16_default_nan(s);
3838 /* +-normal, +-subnormal */
3839 uint64_t val = frec7(f, exp_size, frac_size, s);
3840 return make_float16(val);
3843 static float32 frec7_s(float32 f, float_status *s)
3845 int exp_size = 8, frac_size = 23;
3846 bool sign = float32_is_neg(f);
3848 /* frec7(+-inf) = +-0 */
3849 if (float32_is_infinity(f)) {
3850 return float32_set_sign(float32_zero, sign);
3853 /* frec7(+-0) = +-inf */
3854 if (float32_is_zero(f)) {
3855 s->float_exception_flags |= float_flag_divbyzero;
3856 return float32_set_sign(float32_infinity, sign);
3859 /* frec7(sNaN) = canonical NaN */
3860 if (float32_is_signaling_nan(f, s)) {
3861 s->float_exception_flags |= float_flag_invalid;
3862 return float32_default_nan(s);
3865 /* frec7(qNaN) = canonical NaN */
3866 if (float32_is_quiet_nan(f, s)) {
3867 return float32_default_nan(s);
3870 /* +-normal, +-subnormal */
3871 uint64_t val = frec7(f, exp_size, frac_size, s);
3872 return make_float32(val);
3875 static float64 frec7_d(float64 f, float_status *s)
3877 int exp_size = 11, frac_size = 52;
3878 bool sign = float64_is_neg(f);
3880 /* frec7(+-inf) = +-0 */
3881 if (float64_is_infinity(f)) {
3882 return float64_set_sign(float64_zero, sign);
3885 /* frec7(+-0) = +-inf */
3886 if (float64_is_zero(f)) {
3887 s->float_exception_flags |= float_flag_divbyzero;
3888 return float64_set_sign(float64_infinity, sign);
3891 /* frec7(sNaN) = canonical NaN */
3892 if (float64_is_signaling_nan(f, s)) {
3893 s->float_exception_flags |= float_flag_invalid;
3894 return float64_default_nan(s);
3897 /* frec7(qNaN) = canonical NaN */
3898 if (float64_is_quiet_nan(f, s)) {
3899 return float64_default_nan(s);
3902 /* +-normal, +-subnormal */
3903 uint64_t val = frec7(f, exp_size, frac_size, s);
3904 return make_float64(val);
3907 RVVCALL(OPFVV1, vfrec7_v_h, OP_UU_H, H2, H2, frec7_h)
3908 RVVCALL(OPFVV1, vfrec7_v_w, OP_UU_W, H4, H4, frec7_s)
3909 RVVCALL(OPFVV1, vfrec7_v_d, OP_UU_D, H8, H8, frec7_d)
3910 GEN_VEXT_V_ENV(vfrec7_v_h, 2)
3911 GEN_VEXT_V_ENV(vfrec7_v_w, 4)
3912 GEN_VEXT_V_ENV(vfrec7_v_d, 8)
3914 /* Vector Floating-Point MIN/MAX Instructions */
3915 RVVCALL(OPFVV2, vfmin_vv_h, OP_UUU_H, H2, H2, H2, float16_minimum_number)
3916 RVVCALL(OPFVV2, vfmin_vv_w, OP_UUU_W, H4, H4, H4, float32_minimum_number)
3917 RVVCALL(OPFVV2, vfmin_vv_d, OP_UUU_D, H8, H8, H8, float64_minimum_number)
3918 GEN_VEXT_VV_ENV(vfmin_vv_h, 2)
3919 GEN_VEXT_VV_ENV(vfmin_vv_w, 4)
3920 GEN_VEXT_VV_ENV(vfmin_vv_d, 8)
3921 RVVCALL(OPFVF2, vfmin_vf_h, OP_UUU_H, H2, H2, float16_minimum_number)
3922 RVVCALL(OPFVF2, vfmin_vf_w, OP_UUU_W, H4, H4, float32_minimum_number)
3923 RVVCALL(OPFVF2, vfmin_vf_d, OP_UUU_D, H8, H8, float64_minimum_number)
3924 GEN_VEXT_VF(vfmin_vf_h, 2)
3925 GEN_VEXT_VF(vfmin_vf_w, 4)
3926 GEN_VEXT_VF(vfmin_vf_d, 8)
3928 RVVCALL(OPFVV2, vfmax_vv_h, OP_UUU_H, H2, H2, H2, float16_maximum_number)
3929 RVVCALL(OPFVV2, vfmax_vv_w, OP_UUU_W, H4, H4, H4, float32_maximum_number)
3930 RVVCALL(OPFVV2, vfmax_vv_d, OP_UUU_D, H8, H8, H8, float64_maximum_number)
3931 GEN_VEXT_VV_ENV(vfmax_vv_h, 2)
3932 GEN_VEXT_VV_ENV(vfmax_vv_w, 4)
3933 GEN_VEXT_VV_ENV(vfmax_vv_d, 8)
3934 RVVCALL(OPFVF2, vfmax_vf_h, OP_UUU_H, H2, H2, float16_maximum_number)
3935 RVVCALL(OPFVF2, vfmax_vf_w, OP_UUU_W, H4, H4, float32_maximum_number)
3936 RVVCALL(OPFVF2, vfmax_vf_d, OP_UUU_D, H8, H8, float64_maximum_number)
3937 GEN_VEXT_VF(vfmax_vf_h, 2)
3938 GEN_VEXT_VF(vfmax_vf_w, 4)
3939 GEN_VEXT_VF(vfmax_vf_d, 8)
3941 /* Vector Floating-Point Sign-Injection Instructions */
3942 static uint16_t fsgnj16(uint16_t a, uint16_t b, float_status *s)
3944 return deposit64(b, 0, 15, a);
3947 static uint32_t fsgnj32(uint32_t a, uint32_t b, float_status *s)
3949 return deposit64(b, 0, 31, a);
3952 static uint64_t fsgnj64(uint64_t a, uint64_t b, float_status *s)
3954 return deposit64(b, 0, 63, a);
3957 RVVCALL(OPFVV2, vfsgnj_vv_h, OP_UUU_H, H2, H2, H2, fsgnj16)
3958 RVVCALL(OPFVV2, vfsgnj_vv_w, OP_UUU_W, H4, H4, H4, fsgnj32)
3959 RVVCALL(OPFVV2, vfsgnj_vv_d, OP_UUU_D, H8, H8, H8, fsgnj64)
3960 GEN_VEXT_VV_ENV(vfsgnj_vv_h, 2)
3961 GEN_VEXT_VV_ENV(vfsgnj_vv_w, 4)
3962 GEN_VEXT_VV_ENV(vfsgnj_vv_d, 8)
3963 RVVCALL(OPFVF2, vfsgnj_vf_h, OP_UUU_H, H2, H2, fsgnj16)
3964 RVVCALL(OPFVF2, vfsgnj_vf_w, OP_UUU_W, H4, H4, fsgnj32)
3965 RVVCALL(OPFVF2, vfsgnj_vf_d, OP_UUU_D, H8, H8, fsgnj64)
3966 GEN_VEXT_VF(vfsgnj_vf_h, 2)
3967 GEN_VEXT_VF(vfsgnj_vf_w, 4)
3968 GEN_VEXT_VF(vfsgnj_vf_d, 8)
3970 static uint16_t fsgnjn16(uint16_t a, uint16_t b, float_status *s)
3972 return deposit64(~b, 0, 15, a);
3975 static uint32_t fsgnjn32(uint32_t a, uint32_t b, float_status *s)
3977 return deposit64(~b, 0, 31, a);
3980 static uint64_t fsgnjn64(uint64_t a, uint64_t b, float_status *s)
3982 return deposit64(~b, 0, 63, a);
3985 RVVCALL(OPFVV2, vfsgnjn_vv_h, OP_UUU_H, H2, H2, H2, fsgnjn16)
3986 RVVCALL(OPFVV2, vfsgnjn_vv_w, OP_UUU_W, H4, H4, H4, fsgnjn32)
3987 RVVCALL(OPFVV2, vfsgnjn_vv_d, OP_UUU_D, H8, H8, H8, fsgnjn64)
3988 GEN_VEXT_VV_ENV(vfsgnjn_vv_h, 2)
3989 GEN_VEXT_VV_ENV(vfsgnjn_vv_w, 4)
3990 GEN_VEXT_VV_ENV(vfsgnjn_vv_d, 8)
3991 RVVCALL(OPFVF2, vfsgnjn_vf_h, OP_UUU_H, H2, H2, fsgnjn16)
3992 RVVCALL(OPFVF2, vfsgnjn_vf_w, OP_UUU_W, H4, H4, fsgnjn32)
3993 RVVCALL(OPFVF2, vfsgnjn_vf_d, OP_UUU_D, H8, H8, fsgnjn64)
3994 GEN_VEXT_VF(vfsgnjn_vf_h, 2)
3995 GEN_VEXT_VF(vfsgnjn_vf_w, 4)
3996 GEN_VEXT_VF(vfsgnjn_vf_d, 8)
3998 static uint16_t fsgnjx16(uint16_t a, uint16_t b, float_status *s)
4000 return deposit64(b ^ a, 0, 15, a);
4003 static uint32_t fsgnjx32(uint32_t a, uint32_t b, float_status *s)
4005 return deposit64(b ^ a, 0, 31, a);
4008 static uint64_t fsgnjx64(uint64_t a, uint64_t b, float_status *s)
4010 return deposit64(b ^ a, 0, 63, a);
4013 RVVCALL(OPFVV2, vfsgnjx_vv_h, OP_UUU_H, H2, H2, H2, fsgnjx16)
4014 RVVCALL(OPFVV2, vfsgnjx_vv_w, OP_UUU_W, H4, H4, H4, fsgnjx32)
4015 RVVCALL(OPFVV2, vfsgnjx_vv_d, OP_UUU_D, H8, H8, H8, fsgnjx64)
4016 GEN_VEXT_VV_ENV(vfsgnjx_vv_h, 2)
4017 GEN_VEXT_VV_ENV(vfsgnjx_vv_w, 4)
4018 GEN_VEXT_VV_ENV(vfsgnjx_vv_d, 8)
4019 RVVCALL(OPFVF2, vfsgnjx_vf_h, OP_UUU_H, H2, H2, fsgnjx16)
4020 RVVCALL(OPFVF2, vfsgnjx_vf_w, OP_UUU_W, H4, H4, fsgnjx32)
4021 RVVCALL(OPFVF2, vfsgnjx_vf_d, OP_UUU_D, H8, H8, fsgnjx64)
4022 GEN_VEXT_VF(vfsgnjx_vf_h, 2)
4023 GEN_VEXT_VF(vfsgnjx_vf_w, 4)
4024 GEN_VEXT_VF(vfsgnjx_vf_d, 8)
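/*
 * Note on the sign-injection helpers above: deposit64(x, 0, SEW - 1, a)
 * keeps the top (sign) bit of x and overwrites the low SEW - 1 bits with
 * the magnitude of a, so with x = b, ~b and b ^ a the result takes a's
 * magnitude together with b's sign, b's inverted sign, or the XOR of the
 * two signs (vfsgnj / vfsgnjn / vfsgnjx).  These are pure bit operations:
 * the float_status argument is unused, no flags are raised and NaN
 * payloads pass through unchanged.
 */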
4026 /* Vector Floating-Point Compare Instructions */
4027 #define GEN_VEXT_CMP_VV_ENV(NAME, ETYPE, H, DO_OP) \
4028 void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
4029 CPURISCVState *env, uint32_t desc) \
4031 uint32_t vm = vext_vm(desc); \
4032 uint32_t vl = env->vl; \
4033 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4034 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4035 uint32_t vma = vext_vma(desc); \
4036 uint32_t i; \
4038 VSTART_CHECK_EARLY_EXIT(env); \
4040 for (i = env->vstart; i < vl; i++) { \
4041 ETYPE s1 = *((ETYPE *)vs1 + H(i)); \
4042 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4043 if (!vm && !vext_elem_mask(v0, i)) { \
4044 /* set masked-off elements to 1s */ \
4045 if (vma) { \
4046 vext_set_elem_mask(vd, i, 1); \
4048 continue; \
4050 vext_set_elem_mask(vd, i, \
4051 DO_OP(s2, s1, &env->fp_status)); \
4053 env->vstart = 0; \
4055 * mask destination registers are always tail-agnostic
4056 * set tail elements to 1s
4057 */ \
4058 if (vta_all_1s) { \
4059 for (; i < total_elems; i++) { \
4060 vext_set_elem_mask(vd, i, 1); \
4065 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_h, uint16_t, H2, float16_eq_quiet)
4066 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_w, uint32_t, H4, float32_eq_quiet)
4067 GEN_VEXT_CMP_VV_ENV(vmfeq_vv_d, uint64_t, H8, float64_eq_quiet)
4069 #define GEN_VEXT_CMP_VF(NAME, ETYPE, H, DO_OP) \
4070 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4071 CPURISCVState *env, uint32_t desc) \
4073 uint32_t vm = vext_vm(desc); \
4074 uint32_t vl = env->vl; \
4075 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3; \
4076 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4077 uint32_t vma = vext_vma(desc); \
4078 uint32_t i; \
4080 VSTART_CHECK_EARLY_EXIT(env); \
4082 for (i = env->vstart; i < vl; i++) { \
4083 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4084 if (!vm && !vext_elem_mask(v0, i)) { \
4085 /* set masked-off elements to 1s */ \
4086 if (vma) { \
4087 vext_set_elem_mask(vd, i, 1); \
4089 continue; \
4091 vext_set_elem_mask(vd, i, \
4092 DO_OP(s2, (ETYPE)s1, &env->fp_status)); \
4094 env->vstart = 0; \
4096 * mask destination registers are always tail-agnostic
4097 * set tail elements to 1s
4098 */ \
4099 if (vta_all_1s) { \
4100 for (; i < total_elems; i++) { \
4101 vext_set_elem_mask(vd, i, 1); \
4106 GEN_VEXT_CMP_VF(vmfeq_vf_h, uint16_t, H2, float16_eq_quiet)
4107 GEN_VEXT_CMP_VF(vmfeq_vf_w, uint32_t, H4, float32_eq_quiet)
4108 GEN_VEXT_CMP_VF(vmfeq_vf_d, uint64_t, H8, float64_eq_quiet)
4110 static bool vmfne16(uint16_t a, uint16_t b, float_status *s)
4112 FloatRelation compare = float16_compare_quiet(a, b, s);
4113 return compare != float_relation_equal;
4116 static bool vmfne32(uint32_t a, uint32_t b, float_status *s)
4118 FloatRelation compare = float32_compare_quiet(a, b, s);
4119 return compare != float_relation_equal;
4122 static bool vmfne64(uint64_t a, uint64_t b, float_status *s)
4124 FloatRelation compare = float64_compare_quiet(a, b, s);
4125 return compare != float_relation_equal;
4128 GEN_VEXT_CMP_VV_ENV(vmfne_vv_h, uint16_t, H2, vmfne16)
4129 GEN_VEXT_CMP_VV_ENV(vmfne_vv_w, uint32_t, H4, vmfne32)
4130 GEN_VEXT_CMP_VV_ENV(vmfne_vv_d, uint64_t, H8, vmfne64)
4131 GEN_VEXT_CMP_VF(vmfne_vf_h, uint16_t, H2, vmfne16)
4132 GEN_VEXT_CMP_VF(vmfne_vf_w, uint32_t, H4, vmfne32)
4133 GEN_VEXT_CMP_VF(vmfne_vf_d, uint64_t, H8, vmfne64)
4135 GEN_VEXT_CMP_VV_ENV(vmflt_vv_h, uint16_t, H2, float16_lt)
4136 GEN_VEXT_CMP_VV_ENV(vmflt_vv_w, uint32_t, H4, float32_lt)
4137 GEN_VEXT_CMP_VV_ENV(vmflt_vv_d, uint64_t, H8, float64_lt)
4138 GEN_VEXT_CMP_VF(vmflt_vf_h, uint16_t, H2, float16_lt)
4139 GEN_VEXT_CMP_VF(vmflt_vf_w, uint32_t, H4, float32_lt)
4140 GEN_VEXT_CMP_VF(vmflt_vf_d, uint64_t, H8, float64_lt)
4142 GEN_VEXT_CMP_VV_ENV(vmfle_vv_h, uint16_t, H2, float16_le)
4143 GEN_VEXT_CMP_VV_ENV(vmfle_vv_w, uint32_t, H4, float32_le)
4144 GEN_VEXT_CMP_VV_ENV(vmfle_vv_d, uint64_t, H8, float64_le)
4145 GEN_VEXT_CMP_VF(vmfle_vf_h, uint16_t, H2, float16_le)
4146 GEN_VEXT_CMP_VF(vmfle_vf_w, uint32_t, H4, float32_le)
4147 GEN_VEXT_CMP_VF(vmfle_vf_d, uint64_t, H8, float64_le)
4149 static bool vmfgt16(uint16_t a, uint16_t b, float_status *s)
4151 FloatRelation compare = float16_compare(a, b, s);
4152 return compare == float_relation_greater;
4155 static bool vmfgt32(uint32_t a, uint32_t b, float_status *s)
4157 FloatRelation compare = float32_compare(a, b, s);
4158 return compare == float_relation_greater;
4161 static bool vmfgt64(uint64_t a, uint64_t b, float_status *s)
4163 FloatRelation compare = float64_compare(a, b, s);
4164 return compare == float_relation_greater;
4167 GEN_VEXT_CMP_VF(vmfgt_vf_h, uint16_t, H2, vmfgt16)
4168 GEN_VEXT_CMP_VF(vmfgt_vf_w, uint32_t, H4, vmfgt32)
4169 GEN_VEXT_CMP_VF(vmfgt_vf_d, uint64_t, H8, vmfgt64)
4171 static bool vmfge16(uint16_t a, uint16_t b, float_status *s)
4173 FloatRelation compare = float16_compare(a, b, s);
4174 return compare == float_relation_greater ||
4175 compare == float_relation_equal;
4178 static bool vmfge32(uint32_t a, uint32_t b, float_status *s)
4180 FloatRelation compare = float32_compare(a, b, s);
4181 return compare == float_relation_greater ||
4182 compare == float_relation_equal;
4185 static bool vmfge64(uint64_t a, uint64_t b, float_status *s)
4187 FloatRelation compare = float64_compare(a, b, s);
4188 return compare == float_relation_greater ||
4189 compare == float_relation_equal;
4192 GEN_VEXT_CMP_VF(vmfge_vf_h, uint16_t, H2, vmfge16)
4193 GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32)
4194 GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64)
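/*
 * Note on the compare helpers above: vmfeq/vmfne use the quiet compare
 * primitives (float*_eq_quiet, float*_compare_quiet), which only raise the
 * invalid flag for signaling NaNs, while vmflt/vmfle/vmfgt/vmfge use the
 * signaling primitives (float*_lt, float*_le, float*_compare) and raise
 * invalid whenever either operand is any NaN; a NaN comparison then yields
 * 0 except for vmfne, which yields 1.  vmfgt and vmfge only exist in the
 * vector-scalar (.vf) form, which is why no _vv variants appear here.
 */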
4196 /* Vector Floating-Point Classify Instruction */
4197 target_ulong fclass_h(uint64_t frs1)
4199 float16 f = frs1;
4200 bool sign = float16_is_neg(f);
4202 if (float16_is_infinity(f)) {
4203 return sign ? 1 << 0 : 1 << 7;
4204 } else if (float16_is_zero(f)) {
4205 return sign ? 1 << 3 : 1 << 4;
4206 } else if (float16_is_zero_or_denormal(f)) {
4207 return sign ? 1 << 2 : 1 << 5;
4208 } else if (float16_is_any_nan(f)) {
4209 float_status s = { }; /* for snan_bit_is_one */
4210 return float16_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4211 } else {
4212 return sign ? 1 << 1 : 1 << 6;
4216 target_ulong fclass_s(uint64_t frs1)
4218 float32 f = frs1;
4219 bool sign = float32_is_neg(f);
4221 if (float32_is_infinity(f)) {
4222 return sign ? 1 << 0 : 1 << 7;
4223 } else if (float32_is_zero(f)) {
4224 return sign ? 1 << 3 : 1 << 4;
4225 } else if (float32_is_zero_or_denormal(f)) {
4226 return sign ? 1 << 2 : 1 << 5;
4227 } else if (float32_is_any_nan(f)) {
4228 float_status s = { }; /* for snan_bit_is_one */
4229 return float32_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4230 } else {
4231 return sign ? 1 << 1 : 1 << 6;
4235 target_ulong fclass_d(uint64_t frs1)
4237 float64 f = frs1;
4238 bool sign = float64_is_neg(f);
4240 if (float64_is_infinity(f)) {
4241 return sign ? 1 << 0 : 1 << 7;
4242 } else if (float64_is_zero(f)) {
4243 return sign ? 1 << 3 : 1 << 4;
4244 } else if (float64_is_zero_or_denormal(f)) {
4245 return sign ? 1 << 2 : 1 << 5;
4246 } else if (float64_is_any_nan(f)) {
4247 float_status s = { }; /* for snan_bit_is_one */
4248 return float64_is_quiet_nan(f, &s) ? 1 << 9 : 1 << 8;
4249 } else {
4250 return sign ? 1 << 1 : 1 << 6;
4254 RVVCALL(OPIVV1, vfclass_v_h, OP_UU_H, H2, H2, fclass_h)
4255 RVVCALL(OPIVV1, vfclass_v_w, OP_UU_W, H4, H4, fclass_s)
4256 RVVCALL(OPIVV1, vfclass_v_d, OP_UU_D, H8, H8, fclass_d)
4257 GEN_VEXT_V(vfclass_v_h, 2)
4258 GEN_VEXT_V(vfclass_v_w, 4)
4259 GEN_VEXT_V(vfclass_v_d, 8)
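/*
 * The classify helpers above return the standard RISC-V fclass mask:
 *
 *   bit 0: -infinity      bit 5: +subnormal
 *   bit 1: -normal        bit 6: +normal
 *   bit 2: -subnormal     bit 7: +infinity
 *   bit 3: -0             bit 8: signaling NaN
 *   bit 4: +0             bit 9: quiet NaN
 *
 * Exactly one bit is set for any input and no exception flags are raised.
 */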
4261 /* Vector Floating-Point Merge Instruction */
4263 #define GEN_VFMERGE_VF(NAME, ETYPE, H) \
4264 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
4265 CPURISCVState *env, uint32_t desc) \
4267 uint32_t vm = vext_vm(desc); \
4268 uint32_t vl = env->vl; \
4269 uint32_t esz = sizeof(ETYPE); \
4270 uint32_t total_elems = \
4271 vext_get_total_elems(env, desc, esz); \
4272 uint32_t vta = vext_vta(desc); \
4273 uint32_t i; \
4275 VSTART_CHECK_EARLY_EXIT(env); \
4277 for (i = env->vstart; i < vl; i++) { \
4278 ETYPE s2 = *((ETYPE *)vs2 + H(i)); \
4279 *((ETYPE *)vd + H(i)) = \
4280 (!vm && !vext_elem_mask(v0, i) ? s2 : s1); \
4282 env->vstart = 0; \
4283 /* set tail elements to 1s */ \
4284 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4287 GEN_VFMERGE_VF(vfmerge_vfm_h, int16_t, H2)
4288 GEN_VFMERGE_VF(vfmerge_vfm_w, int32_t, H4)
4289 GEN_VFMERGE_VF(vfmerge_vfm_d, int64_t, H8)
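/*
 * Element-wise behaviour of the merge helper above: elements whose mask
 * bit is set (or all elements when vm is 1) receive the scalar s1, the
 * remaining elements receive vs2[i].  For example, with v0 = 0b0101 and
 * vs2 = {a, b, c, d}, vfmerge.vfm yields {s1, b, s1, d}.  The values are
 * moved as raw bits, so no FP flags can be raised.
 */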
4291 /* Single-Width Floating-Point/Integer Type-Convert Instructions */
4292 /* vfcvt.xu.f.v vd, vs2, vm # Convert float to unsigned integer. */
4293 RVVCALL(OPFVV1, vfcvt_xu_f_v_h, OP_UU_H, H2, H2, float16_to_uint16)
4294 RVVCALL(OPFVV1, vfcvt_xu_f_v_w, OP_UU_W, H4, H4, float32_to_uint32)
4295 RVVCALL(OPFVV1, vfcvt_xu_f_v_d, OP_UU_D, H8, H8, float64_to_uint64)
4296 GEN_VEXT_V_ENV(vfcvt_xu_f_v_h, 2)
4297 GEN_VEXT_V_ENV(vfcvt_xu_f_v_w, 4)
4298 GEN_VEXT_V_ENV(vfcvt_xu_f_v_d, 8)
4300 /* vfcvt.x.f.v vd, vs2, vm # Convert float to signed integer. */
4301 RVVCALL(OPFVV1, vfcvt_x_f_v_h, OP_UU_H, H2, H2, float16_to_int16)
4302 RVVCALL(OPFVV1, vfcvt_x_f_v_w, OP_UU_W, H4, H4, float32_to_int32)
4303 RVVCALL(OPFVV1, vfcvt_x_f_v_d, OP_UU_D, H8, H8, float64_to_int64)
4304 GEN_VEXT_V_ENV(vfcvt_x_f_v_h, 2)
4305 GEN_VEXT_V_ENV(vfcvt_x_f_v_w, 4)
4306 GEN_VEXT_V_ENV(vfcvt_x_f_v_d, 8)
4308 /* vfcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to float. */
4309 RVVCALL(OPFVV1, vfcvt_f_xu_v_h, OP_UU_H, H2, H2, uint16_to_float16)
4310 RVVCALL(OPFVV1, vfcvt_f_xu_v_w, OP_UU_W, H4, H4, uint32_to_float32)
4311 RVVCALL(OPFVV1, vfcvt_f_xu_v_d, OP_UU_D, H8, H8, uint64_to_float64)
4312 GEN_VEXT_V_ENV(vfcvt_f_xu_v_h, 2)
4313 GEN_VEXT_V_ENV(vfcvt_f_xu_v_w, 4)
4314 GEN_VEXT_V_ENV(vfcvt_f_xu_v_d, 8)
4316 /* vfcvt.f.x.v vd, vs2, vm # Convert integer to float. */
4317 RVVCALL(OPFVV1, vfcvt_f_x_v_h, OP_UU_H, H2, H2, int16_to_float16)
4318 RVVCALL(OPFVV1, vfcvt_f_x_v_w, OP_UU_W, H4, H4, int32_to_float32)
4319 RVVCALL(OPFVV1, vfcvt_f_x_v_d, OP_UU_D, H8, H8, int64_to_float64)
4320 GEN_VEXT_V_ENV(vfcvt_f_x_v_h, 2)
4321 GEN_VEXT_V_ENV(vfcvt_f_x_v_w, 4)
4322 GEN_VEXT_V_ENV(vfcvt_f_x_v_d, 8)
4324 /* Widening Floating-Point/Integer Type-Convert Instructions */
4325 /* (TD, T2, TX2) */
4326 #define WOP_UU_B uint16_t, uint8_t, uint8_t
4327 #define WOP_UU_H uint32_t, uint16_t, uint16_t
4328 #define WOP_UU_W uint64_t, uint32_t, uint32_t
4330 * vfwcvt.xu.f.v vd, vs2, vm # Convert float to double-width unsigned integer.
4332 RVVCALL(OPFVV1, vfwcvt_xu_f_v_h, WOP_UU_H, H4, H2, float16_to_uint32)
4333 RVVCALL(OPFVV1, vfwcvt_xu_f_v_w, WOP_UU_W, H8, H4, float32_to_uint64)
4334 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_h, 4)
4335 GEN_VEXT_V_ENV(vfwcvt_xu_f_v_w, 8)
4337 /* vfwcvt.x.f.v vd, vs2, vm # Convert float to double-width signed integer. */
4338 RVVCALL(OPFVV1, vfwcvt_x_f_v_h, WOP_UU_H, H4, H2, float16_to_int32)
4339 RVVCALL(OPFVV1, vfwcvt_x_f_v_w, WOP_UU_W, H8, H4, float32_to_int64)
4340 GEN_VEXT_V_ENV(vfwcvt_x_f_v_h, 4)
4341 GEN_VEXT_V_ENV(vfwcvt_x_f_v_w, 8)
4344 * vfwcvt.f.xu.v vd, vs2, vm # Convert unsigned integer to double-width float.
4346 RVVCALL(OPFVV1, vfwcvt_f_xu_v_b, WOP_UU_B, H2, H1, uint8_to_float16)
4347 RVVCALL(OPFVV1, vfwcvt_f_xu_v_h, WOP_UU_H, H4, H2, uint16_to_float32)
4348 RVVCALL(OPFVV1, vfwcvt_f_xu_v_w, WOP_UU_W, H8, H4, uint32_to_float64)
4349 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_b, 2)
4350 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_h, 4)
4351 GEN_VEXT_V_ENV(vfwcvt_f_xu_v_w, 8)
4353 /* vfwcvt.f.x.v vd, vs2, vm # Convert integer to double-width float. */
4354 RVVCALL(OPFVV1, vfwcvt_f_x_v_b, WOP_UU_B, H2, H1, int8_to_float16)
4355 RVVCALL(OPFVV1, vfwcvt_f_x_v_h, WOP_UU_H, H4, H2, int16_to_float32)
4356 RVVCALL(OPFVV1, vfwcvt_f_x_v_w, WOP_UU_W, H8, H4, int32_to_float64)
4357 GEN_VEXT_V_ENV(vfwcvt_f_x_v_b, 2)
4358 GEN_VEXT_V_ENV(vfwcvt_f_x_v_h, 4)
4359 GEN_VEXT_V_ENV(vfwcvt_f_x_v_w, 8)
4362 * vfwcvt.f.f.v vd, vs2, vm # Convert single-width float to double-width float.
4364 static uint32_t vfwcvtffv16(uint16_t a, float_status *s)
4366 return float16_to_float32(a, true, s);
4369 RVVCALL(OPFVV1, vfwcvt_f_f_v_h, WOP_UU_H, H4, H2, vfwcvtffv16)
4370 RVVCALL(OPFVV1, vfwcvt_f_f_v_w, WOP_UU_W, H8, H4, float32_to_float64)
4371 GEN_VEXT_V_ENV(vfwcvt_f_f_v_h, 4)
4372 GEN_VEXT_V_ENV(vfwcvt_f_f_v_w, 8)
4374 RVVCALL(OPFVV1, vfwcvtbf16_f_f_v, WOP_UU_H, H4, H2, bfloat16_to_float32)
4375 GEN_VEXT_V_ENV(vfwcvtbf16_f_f_v, 4)
4377 /* Narrowing Floating-Point/Integer Type-Convert Instructions */
4378 /* (TD, T2, TX2) */
4379 #define NOP_UU_B uint8_t, uint16_t, uint32_t
4380 #define NOP_UU_H uint16_t, uint32_t, uint32_t
4381 #define NOP_UU_W uint32_t, uint64_t, uint64_t
4382 /* vfncvt.xu.f.v vd, vs2, vm # Convert double-width float to unsigned integer. */
4383 RVVCALL(OPFVV1, vfncvt_xu_f_w_b, NOP_UU_B, H1, H2, float16_to_uint8)
4384 RVVCALL(OPFVV1, vfncvt_xu_f_w_h, NOP_UU_H, H2, H4, float32_to_uint16)
4385 RVVCALL(OPFVV1, vfncvt_xu_f_w_w, NOP_UU_W, H4, H8, float64_to_uint32)
4386 GEN_VEXT_V_ENV(vfncvt_xu_f_w_b, 1)
4387 GEN_VEXT_V_ENV(vfncvt_xu_f_w_h, 2)
4388 GEN_VEXT_V_ENV(vfncvt_xu_f_w_w, 4)
4390 /* vfncvt.x.f.v vd, vs2, vm # Convert double-width float to signed integer. */
4391 RVVCALL(OPFVV1, vfncvt_x_f_w_b, NOP_UU_B, H1, H2, float16_to_int8)
4392 RVVCALL(OPFVV1, vfncvt_x_f_w_h, NOP_UU_H, H2, H4, float32_to_int16)
4393 RVVCALL(OPFVV1, vfncvt_x_f_w_w, NOP_UU_W, H4, H8, float64_to_int32)
4394 GEN_VEXT_V_ENV(vfncvt_x_f_w_b, 1)
4395 GEN_VEXT_V_ENV(vfncvt_x_f_w_h, 2)
4396 GEN_VEXT_V_ENV(vfncvt_x_f_w_w, 4)
4399 * vfncvt.f.xu.v vd, vs2, vm # Convert double-width unsigned integer to float.
4401 RVVCALL(OPFVV1, vfncvt_f_xu_w_h, NOP_UU_H, H2, H4, uint32_to_float16)
4402 RVVCALL(OPFVV1, vfncvt_f_xu_w_w, NOP_UU_W, H4, H8, uint64_to_float32)
4403 GEN_VEXT_V_ENV(vfncvt_f_xu_w_h, 2)
4404 GEN_VEXT_V_ENV(vfncvt_f_xu_w_w, 4)
4406 /* vfncvt.f.x.v vd, vs2, vm # Convert double-width integer to float. */
4407 RVVCALL(OPFVV1, vfncvt_f_x_w_h, NOP_UU_H, H2, H4, int32_to_float16)
4408 RVVCALL(OPFVV1, vfncvt_f_x_w_w, NOP_UU_W, H4, H8, int64_to_float32)
4409 GEN_VEXT_V_ENV(vfncvt_f_x_w_h, 2)
4410 GEN_VEXT_V_ENV(vfncvt_f_x_w_w, 4)
4412 /* vfncvt.f.f.v vd, vs2, vm # Convert double-width float to single-width float. */
4413 static uint16_t vfncvtffv16(uint32_t a, float_status *s)
4415 return float32_to_float16(a, true, s);
4418 RVVCALL(OPFVV1, vfncvt_f_f_w_h, NOP_UU_H, H2, H4, vfncvtffv16)
4419 RVVCALL(OPFVV1, vfncvt_f_f_w_w, NOP_UU_W, H4, H8, float64_to_float32)
4420 GEN_VEXT_V_ENV(vfncvt_f_f_w_h, 2)
4421 GEN_VEXT_V_ENV(vfncvt_f_f_w_w, 4)
4423 RVVCALL(OPFVV1, vfncvtbf16_f_f_w, NOP_UU_H, H2, H4, float32_to_bfloat16)
4424 GEN_VEXT_V_ENV(vfncvtbf16_f_f_w, 2)
4427 * Vector Reduction Operations
4429 /* Vector Single-Width Integer Reduction Instructions */
4430 #define GEN_VEXT_RED(NAME, TD, TS2, HD, HS2, OP) \
4431 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4432 void *vs2, CPURISCVState *env, \
4433 uint32_t desc) \
4435 uint32_t vm = vext_vm(desc); \
4436 uint32_t vl = env->vl; \
4437 uint32_t esz = sizeof(TD); \
4438 uint32_t vlenb = simd_maxsz(desc); \
4439 uint32_t vta = vext_vta(desc); \
4440 uint32_t i; \
4441 TD s1 = *((TD *)vs1 + HD(0)); \
4443 for (i = env->vstart; i < vl; i++) { \
4444 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4445 if (!vm && !vext_elem_mask(v0, i)) { \
4446 continue; \
4448 s1 = OP(s1, (TD)s2); \
4450 *((TD *)vd + HD(0)) = s1; \
4451 env->vstart = 0; \
4452 /* set tail elements to 1s */ \
4453 vext_set_elems_1s(vd, vta, esz, vlenb); \
4456 /* vd[0] = sum(vs1[0], vs2[*]) */
4457 GEN_VEXT_RED(vredsum_vs_b, int8_t, int8_t, H1, H1, DO_ADD)
4458 GEN_VEXT_RED(vredsum_vs_h, int16_t, int16_t, H2, H2, DO_ADD)
4459 GEN_VEXT_RED(vredsum_vs_w, int32_t, int32_t, H4, H4, DO_ADD)
4460 GEN_VEXT_RED(vredsum_vs_d, int64_t, int64_t, H8, H8, DO_ADD)
4462 /* vd[0] = maxu(vs1[0], vs2[*]) */
4463 GEN_VEXT_RED(vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, DO_MAX)
4464 GEN_VEXT_RED(vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, DO_MAX)
4465 GEN_VEXT_RED(vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, DO_MAX)
4466 GEN_VEXT_RED(vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, DO_MAX)
4468 /* vd[0] = max(vs1[0], vs2[*]) */
4469 GEN_VEXT_RED(vredmax_vs_b, int8_t, int8_t, H1, H1, DO_MAX)
4470 GEN_VEXT_RED(vredmax_vs_h, int16_t, int16_t, H2, H2, DO_MAX)
4471 GEN_VEXT_RED(vredmax_vs_w, int32_t, int32_t, H4, H4, DO_MAX)
4472 GEN_VEXT_RED(vredmax_vs_d, int64_t, int64_t, H8, H8, DO_MAX)
4474 /* vd[0] = minu(vs1[0], vs2[*]) */
4475 GEN_VEXT_RED(vredminu_vs_b, uint8_t, uint8_t, H1, H1, DO_MIN)
4476 GEN_VEXT_RED(vredminu_vs_h, uint16_t, uint16_t, H2, H2, DO_MIN)
4477 GEN_VEXT_RED(vredminu_vs_w, uint32_t, uint32_t, H4, H4, DO_MIN)
4478 GEN_VEXT_RED(vredminu_vs_d, uint64_t, uint64_t, H8, H8, DO_MIN)
4480 /* vd[0] = min(vs1[0], vs2[*]) */
4481 GEN_VEXT_RED(vredmin_vs_b, int8_t, int8_t, H1, H1, DO_MIN)
4482 GEN_VEXT_RED(vredmin_vs_h, int16_t, int16_t, H2, H2, DO_MIN)
4483 GEN_VEXT_RED(vredmin_vs_w, int32_t, int32_t, H4, H4, DO_MIN)
4484 GEN_VEXT_RED(vredmin_vs_d, int64_t, int64_t, H8, H8, DO_MIN)
4486 /* vd[0] = and(vs1[0], vs2[*]) */
4487 GEN_VEXT_RED(vredand_vs_b, int8_t, int8_t, H1, H1, DO_AND)
4488 GEN_VEXT_RED(vredand_vs_h, int16_t, int16_t, H2, H2, DO_AND)
4489 GEN_VEXT_RED(vredand_vs_w, int32_t, int32_t, H4, H4, DO_AND)
4490 GEN_VEXT_RED(vredand_vs_d, int64_t, int64_t, H8, H8, DO_AND)
4492 /* vd[0] = or(vs1[0], vs2[*]) */
4493 GEN_VEXT_RED(vredor_vs_b, int8_t, int8_t, H1, H1, DO_OR)
4494 GEN_VEXT_RED(vredor_vs_h, int16_t, int16_t, H2, H2, DO_OR)
4495 GEN_VEXT_RED(vredor_vs_w, int32_t, int32_t, H4, H4, DO_OR)
4496 GEN_VEXT_RED(vredor_vs_d, int64_t, int64_t, H8, H8, DO_OR)
4498 /* vd[0] = xor(vs1[0], vs2[*]) */
4499 GEN_VEXT_RED(vredxor_vs_b, int8_t, int8_t, H1, H1, DO_XOR)
4500 GEN_VEXT_RED(vredxor_vs_h, int16_t, int16_t, H2, H2, DO_XOR)
4501 GEN_VEXT_RED(vredxor_vs_w, int32_t, int32_t, H4, H4, DO_XOR)
4502 GEN_VEXT_RED(vredxor_vs_d, int64_t, int64_t, H8, H8, DO_XOR)
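/*
 * Illustrative sketch (not used by the helpers above; the name is
 * hypothetical): what vredsum.vs computes for 32-bit elements once
 * masking, vstart and tail handling are stripped away.  The loop mirrors
 * GEN_VEXT_RED: the accumulator starts from vs1[0], folds in the body
 * elements of vs2, and the scalar result is written back to vd[0].
 */
static inline int32_t example_vredsum_w(int32_t vs1_0, const int32_t *vs2,
                                        uint32_t vl)
{
    int32_t acc = vs1_0;            /* vd[0] accumulator seeded from vs1[0] */

    for (uint32_t i = 0; i < vl; i++) {
        acc += vs2[i];              /* DO_ADD over the body elements */
    }
    return acc;                     /* stored to vd[0] by the real helper */
}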
4504 /* Vector Widening Integer Reduction Instructions */
4505 /* signed sum reduction into double-width accumulator */
4506 GEN_VEXT_RED(vwredsum_vs_b, int16_t, int8_t, H2, H1, DO_ADD)
4507 GEN_VEXT_RED(vwredsum_vs_h, int32_t, int16_t, H4, H2, DO_ADD)
4508 GEN_VEXT_RED(vwredsum_vs_w, int64_t, int32_t, H8, H4, DO_ADD)
4510 /* Unsigned sum reduction into double-width accumulator */
4511 GEN_VEXT_RED(vwredsumu_vs_b, uint16_t, uint8_t, H2, H1, DO_ADD)
4512 GEN_VEXT_RED(vwredsumu_vs_h, uint32_t, uint16_t, H4, H2, DO_ADD)
4513 GEN_VEXT_RED(vwredsumu_vs_w, uint64_t, uint32_t, H8, H4, DO_ADD)
4515 /* Vector Single-Width Floating-Point Reduction Instructions */
4516 #define GEN_VEXT_FRED(NAME, TD, TS2, HD, HS2, OP) \
4517 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4518 void *vs2, CPURISCVState *env, \
4519 uint32_t desc) \
4521 uint32_t vm = vext_vm(desc); \
4522 uint32_t vl = env->vl; \
4523 uint32_t esz = sizeof(TD); \
4524 uint32_t vlenb = simd_maxsz(desc); \
4525 uint32_t vta = vext_vta(desc); \
4526 uint32_t i; \
4527 TD s1 = *((TD *)vs1 + HD(0)); \
4529 for (i = env->vstart; i < vl; i++) { \
4530 TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
4531 if (!vm && !vext_elem_mask(v0, i)) { \
4532 continue; \
4534 s1 = OP(s1, (TD)s2, &env->fp_status); \
4536 *((TD *)vd + HD(0)) = s1; \
4537 env->vstart = 0; \
4538 /* set tail elements to 1s */ \
4539 vext_set_elems_1s(vd, vta, esz, vlenb); \
4542 /* Unordered sum */
4543 GEN_VEXT_FRED(vfredusum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4544 GEN_VEXT_FRED(vfredusum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4545 GEN_VEXT_FRED(vfredusum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4547 /* Ordered sum */
4548 GEN_VEXT_FRED(vfredosum_vs_h, uint16_t, uint16_t, H2, H2, float16_add)
4549 GEN_VEXT_FRED(vfredosum_vs_w, uint32_t, uint32_t, H4, H4, float32_add)
4550 GEN_VEXT_FRED(vfredosum_vs_d, uint64_t, uint64_t, H8, H8, float64_add)
4552 /* Maximum value */
4553 GEN_VEXT_FRED(vfredmax_vs_h, uint16_t, uint16_t, H2, H2,
4554 float16_maximum_number)
4555 GEN_VEXT_FRED(vfredmax_vs_w, uint32_t, uint32_t, H4, H4,
4556 float32_maximum_number)
4557 GEN_VEXT_FRED(vfredmax_vs_d, uint64_t, uint64_t, H8, H8,
4558 float64_maximum_number)
4560 /* Minimum value */
4561 GEN_VEXT_FRED(vfredmin_vs_h, uint16_t, uint16_t, H2, H2,
4562 float16_minimum_number)
4563 GEN_VEXT_FRED(vfredmin_vs_w, uint32_t, uint32_t, H4, H4,
4564 float32_minimum_number)
4565 GEN_VEXT_FRED(vfredmin_vs_d, uint64_t, uint64_t, H8, H8,
4566 float64_minimum_number)
4568 /* Vector Widening Floating-Point Add Instructions */
4569 static uint32_t fwadd16(uint32_t a, uint16_t b, float_status *s)
4571 return float32_add(a, float16_to_float32(b, true, s), s);
4574 static uint64_t fwadd32(uint64_t a, uint32_t b, float_status *s)
4576 return float64_add(a, float32_to_float64(b, s), s);
4579 /* Vector Widening Floating-Point Reduction Instructions */
4580 /* Ordered/unordered reduce 2*SEW = 2*SEW + sum(promote(SEW)) */
4581 GEN_VEXT_FRED(vfwredusum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4582 GEN_VEXT_FRED(vfwredusum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
4583 GEN_VEXT_FRED(vfwredosum_vs_h, uint32_t, uint16_t, H4, H2, fwadd16)
4584 GEN_VEXT_FRED(vfwredosum_vs_w, uint64_t, uint32_t, H8, H4, fwadd32)
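/*
 * Note on the FP reductions above: the unordered (vfredusum/vfwredusum)
 * and ordered (vfredosum/vfwredosum) sums are instantiated from the same
 * strictly sequential loop, which is one of the orderings the spec permits
 * for the unordered form.  The widening variants promote each SEW element
 * to 2*SEW inside fwadd16/fwadd32 before adding it to the 2*SEW
 * accumulator.
 */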
4587 * Vector Mask Operations
4589 /* Vector Mask-Register Logical Instructions */
4590 #define GEN_VEXT_MASK_VV(NAME, OP) \
4591 void HELPER(NAME)(void *vd, void *v0, void *vs1, \
4592 void *vs2, CPURISCVState *env, \
4593 uint32_t desc) \
4595 uint32_t vl = env->vl; \
4596 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;\
4597 uint32_t vta_all_1s = vext_vta_all_1s(desc); \
4598 uint32_t i; \
4599 int a, b; \
4601 VSTART_CHECK_EARLY_EXIT(env); \
4603 for (i = env->vstart; i < vl; i++) { \
4604 a = vext_elem_mask(vs1, i); \
4605 b = vext_elem_mask(vs2, i); \
4606 vext_set_elem_mask(vd, i, OP(b, a)); \
4608 env->vstart = 0; \
4610 * mask destination registers are always tail-agnostic
4611 * set tail elements to 1s
4612 */ \
4613 if (vta_all_1s) { \
4614 for (; i < total_elems; i++) { \
4615 vext_set_elem_mask(vd, i, 1); \
4620 #define DO_NAND(N, M) (!(N & M))
4621 #define DO_ANDNOT(N, M) (N & !M)
4622 #define DO_NOR(N, M) (!(N | M))
4623 #define DO_ORNOT(N, M) (N | !M)
4624 #define DO_XNOR(N, M) (!(N ^ M))
4626 GEN_VEXT_MASK_VV(vmand_mm, DO_AND)
4627 GEN_VEXT_MASK_VV(vmnand_mm, DO_NAND)
4628 GEN_VEXT_MASK_VV(vmandn_mm, DO_ANDNOT)
4629 GEN_VEXT_MASK_VV(vmxor_mm, DO_XOR)
4630 GEN_VEXT_MASK_VV(vmor_mm, DO_OR)
4631 GEN_VEXT_MASK_VV(vmnor_mm, DO_NOR)
4632 GEN_VEXT_MASK_VV(vmorn_mm, DO_ORNOT)
4633 GEN_VEXT_MASK_VV(vmxnor_mm, DO_XNOR)
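/*
 * Note on the mask-logical helpers above: vext_elem_mask() returns 0 or 1,
 * so logical '!' acts as a one-bit complement and e.g. DO_ANDNOT(N, M) is
 * the and-not of single mask bits.  OP is invoked as OP(b, a) with b taken
 * from vs2 and a from vs1, so vmandn.mm computes vs2 & ~vs1 and vmorn.mm
 * computes vs2 | ~vs1, matching their definitions in the spec.
 */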
4635 /* Vector count population in mask vcpop */
4636 target_ulong HELPER(vcpop_m)(void *v0, void *vs2, CPURISCVState *env,
4637 uint32_t desc)
4639 target_ulong cnt = 0;
4640 uint32_t vm = vext_vm(desc);
4641 uint32_t vl = env->vl;
4642 int i;
4644 for (i = env->vstart; i < vl; i++) {
4645 if (vm || vext_elem_mask(v0, i)) {
4646 if (vext_elem_mask(vs2, i)) {
4647 cnt++;
4651 env->vstart = 0;
4652 return cnt;
4655 /* vfirst find-first-set mask bit */
4656 target_ulong HELPER(vfirst_m)(void *v0, void *vs2, CPURISCVState *env,
4657 uint32_t desc)
4659 uint32_t vm = vext_vm(desc);
4660 uint32_t vl = env->vl;
4661 int i;
4663 for (i = env->vstart; i < vl; i++) {
4664 if (vm || vext_elem_mask(v0, i)) {
4665 if (vext_elem_mask(vs2, i)) {
4666 return i;
4670 env->vstart = 0;
4671 return -1LL;
4674 enum set_mask_type {
4675 ONLY_FIRST = 1,
4676 INCLUDE_FIRST,
4677 BEFORE_FIRST,
4680 static void vmsetm(void *vd, void *v0, void *vs2, CPURISCVState *env,
4681 uint32_t desc, enum set_mask_type type)
4683 uint32_t vm = vext_vm(desc);
4684 uint32_t vl = env->vl;
4685 uint32_t total_elems = riscv_cpu_cfg(env)->vlenb << 3;
4686 uint32_t vta_all_1s = vext_vta_all_1s(desc);
4687 uint32_t vma = vext_vma(desc);
4688 int i;
4689 bool first_mask_bit = false;
4691 for (i = env->vstart; i < vl; i++) {
4692 if (!vm && !vext_elem_mask(v0, i)) {
4693 /* set masked-off elements to 1s */
4694 if (vma) {
4695 vext_set_elem_mask(vd, i, 1);
4697 continue;
4699 /* write a zero to all following active elements */
4700 if (first_mask_bit) {
4701 vext_set_elem_mask(vd, i, 0);
4702 continue;
4704 if (vext_elem_mask(vs2, i)) {
4705 first_mask_bit = true;
4706 if (type == BEFORE_FIRST) {
4707 vext_set_elem_mask(vd, i, 0);
4708 } else {
4709 vext_set_elem_mask(vd, i, 1);
4711 } else {
4712 if (type == ONLY_FIRST) {
4713 vext_set_elem_mask(vd, i, 0);
4714 } else {
4715 vext_set_elem_mask(vd, i, 1);
4719 env->vstart = 0;
4721 * mask destination registers are always tail-agnostic
4722 * set tail elements to 1s
4724 if (vta_all_1s) {
4725 for (; i < total_elems; i++) {
4726 vext_set_elem_mask(vd, i, 1);
4731 void HELPER(vmsbf_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4732 uint32_t desc)
4734 vmsetm(vd, v0, vs2, env, desc, BEFORE_FIRST);
4737 void HELPER(vmsif_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4738 uint32_t desc)
4740 vmsetm(vd, v0, vs2, env, desc, INCLUDE_FIRST);
4743 void HELPER(vmsof_m)(void *vd, void *v0, void *vs2, CPURISCVState *env,
4744 uint32_t desc)
4746 vmsetm(vd, v0, vs2, env, desc, ONLY_FIRST);
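/*
 * Worked example for the three helpers above, with all elements active and
 * vs2 mask bits (element 0 first) 0,0,1,0,0,1,0,0:
 *
 *   vmsbf.m -> 1,1,0,0,0,0,0,0   (set before the first set bit)
 *   vmsif.m -> 1,1,1,0,0,0,0,0   (set up to and including the first set bit)
 *   vmsof.m -> 0,0,1,0,0,0,0,0   (set only the first set bit)
 */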
4749 /* Vector Iota Instruction */
4750 #define GEN_VEXT_VIOTA_M(NAME, ETYPE, H) \
4751 void HELPER(NAME)(void *vd, void *v0, void *vs2, CPURISCVState *env, \
4752 uint32_t desc) \
4754 uint32_t vm = vext_vm(desc); \
4755 uint32_t vl = env->vl; \
4756 uint32_t esz = sizeof(ETYPE); \
4757 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4758 uint32_t vta = vext_vta(desc); \
4759 uint32_t vma = vext_vma(desc); \
4760 uint32_t sum = 0; \
4761 int i; \
4763 for (i = env->vstart; i < vl; i++) { \
4764 if (!vm && !vext_elem_mask(v0, i)) { \
4765 /* set masked-off elements to 1s */ \
4766 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4767 continue; \
4769 *((ETYPE *)vd + H(i)) = sum; \
4770 if (vext_elem_mask(vs2, i)) { \
4771 sum++; \
4774 env->vstart = 0; \
4775 /* set tail elements to 1s */ \
4776 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4779 GEN_VEXT_VIOTA_M(viota_m_b, uint8_t, H1)
4780 GEN_VEXT_VIOTA_M(viota_m_h, uint16_t, H2)
4781 GEN_VEXT_VIOTA_M(viota_m_w, uint32_t, H4)
4782 GEN_VEXT_VIOTA_M(viota_m_d, uint64_t, H8)
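/*
 * Illustrative sketch (not used above; the names are hypothetical): an
 * unmasked viota.m over a byte-per-element copy of the source mask,
 * mirroring the loop in GEN_VEXT_VIOTA_M.  vd[i] is the number of set mask
 * bits strictly before element i, e.g. a mask of 1,0,1,1,0 produces
 * 0,1,1,2,3.
 */
static inline void example_viota(uint32_t *vd, const uint8_t *vs2_mask,
                                 uint32_t vl)
{
    uint32_t sum = 0;

    for (uint32_t i = 0; i < vl; i++) {
        vd[i] = sum;                /* prefix count of earlier set bits */
        if (vs2_mask[i]) {
            sum++;
        }
    }
}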
4784 /* Vector Element Index Instruction */
4785 #define GEN_VEXT_VID_V(NAME, ETYPE, H) \
4786 void HELPER(NAME)(void *vd, void *v0, CPURISCVState *env, uint32_t desc) \
4788 uint32_t vm = vext_vm(desc); \
4789 uint32_t vl = env->vl; \
4790 uint32_t esz = sizeof(ETYPE); \
4791 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4792 uint32_t vta = vext_vta(desc); \
4793 uint32_t vma = vext_vma(desc); \
4794 int i; \
4796 VSTART_CHECK_EARLY_EXIT(env); \
4798 for (i = env->vstart; i < vl; i++) { \
4799 if (!vm && !vext_elem_mask(v0, i)) { \
4800 /* set masked-off elements to 1s */ \
4801 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4802 continue; \
4804 *((ETYPE *)vd + H(i)) = i; \
4806 env->vstart = 0; \
4807 /* set tail elements to 1s */ \
4808 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4811 GEN_VEXT_VID_V(vid_v_b, uint8_t, H1)
4812 GEN_VEXT_VID_V(vid_v_h, uint16_t, H2)
4813 GEN_VEXT_VID_V(vid_v_w, uint32_t, H4)
4814 GEN_VEXT_VID_V(vid_v_d, uint64_t, H8)
4817 * Vector Permutation Instructions
4820 /* Vector Slide Instructions */
4821 #define GEN_VEXT_VSLIDEUP_VX(NAME, ETYPE, H) \
4822 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4823 CPURISCVState *env, uint32_t desc) \
4825 uint32_t vm = vext_vm(desc); \
4826 uint32_t vl = env->vl; \
4827 uint32_t esz = sizeof(ETYPE); \
4828 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4829 uint32_t vta = vext_vta(desc); \
4830 uint32_t vma = vext_vma(desc); \
4831 target_ulong offset = s1, i_min, i; \
4833 VSTART_CHECK_EARLY_EXIT(env); \
4835 i_min = MAX(env->vstart, offset); \
4836 for (i = i_min; i < vl; i++) { \
4837 if (!vm && !vext_elem_mask(v0, i)) { \
4838 /* set masked-off elements to 1s */ \
4839 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4840 continue; \
4842 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - offset)); \
4844 env->vstart = 0; \
4845 /* set tail elements to 1s */ \
4846 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4849 /* vslideup.vx vd, vs2, rs1, vm # vd[i+rs1] = vs2[i] */
4850 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_b, uint8_t, H1)
4851 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_h, uint16_t, H2)
4852 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_w, uint32_t, H4)
4853 GEN_VEXT_VSLIDEUP_VX(vslideup_vx_d, uint64_t, H8)
4855 #define GEN_VEXT_VSLIDEDOWN_VX(NAME, ETYPE, H) \
4856 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4857 CPURISCVState *env, uint32_t desc) \
4859 uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
4860 uint32_t vm = vext_vm(desc); \
4861 uint32_t vl = env->vl; \
4862 uint32_t esz = sizeof(ETYPE); \
4863 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4864 uint32_t vta = vext_vta(desc); \
4865 uint32_t vma = vext_vma(desc); \
4866 target_ulong i_max, i_min, i; \
4868 VSTART_CHECK_EARLY_EXIT(env); \
4870 i_min = MIN(s1 < vlmax ? vlmax - s1 : 0, vl); \
4871 i_max = MAX(i_min, env->vstart); \
4872 for (i = env->vstart; i < i_max; ++i) { \
4873 if (!vm && !vext_elem_mask(v0, i)) { \
4874 /* set masked-off elements to 1s */ \
4875 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4876 continue; \
4878 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + s1)); \
4881 for (i = i_max; i < vl; ++i) { \
4882 if (vm || vext_elem_mask(v0, i)) { \
4883 *((ETYPE *)vd + H(i)) = 0; \
4887 env->vstart = 0; \
4888 /* set tail elements to 1s */ \
4889 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4892 /* vslidedown.vx vd, vs2, rs1, vm # vd[i] = vs2[i+rs1] */
4893 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_b, uint8_t, H1)
4894 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_h, uint16_t, H2)
4895 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_w, uint32_t, H4)
4896 GEN_VEXT_VSLIDEDOWN_VX(vslidedown_vx_d, uint64_t, H8)
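/*
 * Note on the slide-down helper above: for each active element i < vl,
 * vd[i] = vs2[i + OFFSET] when i + OFFSET < VLMAX and 0 otherwise.  i_max
 * marks the first destination index whose source would sit at or beyond
 * VLMAX, so the first loop copies and the second loop zeroes, both
 * starting no earlier than env->vstart.
 */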
4898 #define GEN_VEXT_VSLIE1UP(BITWIDTH, H) \
4899 static void vslide1up_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
4900 void *vs2, CPURISCVState *env, \
4901 uint32_t desc) \
4903 typedef uint##BITWIDTH##_t ETYPE; \
4904 uint32_t vm = vext_vm(desc); \
4905 uint32_t vl = env->vl; \
4906 uint32_t esz = sizeof(ETYPE); \
4907 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4908 uint32_t vta = vext_vta(desc); \
4909 uint32_t vma = vext_vma(desc); \
4910 uint32_t i; \
4912 VSTART_CHECK_EARLY_EXIT(env); \
4914 for (i = env->vstart; i < vl; i++) { \
4915 if (!vm && !vext_elem_mask(v0, i)) { \
4916 /* set masked-off elements to 1s */ \
4917 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4918 continue; \
4920 if (i == 0) { \
4921 *((ETYPE *)vd + H(i)) = s1; \
4922 } else { \
4923 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i - 1)); \
4926 env->vstart = 0; \
4927 /* set tail elements to 1s */ \
4928 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4931 GEN_VEXT_VSLIE1UP(8, H1)
4932 GEN_VEXT_VSLIE1UP(16, H2)
4933 GEN_VEXT_VSLIE1UP(32, H4)
4934 GEN_VEXT_VSLIE1UP(64, H8)
4936 #define GEN_VEXT_VSLIDE1UP_VX(NAME, BITWIDTH) \
4937 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4938 CPURISCVState *env, uint32_t desc) \
4940 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4943 /* vslide1up.vx vd, vs2, rs1, vm # vd[0]=x[rs1], vd[i+1] = vs2[i] */
4944 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_b, 8)
4945 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_h, 16)
4946 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_w, 32)
4947 GEN_VEXT_VSLIDE1UP_VX(vslide1up_vx_d, 64)
4949 #define GEN_VEXT_VSLIDE1DOWN(BITWIDTH, H) \
4950 static void vslide1down_##BITWIDTH(void *vd, void *v0, uint64_t s1, \
4951 void *vs2, CPURISCVState *env, \
4952 uint32_t desc) \
4954 typedef uint##BITWIDTH##_t ETYPE; \
4955 uint32_t vm = vext_vm(desc); \
4956 uint32_t vl = env->vl; \
4957 uint32_t esz = sizeof(ETYPE); \
4958 uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
4959 uint32_t vta = vext_vta(desc); \
4960 uint32_t vma = vext_vma(desc); \
4961 uint32_t i; \
4963 VSTART_CHECK_EARLY_EXIT(env); \
4965 for (i = env->vstart; i < vl; i++) { \
4966 if (!vm && !vext_elem_mask(v0, i)) { \
4967 /* set masked-off elements to 1s */ \
4968 vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
4969 continue; \
4971 if (i == vl - 1) { \
4972 *((ETYPE *)vd + H(i)) = s1; \
4973 } else { \
4974 *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(i + 1)); \
4977 env->vstart = 0; \
4978 /* set tail elements to 1s */ \
4979 vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
4982 GEN_VEXT_VSLIDE1DOWN(8, H1)
4983 GEN_VEXT_VSLIDE1DOWN(16, H2)
4984 GEN_VEXT_VSLIDE1DOWN(32, H4)
4985 GEN_VEXT_VSLIDE1DOWN(64, H8)
4987 #define GEN_VEXT_VSLIDE1DOWN_VX(NAME, BITWIDTH) \
4988 void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
4989 CPURISCVState *env, uint32_t desc) \
4991 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
4994 /* vslide1down.vx vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=x[rs1] */
4995 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_b, 8)
4996 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_h, 16)
4997 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_w, 32)
4998 GEN_VEXT_VSLIDE1DOWN_VX(vslide1down_vx_d, 64)
5000 /* Vector Floating-Point Slide Instructions */
5001 #define GEN_VEXT_VFSLIDE1UP_VF(NAME, BITWIDTH) \
5002 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5003 CPURISCVState *env, uint32_t desc) \
5005 vslide1up_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5008 /* vfslide1up.vf vd, vs2, rs1, vm # vd[0]=f[rs1], vd[i+1] = vs2[i] */
5009 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_h, 16)
5010 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_w, 32)
5011 GEN_VEXT_VFSLIDE1UP_VF(vfslide1up_vf_d, 64)
5013 #define GEN_VEXT_VFSLIDE1DOWN_VF(NAME, BITWIDTH) \
5014 void HELPER(NAME)(void *vd, void *v0, uint64_t s1, void *vs2, \
5015 CPURISCVState *env, uint32_t desc) \
5017 vslide1down_##BITWIDTH(vd, v0, s1, vs2, env, desc); \
5020 /* vfslide1down.vf vd, vs2, rs1, vm # vd[i] = vs2[i+1], vd[vl-1]=f[rs1] */
5021 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_h, 16)
5022 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_w, 32)
5023 GEN_VEXT_VFSLIDE1DOWN_VF(vfslide1down_vf_d, 64)

/* Vector Register Gather Instruction */
#define GEN_VEXT_VRGATHER_VV(NAME, TS1, TS2, HS1, HS2) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(TS2))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(TS2); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index; \
    uint32_t i; \
    \
    VSTART_CHECK_EARLY_EXIT(env); \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        index = *((TS1 *)vs1 + HS1(i)); \
        if (index >= vlmax) { \
            *((TS2 *)vd + HS2(i)) = 0; \
        } else { \
            *((TS2 *)vd + HS2(i)) = *((TS2 *)vs2 + HS2(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (vs1[i] >= VLMAX) ? 0 : vs2[vs1[i]]; */
GEN_VEXT_VRGATHER_VV(vrgather_vv_b, uint8_t, uint8_t, H1, H1)
GEN_VEXT_VRGATHER_VV(vrgather_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgather_vv_w, uint32_t, uint32_t, H4, H4)
GEN_VEXT_VRGATHER_VV(vrgather_vv_d, uint64_t, uint64_t, H8, H8)

GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_b, uint16_t, uint8_t, H2, H1)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_h, uint16_t, uint16_t, H2, H2)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_w, uint16_t, uint32_t, H2, H4)
GEN_VEXT_VRGATHER_VV(vrgatherei16_vv_d, uint16_t, uint64_t, H2, H8)
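
/*
 * Editor's illustrative sketch (hypothetical name ref_vrgather_vv_w): a
 * plain-array reference model of vrgather.vv for 32-bit elements, ignoring
 * masking, tail policy and vstart, and assuming vd does not alias vs1 or
 * vs2.  As in the helper above, out-of-range indices produce zero.
 */
static G_GNUC_UNUSED void ref_vrgather_vv_w(uint32_t *vd, const uint32_t *vs1,
                                            const uint32_t *vs2, uint32_t vl,
                                            uint32_t vlmax)
{
    uint32_t i;

    for (i = 0; i < vl; i++) {
        uint32_t index = vs1[i];
        vd[i] = (index >= vlmax) ? 0 : vs2[index];
    }
}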

#define GEN_VEXT_VRGATHER_VX(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, target_ulong s1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vlmax = vext_max_elems(desc, ctzl(sizeof(ETYPE))); \
    uint32_t vm = vext_vm(desc); \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint64_t index = s1; \
    uint32_t i; \
    \
    VSTART_CHECK_EARLY_EXIT(env); \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        if (index >= vlmax) { \
            *((ETYPE *)vd + H(i)) = 0; \
        } else { \
            *((ETYPE *)vd + H(i)) = *((ETYPE *)vs2 + H(index)); \
        } \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[x[rs1]] */
GEN_VEXT_VRGATHER_VX(vrgather_vx_b, uint8_t, H1)
GEN_VEXT_VRGATHER_VX(vrgather_vx_h, uint16_t, H2)
GEN_VEXT_VRGATHER_VX(vrgather_vx_w, uint32_t, H4)
GEN_VEXT_VRGATHER_VX(vrgather_vx_d, uint64_t, H8)

/* Vector Compress Instruction */
#define GEN_VEXT_VCOMPRESS_VM(NAME, ETYPE, H) \
void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t num = 0, i; \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vext_elem_mask(vs1, i)) { \
            continue; \
        } \
        *((ETYPE *)vd + H(num)) = *((ETYPE *)vs2 + H(i)); \
        num++; \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* Compress into vd elements of vs2 where vs1 is enabled */
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_b, uint8_t, H1)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_h, uint16_t, H2)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_w, uint32_t, H4)
GEN_VEXT_VCOMPRESS_VM(vcompress_vm_d, uint64_t, H8)
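
/*
 * Editor's illustrative sketch (hypothetical name ref_vcompress_w): packs
 * the elements of vs2 whose mask bit is set into the low elements of vd and
 * returns the packed count.  For simplicity the mask is one byte per element
 * here, whereas the real helper reads a packed mask register through
 * vext_elem_mask().  Ignores vstart and the tail handling.
 */
static G_GNUC_UNUSED uint32_t ref_vcompress_w(uint32_t *vd, const uint8_t *mask,
                                              const uint32_t *vs2, uint32_t vl)
{
    uint32_t i, num = 0;

    for (i = 0; i < vl; i++) {
        if (mask[i]) {                   /* element i selected by vs1? */
            vd[num++] = vs2[i];
        }
    }
    return num;                          /* number of packed elements */
}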

/* Vector Whole Register Move */
void HELPER(vmvr_v)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc)
{
    /* EEW = SEW */
    uint32_t maxsz = simd_maxsz(desc);
    uint32_t sewb = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW);
    uint32_t startb = env->vstart * sewb;
    uint32_t i = startb;

    if (startb >= maxsz) {
        env->vstart = 0;
        return;
    }

    /*
     * On a big-endian host, H1() reverses byte order within each 8-byte
     * group, so a partial leading group has to be copied separately before
     * the bulk copy below can start on a group boundary.
     */
    if (HOST_BIG_ENDIAN && i % 8 != 0) {
        uint32_t j = ROUND_UP(i, 8);
        memcpy((uint8_t *)vd + H1(j - 1),
               (uint8_t *)vs2 + H1(j - 1),
               j - i);
        i = j;
    }

    memcpy((uint8_t *)vd + H1(i),
           (uint8_t *)vs2 + H1(i),
           maxsz - i);

    env->vstart = 0;
}
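
/*
 * Editor's illustrative sketch (hypothetical name ref_vmvr_v_bytes): a
 * byte-at-a-time statement of what the whole-register move has to do, using
 * the same H1() byte addressing as the helper above; the memcpy()-based code
 * above does this copy in bulk instead.  On a little-endian host H1() is the
 * identity, so this is a straight byte copy from startb to maxsz.
 */
static G_GNUC_UNUSED void ref_vmvr_v_bytes(uint8_t *vd, const uint8_t *vs2,
                                           uint32_t startb, uint32_t maxsz)
{
    uint32_t b;

    for (b = startb; b < maxsz; b++) {
        vd[H1(b)] = vs2[H1(b)];
    }
}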

/* Vector Integer Extension */
#define GEN_VEXT_INT_EXT(NAME, ETYPE, DTYPE, HD, HS1) \
void HELPER(NAME)(void *vd, void *v0, void *vs2, \
                  CPURISCVState *env, uint32_t desc) \
{ \
    uint32_t vl = env->vl; \
    uint32_t vm = vext_vm(desc); \
    uint32_t esz = sizeof(ETYPE); \
    uint32_t total_elems = vext_get_total_elems(env, desc, esz); \
    uint32_t vta = vext_vta(desc); \
    uint32_t vma = vext_vma(desc); \
    uint32_t i; \
    \
    VSTART_CHECK_EARLY_EXIT(env); \
    \
    for (i = env->vstart; i < vl; i++) { \
        if (!vm && !vext_elem_mask(v0, i)) { \
            /* set masked-off elements to 1s */ \
            vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); \
            continue; \
        } \
        *((ETYPE *)vd + HD(i)) = *((DTYPE *)vs2 + HS1(i)); \
    } \
    env->vstart = 0; \
    /* set tail elements to 1s */ \
    vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \
}

/* Zero-extend: source elements are SEW/2, SEW/4 or SEW/8 wide */
GEN_VEXT_INT_EXT(vzext_vf2_h, uint16_t, uint8_t, H2, H1)
GEN_VEXT_INT_EXT(vzext_vf2_w, uint32_t, uint16_t, H4, H2)
GEN_VEXT_INT_EXT(vzext_vf2_d, uint64_t, uint32_t, H8, H4)
GEN_VEXT_INT_EXT(vzext_vf4_w, uint32_t, uint8_t, H4, H1)
GEN_VEXT_INT_EXT(vzext_vf4_d, uint64_t, uint16_t, H8, H2)
GEN_VEXT_INT_EXT(vzext_vf8_d, uint64_t, uint8_t, H8, H1)

/* Sign-extend: source elements are SEW/2, SEW/4 or SEW/8 wide */
GEN_VEXT_INT_EXT(vsext_vf2_h, int16_t, int8_t, H2, H1)
GEN_VEXT_INT_EXT(vsext_vf2_w, int32_t, int16_t, H4, H2)
GEN_VEXT_INT_EXT(vsext_vf2_d, int64_t, int32_t, H8, H4)
GEN_VEXT_INT_EXT(vsext_vf4_w, int32_t, int8_t, H4, H1)
GEN_VEXT_INT_EXT(vsext_vf4_d, int64_t, int16_t, H8, H2)
GEN_VEXT_INT_EXT(vsext_vf8_d, int64_t, int8_t, H8, H1)
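
/*
 * Editor's illustrative sketch (hypothetical name ref_vzext_vf2_w): reference
 * model of vzext.vf2 with SEW=32, where each 16-bit source element is
 * zero-extended into a 32-bit destination element; the signed instantiations
 * above work the same way with int16_t/int32_t and give sign extension.
 * Ignores masking, tail policy and vstart.
 */
static G_GNUC_UNUSED void ref_vzext_vf2_w(uint32_t *vd, const uint16_t *vs2,
                                          uint32_t vl)
{
    uint32_t i;

    for (i = 0; i < vl; i++) {
        vd[i] = vs2[i];              /* implicit zero extension on assignment */
    }
}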