target/loongarch/kvm: fpu save the vreg registers high 192bit
[qemu/ar7.git] / target / hexagon / op_helper.c
blobda10ac5847696c12fdaab941037950e872743146
1 /*
2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18 #include "qemu/osdep.h"
19 #include "qemu/log.h"
20 #include "exec/exec-all.h"
21 #include "exec/cpu_ldst.h"
22 #include "exec/helper-proto.h"
23 #include "fpu/softfloat.h"
24 #include "cpu.h"
25 #include "internal.h"
26 #include "macros.h"
27 #include "arch.h"
28 #include "hex_arch_types.h"
29 #include "fma_emu.h"
30 #include "mmvec/mmvec.h"
31 #include "mmvec/macros.h"
32 #include "op_helper.h"
33 #include "translate.h"
35 #define SF_BIAS 127
36 #define SF_MANTBITS 23
38 /* Exceptions processing helpers */
39 static G_NORETURN
40 void do_raise_exception_err(CPUHexagonState *env,
41 uint32_t exception,
42 uintptr_t pc)
44 CPUState *cs = env_cpu(env);
45 qemu_log_mask(CPU_LOG_INT, "%s: %d\n", __func__, exception);
46 cs->exception_index = exception;
47 cpu_loop_exit_restore(cs, pc);
50 G_NORETURN void HELPER(raise_exception)(CPUHexagonState *env, uint32_t excp)
52 do_raise_exception_err(env, excp, 0);
55 void log_store32(CPUHexagonState *env, target_ulong addr,
56 target_ulong val, int width, int slot)
58 HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
59 ", %" PRId32 " [0x08%" PRIx32 "])\n",
60 width, addr, val, val);
61 env->mem_log_stores[slot].va = addr;
62 env->mem_log_stores[slot].width = width;
63 env->mem_log_stores[slot].data32 = val;
66 void log_store64(CPUHexagonState *env, target_ulong addr,
67 int64_t val, int width, int slot)
69 HEX_DEBUG_LOG("log_store%d(0x" TARGET_FMT_lx
70 ", %" PRId64 " [0x016%" PRIx64 "])\n",
71 width, addr, val, val);
72 env->mem_log_stores[slot].va = addr;
73 env->mem_log_stores[slot].width = width;
74 env->mem_log_stores[slot].data64 = val;
77 /* Handy place to set a breakpoint */
78 void HELPER(debug_start_packet)(CPUHexagonState *env)
80 HEX_DEBUG_LOG("Start packet: pc = 0x" TARGET_FMT_lx "\n",
81 env->gpr[HEX_REG_PC]);
83 for (int i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
84 env->reg_written[i] = 0;
88 /* Checks for bookkeeping errors between disassembly context and runtime */
89 void HELPER(debug_check_store_width)(CPUHexagonState *env, int slot, int check)
91 if (env->mem_log_stores[slot].width != check) {
92 HEX_DEBUG_LOG("ERROR: %d != %d\n",
93 env->mem_log_stores[slot].width, check);
94 g_assert_not_reached();
98 static void commit_store(CPUHexagonState *env, int slot_num, uintptr_t ra)
100 uint8_t width = env->mem_log_stores[slot_num].width;
101 target_ulong va = env->mem_log_stores[slot_num].va;
103 switch (width) {
104 case 1:
105 cpu_stb_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
106 break;
107 case 2:
108 cpu_stw_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
109 break;
110 case 4:
111 cpu_stl_data_ra(env, va, env->mem_log_stores[slot_num].data32, ra);
112 break;
113 case 8:
114 cpu_stq_data_ra(env, va, env->mem_log_stores[slot_num].data64, ra);
115 break;
116 default:
117 g_assert_not_reached();
121 void HELPER(commit_store)(CPUHexagonState *env, int slot_num)
123 uintptr_t ra = GETPC();
124 commit_store(env, slot_num, ra);
127 void HELPER(gather_store)(CPUHexagonState *env, uint32_t addr, int slot)
129 mem_gather_store(env, addr, slot);
132 void HELPER(commit_hvx_stores)(CPUHexagonState *env)
134 uintptr_t ra = GETPC();
136 /* Normal (possibly masked) vector store */
137 for (int i = 0; i < VSTORES_MAX; i++) {
138 if (env->vstore_pending[i]) {
139 env->vstore_pending[i] = 0;
140 target_ulong va = env->vstore[i].va;
141 int size = env->vstore[i].size;
142 for (int j = 0; j < size; j++) {
143 if (test_bit(j, env->vstore[i].mask)) {
144 cpu_stb_data_ra(env, va + j, env->vstore[i].data.ub[j], ra);
150 /* Scatter store */
151 if (env->vtcm_pending) {
152 env->vtcm_pending = false;
153 if (env->vtcm_log.op) {
154 /* Need to perform the scatter read/modify/write at commit time */
155 if (env->vtcm_log.op_size == 2) {
156 SCATTER_OP_WRITE_TO_MEM(uint16_t);
157 } else if (env->vtcm_log.op_size == 4) {
158 /* Word Scatter += */
159 SCATTER_OP_WRITE_TO_MEM(uint32_t);
160 } else {
161 g_assert_not_reached();
163 } else {
164 for (int i = 0; i < sizeof(MMVector); i++) {
165 if (test_bit(i, env->vtcm_log.mask)) {
166 cpu_stb_data_ra(env, env->vtcm_log.va[i],
167 env->vtcm_log.data.ub[i], ra);
168 clear_bit(i, env->vtcm_log.mask);
169 env->vtcm_log.data.ub[i] = 0;
177 static void print_store(CPUHexagonState *env, int slot)
179 if (!(env->slot_cancelled & (1 << slot))) {
180 uint8_t width = env->mem_log_stores[slot].width;
181 if (width == 1) {
182 uint32_t data = env->mem_log_stores[slot].data32 & 0xff;
183 HEX_DEBUG_LOG("\tmemb[0x" TARGET_FMT_lx "] = %" PRId32
184 " (0x%02" PRIx32 ")\n",
185 env->mem_log_stores[slot].va, data, data);
186 } else if (width == 2) {
187 uint32_t data = env->mem_log_stores[slot].data32 & 0xffff;
188 HEX_DEBUG_LOG("\tmemh[0x" TARGET_FMT_lx "] = %" PRId32
189 " (0x%04" PRIx32 ")\n",
190 env->mem_log_stores[slot].va, data, data);
191 } else if (width == 4) {
192 uint32_t data = env->mem_log_stores[slot].data32;
193 HEX_DEBUG_LOG("\tmemw[0x" TARGET_FMT_lx "] = %" PRId32
194 " (0x%08" PRIx32 ")\n",
195 env->mem_log_stores[slot].va, data, data);
196 } else if (width == 8) {
197 HEX_DEBUG_LOG("\tmemd[0x" TARGET_FMT_lx "] = %" PRId64
198 " (0x%016" PRIx64 ")\n",
199 env->mem_log_stores[slot].va,
200 env->mem_log_stores[slot].data64,
201 env->mem_log_stores[slot].data64);
202 } else {
203 HEX_DEBUG_LOG("\tBad store width %d\n", width);
204 g_assert_not_reached();
209 /* This function is a handy place to set a breakpoint */
210 void HELPER(debug_commit_end)(CPUHexagonState *env, uint32_t this_PC,
211 int pred_written, int has_st0, int has_st1)
213 bool reg_printed = false;
214 bool pred_printed = false;
215 int i;
217 HEX_DEBUG_LOG("Packet committed: pc = 0x" TARGET_FMT_lx "\n", this_PC);
218 HEX_DEBUG_LOG("slot_cancelled = %d\n", env->slot_cancelled);
220 for (i = 0; i < TOTAL_PER_THREAD_REGS; i++) {
221 if (env->reg_written[i]) {
222 if (!reg_printed) {
223 HEX_DEBUG_LOG("Regs written\n");
224 reg_printed = true;
226 HEX_DEBUG_LOG("\tr%d = " TARGET_FMT_ld " (0x" TARGET_FMT_lx ")\n",
227 i, env->gpr[i], env->gpr[i]);
231 for (i = 0; i < NUM_PREGS; i++) {
232 if (pred_written & (1 << i)) {
233 if (!pred_printed) {
234 HEX_DEBUG_LOG("Predicates written\n");
235 pred_printed = true;
237 HEX_DEBUG_LOG("\tp%d = 0x" TARGET_FMT_lx "\n",
238 i, env->pred[i]);
242 if (has_st0 || has_st1) {
243 HEX_DEBUG_LOG("Stores\n");
244 if (has_st0) {
245 print_store(env, 0);
247 if (has_st1) {
248 print_store(env, 1);
252 HEX_DEBUG_LOG("Next PC = " TARGET_FMT_lx "\n", env->gpr[HEX_REG_PC]);
253 HEX_DEBUG_LOG("Exec counters: pkt = " TARGET_FMT_lx
254 ", insn = " TARGET_FMT_lx
255 ", hvx = " TARGET_FMT_lx "\n",
256 env->gpr[HEX_REG_QEMU_PKT_CNT],
257 env->gpr[HEX_REG_QEMU_INSN_CNT],
258 env->gpr[HEX_REG_QEMU_HVX_CNT]);
262 int32_t HELPER(fcircadd)(int32_t RxV, int32_t offset, int32_t M, int32_t CS)
264 uint32_t K_const = extract32(M, 24, 4);
265 uint32_t length = extract32(M, 0, 17);
266 uint32_t new_ptr = RxV + offset;
267 uint32_t start_addr;
268 uint32_t end_addr;
270 if (K_const == 0 && length >= 4) {
271 start_addr = CS;
272 end_addr = start_addr + length;
273 } else {
275 * Versions v3 and earlier used the K value to specify a power-of-2 size
276 * 2^(K+2) that is greater than the buffer length
278 int32_t mask = (1 << (K_const + 2)) - 1;
279 start_addr = RxV & (~mask);
280 end_addr = start_addr | length;
283 if (new_ptr >= end_addr) {
284 new_ptr -= length;
285 } else if (new_ptr < start_addr) {
286 new_ptr += length;
289 return new_ptr;
292 uint32_t HELPER(fbrev)(uint32_t addr)
295 * Bit reverse the low 16 bits of the address
297 return deposit32(addr, 0, 16, revbit16(addr));
300 static float32 build_float32(uint8_t sign, uint32_t exp, uint32_t mant)
302 return make_float32(
303 ((sign & 1) << 31) |
304 ((exp & 0xff) << SF_MANTBITS) |
305 (mant & ((1 << SF_MANTBITS) - 1)));
309 * sfrecipa, sfinvsqrta have two 32-bit results
310 * r0,p0=sfrecipa(r1,r2)
311 * r0,p0=sfinvsqrta(r1)
313 * Since helpers can only return a single value, we pack the two results
314 * into a 64-bit value.
316 uint64_t HELPER(sfrecipa)(CPUHexagonState *env, float32 RsV, float32 RtV)
318 int32_t PeV = 0;
319 float32 RdV;
320 int idx;
321 int adjust;
322 int mant;
323 int exp;
325 arch_fpop_start(env);
326 if (arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status)) {
327 PeV = adjust;
328 idx = (RtV >> 16) & 0x7f;
329 mant = (recip_lookup_table[idx] << 15) | 1;
330 exp = SF_BIAS - (float32_getexp(RtV) - SF_BIAS) - 1;
331 RdV = build_float32(extract32(RtV, 31, 1), exp, mant);
333 arch_fpop_end(env);
334 return ((uint64_t)RdV << 32) | PeV;
337 uint64_t HELPER(sfinvsqrta)(CPUHexagonState *env, float32 RsV)
339 int PeV = 0;
340 float32 RdV;
341 int idx;
342 int adjust;
343 int mant;
344 int exp;
346 arch_fpop_start(env);
347 if (arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status)) {
348 PeV = adjust;
349 idx = (RsV >> 17) & 0x7f;
350 mant = (invsqrt_lookup_table[idx] << 15);
351 exp = SF_BIAS - ((float32_getexp(RsV) - SF_BIAS) >> 1) - 1;
352 RdV = build_float32(extract32(RsV, 31, 1), exp, mant);
354 arch_fpop_end(env);
355 return ((uint64_t)RdV << 32) | PeV;
358 int64_t HELPER(vacsh_val)(CPUHexagonState *env,
359 int64_t RxxV, int64_t RssV, int64_t RttV,
360 uint32_t pkt_need_commit)
362 for (int i = 0; i < 4; i++) {
363 int xv = sextract64(RxxV, i * 16, 16);
364 int sv = sextract64(RssV, i * 16, 16);
365 int tv = sextract64(RttV, i * 16, 16);
366 int max;
367 xv = xv + tv;
368 sv = sv - tv;
369 max = xv > sv ? xv : sv;
370 /* Note that fSATH can set the OVF bit in usr */
371 RxxV = deposit64(RxxV, i * 16, 16, fSATH(max));
373 return RxxV;
376 int32_t HELPER(vacsh_pred)(CPUHexagonState *env,
377 int64_t RxxV, int64_t RssV, int64_t RttV)
379 int32_t PeV = 0;
380 for (int i = 0; i < 4; i++) {
381 int xv = sextract64(RxxV, i * 16, 16);
382 int sv = sextract64(RssV, i * 16, 16);
383 int tv = sextract64(RttV, i * 16, 16);
384 xv = xv + tv;
385 sv = sv - tv;
386 PeV = deposit32(PeV, i * 2, 1, (xv > sv));
387 PeV = deposit32(PeV, i * 2 + 1, 1, (xv > sv));
389 return PeV;
392 int64_t HELPER(cabacdecbin_val)(int64_t RssV, int64_t RttV)
394 int64_t RddV = 0;
395 size4u_t state;
396 size4u_t valMPS;
397 size4u_t bitpos;
398 size4u_t range;
399 size4u_t offset;
400 size4u_t rLPS;
401 size4u_t rMPS;
403 state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
404 valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
405 bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
406 range = fGETWORD(0, RssV);
407 offset = fGETWORD(1, RssV);
409 /* calculate rLPS */
410 range <<= bitpos;
411 offset <<= bitpos;
412 rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
413 rLPS = rLPS << 23; /* left aligned */
415 /* calculate rMPS */
416 rMPS = (range & 0xff800000) - rLPS;
418 /* most probable region */
419 if (offset < rMPS) {
420 RddV = AC_next_state_MPS_64[state];
421 fINSERT_RANGE(RddV, 8, 8, valMPS);
422 fINSERT_RANGE(RddV, 31, 23, (rMPS >> 23));
423 fSETWORD(1, RddV, offset);
425 /* least probable region */
426 else {
427 RddV = AC_next_state_LPS_64[state];
428 fINSERT_RANGE(RddV, 8, 8, ((!state) ? (1 - valMPS) : (valMPS)));
429 fINSERT_RANGE(RddV, 31, 23, (rLPS >> 23));
430 fSETWORD(1, RddV, (offset - rMPS));
432 return RddV;
435 int32_t HELPER(cabacdecbin_pred)(int64_t RssV, int64_t RttV)
437 int32_t p0 = 0;
438 size4u_t state;
439 size4u_t valMPS;
440 size4u_t bitpos;
441 size4u_t range;
442 size4u_t offset;
443 size4u_t rLPS;
444 size4u_t rMPS;
446 state = fEXTRACTU_RANGE(fGETWORD(1, RttV), 5, 0);
447 valMPS = fEXTRACTU_RANGE(fGETWORD(1, RttV), 8, 8);
448 bitpos = fEXTRACTU_RANGE(fGETWORD(0, RttV), 4, 0);
449 range = fGETWORD(0, RssV);
450 offset = fGETWORD(1, RssV);
452 /* calculate rLPS */
453 range <<= bitpos;
454 offset <<= bitpos;
455 rLPS = rLPS_table_64x4[state][(range >> 29) & 3];
456 rLPS = rLPS << 23; /* left aligned */
458 /* calculate rMPS */
459 rMPS = (range & 0xff800000) - rLPS;
461 /* most probable region */
462 if (offset < rMPS) {
463 p0 = valMPS;
466 /* least probable region */
467 else {
468 p0 = valMPS ^ 1;
470 return p0;
473 static void probe_store(CPUHexagonState *env, int slot, int mmu_idx,
474 bool is_predicated, uintptr_t retaddr)
476 if (!is_predicated || !(env->slot_cancelled & (1 << slot))) {
477 size1u_t width = env->mem_log_stores[slot].width;
478 target_ulong va = env->mem_log_stores[slot].va;
479 probe_write(env, va, width, mmu_idx, retaddr);
484 * Called from a mem_noshuf packet to make sure the load doesn't
485 * raise an exception
487 void HELPER(probe_noshuf_load)(CPUHexagonState *env, target_ulong va,
488 int size, int mmu_idx)
490 uintptr_t retaddr = GETPC();
491 probe_read(env, va, size, mmu_idx, retaddr);
494 /* Called during packet commit when there are two scalar stores */
495 void HELPER(probe_pkt_scalar_store_s0)(CPUHexagonState *env, int args)
497 int mmu_idx = FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, MMU_IDX);
498 bool is_predicated =
499 FIELD_EX32(args, PROBE_PKT_SCALAR_STORE_S0, IS_PREDICATED);
500 uintptr_t ra = GETPC();
501 probe_store(env, 0, mmu_idx, is_predicated, ra);
504 static void probe_hvx_stores(CPUHexagonState *env, int mmu_idx,
505 uintptr_t retaddr)
507 /* Normal (possibly masked) vector store */
508 for (int i = 0; i < VSTORES_MAX; i++) {
509 if (env->vstore_pending[i]) {
510 target_ulong va = env->vstore[i].va;
511 int size = env->vstore[i].size;
512 for (int j = 0; j < size; j++) {
513 if (test_bit(j, env->vstore[i].mask)) {
514 probe_write(env, va + j, 1, mmu_idx, retaddr);
520 /* Scatter store */
521 if (env->vtcm_pending) {
522 if (env->vtcm_log.op) {
523 /* Need to perform the scatter read/modify/write at commit time */
524 if (env->vtcm_log.op_size == 2) {
525 SCATTER_OP_PROBE_MEM(size2u_t, mmu_idx, retaddr);
526 } else if (env->vtcm_log.op_size == 4) {
527 /* Word Scatter += */
528 SCATTER_OP_PROBE_MEM(size4u_t, mmu_idx, retaddr);
529 } else {
530 g_assert_not_reached();
532 } else {
533 for (int i = 0; i < sizeof(MMVector); i++) {
534 if (test_bit(i, env->vtcm_log.mask)) {
535 probe_write(env, env->vtcm_log.va[i], 1, mmu_idx, retaddr);
543 void HELPER(probe_hvx_stores)(CPUHexagonState *env, int mmu_idx)
545 uintptr_t retaddr = GETPC();
546 probe_hvx_stores(env, mmu_idx, retaddr);
549 void HELPER(probe_pkt_scalar_hvx_stores)(CPUHexagonState *env, int mask)
551 bool has_st0 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST0);
552 bool has_st1 = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_ST1);
553 bool has_hvx_stores =
554 FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, HAS_HVX_STORES);
555 bool s0_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S0_IS_PRED);
556 bool s1_is_pred = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, S1_IS_PRED);
557 int mmu_idx = FIELD_EX32(mask, PROBE_PKT_SCALAR_HVX_STORES, MMU_IDX);
558 uintptr_t ra = GETPC();
560 if (has_st0) {
561 probe_store(env, 0, mmu_idx, s0_is_pred, ra);
563 if (has_st1) {
564 probe_store(env, 1, mmu_idx, s1_is_pred, ra);
566 if (has_hvx_stores) {
567 probe_hvx_stores(env, mmu_idx, ra);
571 #ifndef CONFIG_HEXAGON_IDEF_PARSER
573 * mem_noshuf
574 * Section 5.5 of the Hexagon V67 Programmer's Reference Manual
576 * If the load is in slot 0 and there is a store in slot1 (that
577 * wasn't cancelled), we have to do the store first.
579 static void check_noshuf(CPUHexagonState *env, bool pkt_has_store_s1,
580 uint32_t slot, target_ulong vaddr, int size,
581 uintptr_t ra)
583 if (slot == 0 && pkt_has_store_s1 &&
584 ((env->slot_cancelled & (1 << 1)) == 0)) {
585 probe_read(env, vaddr, size, MMU_USER_IDX, ra);
586 commit_store(env, 1, ra);
589 #endif
591 /* Floating point */
592 float64 HELPER(conv_sf2df)(CPUHexagonState *env, float32 RsV)
594 float64 out_f64;
595 arch_fpop_start(env);
596 out_f64 = float32_to_float64(RsV, &env->fp_status);
597 arch_fpop_end(env);
598 return out_f64;
601 float32 HELPER(conv_df2sf)(CPUHexagonState *env, float64 RssV)
603 float32 out_f32;
604 arch_fpop_start(env);
605 out_f32 = float64_to_float32(RssV, &env->fp_status);
606 arch_fpop_end(env);
607 return out_f32;
610 float32 HELPER(conv_uw2sf)(CPUHexagonState *env, int32_t RsV)
612 float32 RdV;
613 arch_fpop_start(env);
614 RdV = uint32_to_float32(RsV, &env->fp_status);
615 arch_fpop_end(env);
616 return RdV;
619 float64 HELPER(conv_uw2df)(CPUHexagonState *env, int32_t RsV)
621 float64 RddV;
622 arch_fpop_start(env);
623 RddV = uint32_to_float64(RsV, &env->fp_status);
624 arch_fpop_end(env);
625 return RddV;
628 float32 HELPER(conv_w2sf)(CPUHexagonState *env, int32_t RsV)
630 float32 RdV;
631 arch_fpop_start(env);
632 RdV = int32_to_float32(RsV, &env->fp_status);
633 arch_fpop_end(env);
634 return RdV;
637 float64 HELPER(conv_w2df)(CPUHexagonState *env, int32_t RsV)
639 float64 RddV;
640 arch_fpop_start(env);
641 RddV = int32_to_float64(RsV, &env->fp_status);
642 arch_fpop_end(env);
643 return RddV;
646 float32 HELPER(conv_ud2sf)(CPUHexagonState *env, int64_t RssV)
648 float32 RdV;
649 arch_fpop_start(env);
650 RdV = uint64_to_float32(RssV, &env->fp_status);
651 arch_fpop_end(env);
652 return RdV;
655 float64 HELPER(conv_ud2df)(CPUHexagonState *env, int64_t RssV)
657 float64 RddV;
658 arch_fpop_start(env);
659 RddV = uint64_to_float64(RssV, &env->fp_status);
660 arch_fpop_end(env);
661 return RddV;
664 float32 HELPER(conv_d2sf)(CPUHexagonState *env, int64_t RssV)
666 float32 RdV;
667 arch_fpop_start(env);
668 RdV = int64_to_float32(RssV, &env->fp_status);
669 arch_fpop_end(env);
670 return RdV;
673 float64 HELPER(conv_d2df)(CPUHexagonState *env, int64_t RssV)
675 float64 RddV;
676 arch_fpop_start(env);
677 RddV = int64_to_float64(RssV, &env->fp_status);
678 arch_fpop_end(env);
679 return RddV;
682 uint32_t HELPER(conv_sf2uw)(CPUHexagonState *env, float32 RsV)
684 uint32_t RdV;
685 arch_fpop_start(env);
686 /* Hexagon checks the sign before rounding */
687 if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
688 float_raise(float_flag_invalid, &env->fp_status);
689 RdV = 0;
690 } else {
691 RdV = float32_to_uint32(RsV, &env->fp_status);
693 arch_fpop_end(env);
694 return RdV;
697 int32_t HELPER(conv_sf2w)(CPUHexagonState *env, float32 RsV)
699 int32_t RdV;
700 arch_fpop_start(env);
701 /* Hexagon returns -1 for NaN */
702 if (float32_is_any_nan(RsV)) {
703 float_raise(float_flag_invalid, &env->fp_status);
704 RdV = -1;
705 } else {
706 RdV = float32_to_int32(RsV, &env->fp_status);
708 arch_fpop_end(env);
709 return RdV;
712 uint64_t HELPER(conv_sf2ud)(CPUHexagonState *env, float32 RsV)
714 uint64_t RddV;
715 arch_fpop_start(env);
716 /* Hexagon checks the sign before rounding */
717 if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
718 float_raise(float_flag_invalid, &env->fp_status);
719 RddV = 0;
720 } else {
721 RddV = float32_to_uint64(RsV, &env->fp_status);
723 arch_fpop_end(env);
724 return RddV;
727 int64_t HELPER(conv_sf2d)(CPUHexagonState *env, float32 RsV)
729 int64_t RddV;
730 arch_fpop_start(env);
731 /* Hexagon returns -1 for NaN */
732 if (float32_is_any_nan(RsV)) {
733 float_raise(float_flag_invalid, &env->fp_status);
734 RddV = -1;
735 } else {
736 RddV = float32_to_int64(RsV, &env->fp_status);
738 arch_fpop_end(env);
739 return RddV;
742 uint32_t HELPER(conv_df2uw)(CPUHexagonState *env, float64 RssV)
744 uint32_t RdV;
745 arch_fpop_start(env);
746 /* Hexagon checks the sign before rounding */
747 if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
748 float_raise(float_flag_invalid, &env->fp_status);
749 RdV = 0;
750 } else {
751 RdV = float64_to_uint32(RssV, &env->fp_status);
753 arch_fpop_end(env);
754 return RdV;
757 int32_t HELPER(conv_df2w)(CPUHexagonState *env, float64 RssV)
759 int32_t RdV;
760 arch_fpop_start(env);
761 /* Hexagon returns -1 for NaN */
762 if (float64_is_any_nan(RssV)) {
763 float_raise(float_flag_invalid, &env->fp_status);
764 RdV = -1;
765 } else {
766 RdV = float64_to_int32(RssV, &env->fp_status);
768 arch_fpop_end(env);
769 return RdV;
772 uint64_t HELPER(conv_df2ud)(CPUHexagonState *env, float64 RssV)
774 uint64_t RddV;
775 arch_fpop_start(env);
776 /* Hexagon checks the sign before rounding */
777 if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
778 float_raise(float_flag_invalid, &env->fp_status);
779 RddV = 0;
780 } else {
781 RddV = float64_to_uint64(RssV, &env->fp_status);
783 arch_fpop_end(env);
784 return RddV;
787 int64_t HELPER(conv_df2d)(CPUHexagonState *env, float64 RssV)
789 int64_t RddV;
790 arch_fpop_start(env);
791 /* Hexagon returns -1 for NaN */
792 if (float64_is_any_nan(RssV)) {
793 float_raise(float_flag_invalid, &env->fp_status);
794 RddV = -1;
795 } else {
796 RddV = float64_to_int64(RssV, &env->fp_status);
798 arch_fpop_end(env);
799 return RddV;
802 uint32_t HELPER(conv_sf2uw_chop)(CPUHexagonState *env, float32 RsV)
804 uint32_t RdV;
805 arch_fpop_start(env);
806 /* Hexagon checks the sign before rounding */
807 if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
808 float_raise(float_flag_invalid, &env->fp_status);
809 RdV = 0;
810 } else {
811 RdV = float32_to_uint32_round_to_zero(RsV, &env->fp_status);
813 arch_fpop_end(env);
814 return RdV;
817 int32_t HELPER(conv_sf2w_chop)(CPUHexagonState *env, float32 RsV)
819 int32_t RdV;
820 arch_fpop_start(env);
821 /* Hexagon returns -1 for NaN */
822 if (float32_is_any_nan(RsV)) {
823 float_raise(float_flag_invalid, &env->fp_status);
824 RdV = -1;
825 } else {
826 RdV = float32_to_int32_round_to_zero(RsV, &env->fp_status);
828 arch_fpop_end(env);
829 return RdV;
832 uint64_t HELPER(conv_sf2ud_chop)(CPUHexagonState *env, float32 RsV)
834 uint64_t RddV;
835 arch_fpop_start(env);
836 /* Hexagon checks the sign before rounding */
837 if (float32_is_neg(RsV) && !float32_is_any_nan(RsV)) {
838 float_raise(float_flag_invalid, &env->fp_status);
839 RddV = 0;
840 } else {
841 RddV = float32_to_uint64_round_to_zero(RsV, &env->fp_status);
843 arch_fpop_end(env);
844 return RddV;
847 int64_t HELPER(conv_sf2d_chop)(CPUHexagonState *env, float32 RsV)
849 int64_t RddV;
850 arch_fpop_start(env);
851 /* Hexagon returns -1 for NaN */
852 if (float32_is_any_nan(RsV)) {
853 float_raise(float_flag_invalid, &env->fp_status);
854 RddV = -1;
855 } else {
856 RddV = float32_to_int64_round_to_zero(RsV, &env->fp_status);
858 arch_fpop_end(env);
859 return RddV;
862 uint32_t HELPER(conv_df2uw_chop)(CPUHexagonState *env, float64 RssV)
864 uint32_t RdV;
865 arch_fpop_start(env);
866 /* Hexagon checks the sign before rounding */
867 if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
868 float_raise(float_flag_invalid, &env->fp_status);
869 RdV = 0;
870 } else {
871 RdV = float64_to_uint32_round_to_zero(RssV, &env->fp_status);
873 arch_fpop_end(env);
874 return RdV;
877 int32_t HELPER(conv_df2w_chop)(CPUHexagonState *env, float64 RssV)
879 int32_t RdV;
880 arch_fpop_start(env);
881 /* Hexagon returns -1 for NaN */
882 if (float64_is_any_nan(RssV)) {
883 float_raise(float_flag_invalid, &env->fp_status);
884 RdV = -1;
885 } else {
886 RdV = float64_to_int32_round_to_zero(RssV, &env->fp_status);
888 arch_fpop_end(env);
889 return RdV;
892 uint64_t HELPER(conv_df2ud_chop)(CPUHexagonState *env, float64 RssV)
894 uint64_t RddV;
895 arch_fpop_start(env);
896 /* Hexagon checks the sign before rounding */
897 if (float64_is_neg(RssV) && !float64_is_any_nan(RssV)) {
898 float_raise(float_flag_invalid, &env->fp_status);
899 RddV = 0;
900 } else {
901 RddV = float64_to_uint64_round_to_zero(RssV, &env->fp_status);
903 arch_fpop_end(env);
904 return RddV;
907 int64_t HELPER(conv_df2d_chop)(CPUHexagonState *env, float64 RssV)
909 int64_t RddV;
910 arch_fpop_start(env);
911 /* Hexagon returns -1 for NaN */
912 if (float64_is_any_nan(RssV)) {
913 float_raise(float_flag_invalid, &env->fp_status);
914 RddV = -1;
915 } else {
916 RddV = float64_to_int64_round_to_zero(RssV, &env->fp_status);
918 arch_fpop_end(env);
919 return RddV;
922 float32 HELPER(sfadd)(CPUHexagonState *env, float32 RsV, float32 RtV)
924 float32 RdV;
925 arch_fpop_start(env);
926 RdV = float32_add(RsV, RtV, &env->fp_status);
927 arch_fpop_end(env);
928 return RdV;
931 float32 HELPER(sfsub)(CPUHexagonState *env, float32 RsV, float32 RtV)
933 float32 RdV;
934 arch_fpop_start(env);
935 RdV = float32_sub(RsV, RtV, &env->fp_status);
936 arch_fpop_end(env);
937 return RdV;
940 int32_t HELPER(sfcmpeq)(CPUHexagonState *env, float32 RsV, float32 RtV)
942 int32_t PdV;
943 arch_fpop_start(env);
944 PdV = f8BITSOF(float32_eq_quiet(RsV, RtV, &env->fp_status));
945 arch_fpop_end(env);
946 return PdV;
949 int32_t HELPER(sfcmpgt)(CPUHexagonState *env, float32 RsV, float32 RtV)
951 int cmp;
952 int32_t PdV;
953 arch_fpop_start(env);
954 cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
955 PdV = f8BITSOF(cmp == float_relation_greater);
956 arch_fpop_end(env);
957 return PdV;
960 int32_t HELPER(sfcmpge)(CPUHexagonState *env, float32 RsV, float32 RtV)
962 int cmp;
963 int32_t PdV;
964 arch_fpop_start(env);
965 cmp = float32_compare_quiet(RsV, RtV, &env->fp_status);
966 PdV = f8BITSOF(cmp == float_relation_greater ||
967 cmp == float_relation_equal);
968 arch_fpop_end(env);
969 return PdV;
972 int32_t HELPER(sfcmpuo)(CPUHexagonState *env, float32 RsV, float32 RtV)
974 int32_t PdV;
975 arch_fpop_start(env);
976 PdV = f8BITSOF(float32_unordered_quiet(RsV, RtV, &env->fp_status));
977 arch_fpop_end(env);
978 return PdV;
981 float32 HELPER(sfmax)(CPUHexagonState *env, float32 RsV, float32 RtV)
983 float32 RdV;
984 arch_fpop_start(env);
985 RdV = float32_maximum_number(RsV, RtV, &env->fp_status);
986 arch_fpop_end(env);
987 return RdV;
990 float32 HELPER(sfmin)(CPUHexagonState *env, float32 RsV, float32 RtV)
992 float32 RdV;
993 arch_fpop_start(env);
994 RdV = float32_minimum_number(RsV, RtV, &env->fp_status);
995 arch_fpop_end(env);
996 return RdV;
999 int32_t HELPER(sfclass)(CPUHexagonState *env, float32 RsV, int32_t uiV)
1001 int32_t PdV = 0;
1002 arch_fpop_start(env);
1003 if (fGETBIT(0, uiV) && float32_is_zero(RsV)) {
1004 PdV = 0xff;
1006 if (fGETBIT(1, uiV) && float32_is_normal(RsV)) {
1007 PdV = 0xff;
1009 if (fGETBIT(2, uiV) && float32_is_denormal(RsV)) {
1010 PdV = 0xff;
1012 if (fGETBIT(3, uiV) && float32_is_infinity(RsV)) {
1013 PdV = 0xff;
1015 if (fGETBIT(4, uiV) && float32_is_any_nan(RsV)) {
1016 PdV = 0xff;
1018 set_float_exception_flags(0, &env->fp_status);
1019 arch_fpop_end(env);
1020 return PdV;
1023 float32 HELPER(sffixupn)(CPUHexagonState *env, float32 RsV, float32 RtV)
1025 float32 RdV = 0;
1026 int adjust;
1027 arch_fpop_start(env);
1028 arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1029 RdV = RsV;
1030 arch_fpop_end(env);
1031 return RdV;
1034 float32 HELPER(sffixupd)(CPUHexagonState *env, float32 RsV, float32 RtV)
1036 float32 RdV = 0;
1037 int adjust;
1038 arch_fpop_start(env);
1039 arch_sf_recip_common(&RsV, &RtV, &RdV, &adjust, &env->fp_status);
1040 RdV = RtV;
1041 arch_fpop_end(env);
1042 return RdV;
1045 float32 HELPER(sffixupr)(CPUHexagonState *env, float32 RsV)
1047 float32 RdV = 0;
1048 int adjust;
1049 arch_fpop_start(env);
1050 arch_sf_invsqrt_common(&RsV, &RdV, &adjust, &env->fp_status);
1051 RdV = RsV;
1052 arch_fpop_end(env);
1053 return RdV;
1056 float64 HELPER(dfadd)(CPUHexagonState *env, float64 RssV, float64 RttV)
1058 float64 RddV;
1059 arch_fpop_start(env);
1060 RddV = float64_add(RssV, RttV, &env->fp_status);
1061 arch_fpop_end(env);
1062 return RddV;
1065 float64 HELPER(dfsub)(CPUHexagonState *env, float64 RssV, float64 RttV)
1067 float64 RddV;
1068 arch_fpop_start(env);
1069 RddV = float64_sub(RssV, RttV, &env->fp_status);
1070 arch_fpop_end(env);
1071 return RddV;
1074 float64 HELPER(dfmax)(CPUHexagonState *env, float64 RssV, float64 RttV)
1076 float64 RddV;
1077 arch_fpop_start(env);
1078 RddV = float64_maximum_number(RssV, RttV, &env->fp_status);
1079 arch_fpop_end(env);
1080 return RddV;
1083 float64 HELPER(dfmin)(CPUHexagonState *env, float64 RssV, float64 RttV)
1085 float64 RddV;
1086 arch_fpop_start(env);
1087 RddV = float64_minimum_number(RssV, RttV, &env->fp_status);
1088 arch_fpop_end(env);
1089 return RddV;
1092 int32_t HELPER(dfcmpeq)(CPUHexagonState *env, float64 RssV, float64 RttV)
1094 int32_t PdV;
1095 arch_fpop_start(env);
1096 PdV = f8BITSOF(float64_eq_quiet(RssV, RttV, &env->fp_status));
1097 arch_fpop_end(env);
1098 return PdV;
1101 int32_t HELPER(dfcmpgt)(CPUHexagonState *env, float64 RssV, float64 RttV)
1103 int cmp;
1104 int32_t PdV;
1105 arch_fpop_start(env);
1106 cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1107 PdV = f8BITSOF(cmp == float_relation_greater);
1108 arch_fpop_end(env);
1109 return PdV;
1112 int32_t HELPER(dfcmpge)(CPUHexagonState *env, float64 RssV, float64 RttV)
1114 int cmp;
1115 int32_t PdV;
1116 arch_fpop_start(env);
1117 cmp = float64_compare_quiet(RssV, RttV, &env->fp_status);
1118 PdV = f8BITSOF(cmp == float_relation_greater ||
1119 cmp == float_relation_equal);
1120 arch_fpop_end(env);
1121 return PdV;
1124 int32_t HELPER(dfcmpuo)(CPUHexagonState *env, float64 RssV, float64 RttV)
1126 int32_t PdV;
1127 arch_fpop_start(env);
1128 PdV = f8BITSOF(float64_unordered_quiet(RssV, RttV, &env->fp_status));
1129 arch_fpop_end(env);
1130 return PdV;
1133 int32_t HELPER(dfclass)(CPUHexagonState *env, float64 RssV, int32_t uiV)
1135 int32_t PdV = 0;
1136 arch_fpop_start(env);
1137 if (fGETBIT(0, uiV) && float64_is_zero(RssV)) {
1138 PdV = 0xff;
1140 if (fGETBIT(1, uiV) && float64_is_normal(RssV)) {
1141 PdV = 0xff;
1143 if (fGETBIT(2, uiV) && float64_is_denormal(RssV)) {
1144 PdV = 0xff;
1146 if (fGETBIT(3, uiV) && float64_is_infinity(RssV)) {
1147 PdV = 0xff;
1149 if (fGETBIT(4, uiV) && float64_is_any_nan(RssV)) {
1150 PdV = 0xff;
1152 set_float_exception_flags(0, &env->fp_status);
1153 arch_fpop_end(env);
1154 return PdV;
1157 float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
1159 float32 RdV;
1160 arch_fpop_start(env);
1161 RdV = internal_mpyf(RsV, RtV, &env->fp_status);
1162 arch_fpop_end(env);
1163 return RdV;
1166 float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
1167 float32 RsV, float32 RtV)
1169 arch_fpop_start(env);
1170 RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1171 arch_fpop_end(env);
1172 return RxV;
1175 static bool is_zero_prod(float32 a, float32 b)
1177 return ((float32_is_zero(a) && is_finite(b)) ||
1178 (float32_is_zero(b) && is_finite(a)));
1181 static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
1183 float32 ret = dst;
1184 if (float32_is_any_nan(x)) {
1185 if (extract32(x, 22, 1) == 0) {
1186 float_raise(float_flag_invalid, fp_status);
1188 ret = make_float32(0xffffffff); /* nan */
1190 return ret;
1193 float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
1194 float32 RsV, float32 RtV, float32 PuV)
1196 size4s_t tmp;
1197 arch_fpop_start(env);
1198 RxV = check_nan(RxV, RxV, &env->fp_status);
1199 RxV = check_nan(RxV, RsV, &env->fp_status);
1200 RxV = check_nan(RxV, RtV, &env->fp_status);
1201 tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
1202 if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1203 RxV = tmp;
1205 arch_fpop_end(env);
1206 return RxV;
1209 float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
1210 float32 RsV, float32 RtV)
1212 float32 neg_RsV;
1213 arch_fpop_start(env);
1214 neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
1215 RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
1216 arch_fpop_end(env);
1217 return RxV;
1220 static bool is_inf_prod(int32_t a, int32_t b)
1222 return (float32_is_infinity(a) && float32_is_infinity(b)) ||
1223 (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
1224 (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
1227 float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
1228 float32 RsV, float32 RtV)
1230 bool infinp;
1231 bool infminusinf;
1232 float32 tmp;
1234 arch_fpop_start(env);
1235 set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1236 infminusinf = float32_is_infinity(RxV) &&
1237 is_inf_prod(RsV, RtV) &&
1238 (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
1239 infinp = float32_is_infinity(RxV) ||
1240 float32_is_infinity(RtV) ||
1241 float32_is_infinity(RsV);
1242 RxV = check_nan(RxV, RxV, &env->fp_status);
1243 RxV = check_nan(RxV, RsV, &env->fp_status);
1244 RxV = check_nan(RxV, RtV, &env->fp_status);
1245 tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
1246 if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1247 RxV = tmp;
1249 set_float_exception_flags(0, &env->fp_status);
1250 if (float32_is_infinity(RxV) && !infinp) {
1251 RxV = RxV - 1;
1253 if (infminusinf) {
1254 RxV = 0;
1256 arch_fpop_end(env);
1257 return RxV;
1260 float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
1261 float32 RsV, float32 RtV)
1263 bool infinp;
1264 bool infminusinf;
1265 float32 tmp;
1267 arch_fpop_start(env);
1268 set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
1269 infminusinf = float32_is_infinity(RxV) &&
1270 is_inf_prod(RsV, RtV) &&
1271 (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
1272 infinp = float32_is_infinity(RxV) ||
1273 float32_is_infinity(RtV) ||
1274 float32_is_infinity(RsV);
1275 RxV = check_nan(RxV, RxV, &env->fp_status);
1276 RxV = check_nan(RxV, RsV, &env->fp_status);
1277 RxV = check_nan(RxV, RtV, &env->fp_status);
1278 float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
1279 tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
1280 if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
1281 RxV = tmp;
1283 set_float_exception_flags(0, &env->fp_status);
1284 if (float32_is_infinity(RxV) && !infinp) {
1285 RxV = RxV - 1;
1287 if (infminusinf) {
1288 RxV = 0;
1290 arch_fpop_end(env);
1291 return RxV;
1294 float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
1296 int64_t RddV;
1297 arch_fpop_start(env);
1298 if (float64_is_denormal(RssV) &&
1299 (float64_getexp(RttV) >= 512) &&
1300 float64_is_normal(RttV)) {
1301 RddV = float64_mul(RssV, make_float64(0x4330000000000000),
1302 &env->fp_status);
1303 } else if (float64_is_denormal(RttV) &&
1304 (float64_getexp(RssV) >= 512) &&
1305 float64_is_normal(RssV)) {
1306 RddV = float64_mul(RssV, make_float64(0x3cb0000000000000),
1307 &env->fp_status);
1308 } else {
1309 RddV = RssV;
1311 arch_fpop_end(env);
1312 return RddV;
1315 float64 HELPER(dfmpyhh)(CPUHexagonState *env, float64 RxxV,
1316 float64 RssV, float64 RttV)
1318 arch_fpop_start(env);
1319 RxxV = internal_mpyhh(RssV, RttV, RxxV, &env->fp_status);
1320 arch_fpop_end(env);
1321 return RxxV;
1324 /* Histogram instructions */
1326 void HELPER(vhist)(CPUHexagonState *env)
1328 MMVector *input = &env->tmp_VRegs[0];
1330 for (int lane = 0; lane < 8; lane++) {
1331 for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1332 unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1333 unsigned char regno = value >> 3;
1334 unsigned char element = value & 7;
1336 env->VRegs[regno].uh[(sizeof(MMVector) / 16) * lane + element]++;
1341 void HELPER(vhistq)(CPUHexagonState *env)
1343 MMVector *input = &env->tmp_VRegs[0];
1345 for (int lane = 0; lane < 8; lane++) {
1346 for (int i = 0; i < sizeof(MMVector) / 8; ++i) {
1347 unsigned char value = input->ub[(sizeof(MMVector) / 8) * lane + i];
1348 unsigned char regno = value >> 3;
1349 unsigned char element = value & 7;
1351 if (fGETQBIT(env->qtmp, sizeof(MMVector) / 8 * lane + i)) {
1352 env->VRegs[regno].uh[
1353 (sizeof(MMVector) / 16) * lane + element]++;
1359 void HELPER(vwhist256)(CPUHexagonState *env)
1361 MMVector *input = &env->tmp_VRegs[0];
1363 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1364 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1365 unsigned int weight = fGETUBYTE(1, input->h[i]);
1366 unsigned int vindex = (bucket >> 3) & 0x1F;
1367 unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1369 env->VRegs[vindex].uh[elindex] =
1370 env->VRegs[vindex].uh[elindex] + weight;
1374 void HELPER(vwhist256q)(CPUHexagonState *env)
1376 MMVector *input = &env->tmp_VRegs[0];
1378 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1379 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1380 unsigned int weight = fGETUBYTE(1, input->h[i]);
1381 unsigned int vindex = (bucket >> 3) & 0x1F;
1382 unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1384 if (fGETQBIT(env->qtmp, 2 * i)) {
1385 env->VRegs[vindex].uh[elindex] =
1386 env->VRegs[vindex].uh[elindex] + weight;
1391 void HELPER(vwhist256_sat)(CPUHexagonState *env)
1393 MMVector *input = &env->tmp_VRegs[0];
1395 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1396 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1397 unsigned int weight = fGETUBYTE(1, input->h[i]);
1398 unsigned int vindex = (bucket >> 3) & 0x1F;
1399 unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1401 env->VRegs[vindex].uh[elindex] =
1402 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1406 void HELPER(vwhist256q_sat)(CPUHexagonState *env)
1408 MMVector *input = &env->tmp_VRegs[0];
1410 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1411 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1412 unsigned int weight = fGETUBYTE(1, input->h[i]);
1413 unsigned int vindex = (bucket >> 3) & 0x1F;
1414 unsigned int elindex = ((i >> 0) & (~7)) | ((bucket >> 0) & 7);
1416 if (fGETQBIT(env->qtmp, 2 * i)) {
1417 env->VRegs[vindex].uh[elindex] =
1418 fVSATUH(env->VRegs[vindex].uh[elindex] + weight);
1423 void HELPER(vwhist128)(CPUHexagonState *env)
1425 MMVector *input = &env->tmp_VRegs[0];
1427 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1428 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1429 unsigned int weight = fGETUBYTE(1, input->h[i]);
1430 unsigned int vindex = (bucket >> 3) & 0x1F;
1431 unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1433 env->VRegs[vindex].uw[elindex] =
1434 env->VRegs[vindex].uw[elindex] + weight;
1438 void HELPER(vwhist128q)(CPUHexagonState *env)
1440 MMVector *input = &env->tmp_VRegs[0];
1442 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1443 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1444 unsigned int weight = fGETUBYTE(1, input->h[i]);
1445 unsigned int vindex = (bucket >> 3) & 0x1F;
1446 unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1448 if (fGETQBIT(env->qtmp, 2 * i)) {
1449 env->VRegs[vindex].uw[elindex] =
1450 env->VRegs[vindex].uw[elindex] + weight;
1455 void HELPER(vwhist128m)(CPUHexagonState *env, int32_t uiV)
1457 MMVector *input = &env->tmp_VRegs[0];
1459 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1460 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1461 unsigned int weight = fGETUBYTE(1, input->h[i]);
1462 unsigned int vindex = (bucket >> 3) & 0x1F;
1463 unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1465 if ((bucket & 1) == uiV) {
1466 env->VRegs[vindex].uw[elindex] =
1467 env->VRegs[vindex].uw[elindex] + weight;
1472 void HELPER(vwhist128qm)(CPUHexagonState *env, int32_t uiV)
1474 MMVector *input = &env->tmp_VRegs[0];
1476 for (int i = 0; i < (sizeof(MMVector) / 2); i++) {
1477 unsigned int bucket = fGETUBYTE(0, input->h[i]);
1478 unsigned int weight = fGETUBYTE(1, input->h[i]);
1479 unsigned int vindex = (bucket >> 3) & 0x1F;
1480 unsigned int elindex = ((i >> 1) & (~3)) | ((bucket >> 1) & 3);
1482 if (((bucket & 1) == uiV) && fGETQBIT(env->qtmp, 2 * i)) {
1483 env->VRegs[vindex].uw[elindex] =
1484 env->VRegs[vindex].uw[elindex] + weight;
/*
 * Macros that the generated helper functions may reference:
 *  - warn(...) expands to nothing.
 *  - fatal(...) expands to g_assert_not_reached(); note that the
 *    expansion already carries its own trailing semicolon.
 */
#define warn(...) /* Nothing */
#define fatal(...) g_assert_not_reached();

/* Prints an error line on stdout that names the given helper tag */
#define BOGUS_HELPER(tag) \
    printf("ERROR: bogus helper: " #tag "\n")
1496 #include "helper_funcs_generated.c.inc"