2 * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
21 #include "tcg/tcg-op.h"
22 #include "tcg/tcg-op-gvec.h"
23 #include "exec/helper-gen.h"
24 #include "exec/helper-proto.h"
25 #include "exec/translation-block.h"
31 #include "translate.h"
33 #include "printinsn.h"
35 #define HELPER_H "helper.h"
36 #include "exec/helper-info.c.inc"
39 #include "analyze_funcs_generated.c.inc"
/*
 * Signature of a per-opcode analysis callback.  The callbacks are stored
 * in opcode_analyze[] and invoked with the DisasContext being translated.
 */
typedef void (*AnalyzeInsn)(DisasContext *ctx);
42 static const AnalyzeInsn opcode_analyze
[XX_LAST_OPCODE
] = {
43 #define OPCODE(X) [X] = analyze_##X
44 #include "opcodes_def_generated.h.inc"
/*
 * TCG globals for the scalar state.  hexagon_translate_init() binds each
 * one to the CPUHexagonState field noted alongside it.
 */
TCGv hex_gpr[TOTAL_PER_THREAD_REGS];          /* gpr[] */
TCGv hex_pred[NUM_PREGS];                     /* pred[] */
TCGv hex_slot_cancelled;                      /* slot_cancelled */
TCGv hex_new_value_usr;                       /* new_value_usr */
TCGv hex_reg_written[TOTAL_PER_THREAD_REGS];  /* reg_written[] */
TCGv hex_store_addr[STORES_MAX];              /* mem_log_stores[].va */
TCGv hex_store_width[STORES_MAX];             /* mem_log_stores[].width */
TCGv hex_store_val32[STORES_MAX];             /* mem_log_stores[].data32 */
TCGv_i64 hex_store_val64[STORES_MAX];         /* mem_log_stores[].data64 */
/*
 * More TCG globals bound in hexagon_translate_init(); the backing
 * CPUHexagonState field is noted alongside each one.
 */
TCGv_i64 hex_llsc_val_i64;                    /* llsc_val_i64 */
TCGv hex_vstore_addr[VSTORES_MAX];            /* vstore[].va */
TCGv hex_vstore_size[VSTORES_MAX];            /* vstore[].size */
TCGv hex_vstore_pending[VSTORES_MAX];         /* vstore_pending[] */
/*
 * Names given to the predicate TCG globals (hex_pred[]), indexed by
 * predicate number.
 */
static const char * const hexagon_prednames[] = {
  "p0", "p1", "p2", "p3"
};
68 intptr_t ctx_future_vreg_off(DisasContext
*ctx
, int regnum
,
69 int num
, bool alloc_ok
)
73 if (!ctx
->need_commit
) {
74 return offsetof(CPUHexagonState
, VRegs
[regnum
]);
77 /* See if it is already allocated */
78 for (int i
= 0; i
< ctx
->future_vregs_idx
; i
++) {
79 if (ctx
->future_vregs_num
[i
] == regnum
) {
80 return offsetof(CPUHexagonState
, future_VRegs
[i
]);
85 offset
= offsetof(CPUHexagonState
, future_VRegs
[ctx
->future_vregs_idx
]);
86 for (int i
= 0; i
< num
; i
++) {
87 ctx
->future_vregs_num
[ctx
->future_vregs_idx
+ i
] = regnum
++;
89 ctx
->future_vregs_idx
+= num
;
90 g_assert(ctx
->future_vregs_idx
<= VECTOR_TEMPS_MAX
);
94 intptr_t ctx_tmp_vreg_off(DisasContext
*ctx
, int regnum
,
95 int num
, bool alloc_ok
)
99 /* See if it is already allocated */
100 for (int i
= 0; i
< ctx
->tmp_vregs_idx
; i
++) {
101 if (ctx
->tmp_vregs_num
[i
] == regnum
) {
102 return offsetof(CPUHexagonState
, tmp_VRegs
[i
]);
107 offset
= offsetof(CPUHexagonState
, tmp_VRegs
[ctx
->tmp_vregs_idx
]);
108 for (int i
= 0; i
< num
; i
++) {
109 ctx
->tmp_vregs_num
[ctx
->tmp_vregs_idx
+ i
] = regnum
++;
111 ctx
->tmp_vregs_idx
+= num
;
112 g_assert(ctx
->tmp_vregs_idx
<= VECTOR_TEMPS_MAX
);
116 static void gen_exception_raw(int excp
)
118 gen_helper_raise_exception(tcg_env
, tcg_constant_i32(excp
));
121 static void gen_exec_counters(DisasContext
*ctx
)
123 tcg_gen_addi_tl(hex_gpr
[HEX_REG_QEMU_PKT_CNT
],
124 hex_gpr
[HEX_REG_QEMU_PKT_CNT
], ctx
->num_packets
);
125 tcg_gen_addi_tl(hex_gpr
[HEX_REG_QEMU_INSN_CNT
],
126 hex_gpr
[HEX_REG_QEMU_INSN_CNT
], ctx
->num_insns
);
127 tcg_gen_addi_tl(hex_gpr
[HEX_REG_QEMU_HVX_CNT
],
128 hex_gpr
[HEX_REG_QEMU_HVX_CNT
], ctx
->num_hvx_insns
);
/*
 * Ask the common translator whether a goto_tb to @dest may be used from
 * the TB described by @ctx.  Thin wrapper around translator_use_goto_tb().
 */
static bool use_goto_tb(DisasContext *ctx, target_ulong dest)
{
    return translator_use_goto_tb(&ctx->base, dest);
}
136 static void gen_goto_tb(DisasContext
*ctx
, int idx
, target_ulong dest
, bool
139 if (use_goto_tb(ctx
, dest
)) {
140 tcg_gen_goto_tb(idx
);
142 tcg_gen_movi_tl(hex_gpr
[HEX_REG_PC
], dest
);
144 tcg_gen_exit_tb(ctx
->base
.tb
, idx
);
147 tcg_gen_movi_tl(hex_gpr
[HEX_REG_PC
], dest
);
149 tcg_gen_lookup_and_goto_ptr();
153 static void gen_end_tb(DisasContext
*ctx
)
155 Packet
*pkt
= ctx
->pkt
;
157 gen_exec_counters(ctx
);
159 if (ctx
->branch_cond
!= TCG_COND_NEVER
) {
160 if (ctx
->branch_cond
!= TCG_COND_ALWAYS
) {
161 TCGLabel
*skip
= gen_new_label();
162 tcg_gen_brcondi_tl(ctx
->branch_cond
, ctx
->branch_taken
, 0, skip
);
163 gen_goto_tb(ctx
, 0, ctx
->branch_dest
, true);
165 gen_goto_tb(ctx
, 1, ctx
->next_PC
, false);
167 gen_goto_tb(ctx
, 0, ctx
->branch_dest
, true);
169 } else if (ctx
->is_tight_loop
&&
170 pkt
->insn
[pkt
->num_insns
- 1].opcode
== J2_endloop0
) {
172 * When we're in a tight loop, we defer the endloop0 processing
173 * to take advantage of direct block chaining
175 TCGLabel
*skip
= gen_new_label();
176 tcg_gen_brcondi_tl(TCG_COND_LEU
, hex_gpr
[HEX_REG_LC0
], 1, skip
);
177 tcg_gen_subi_tl(hex_gpr
[HEX_REG_LC0
], hex_gpr
[HEX_REG_LC0
], 1);
178 gen_goto_tb(ctx
, 0, ctx
->base
.tb
->pc
, true);
180 gen_goto_tb(ctx
, 1, ctx
->next_PC
, false);
182 tcg_gen_lookup_and_goto_ptr();
185 ctx
->base
.is_jmp
= DISAS_NORETURN
;
/*
 * End the TB by raising exception @excp.
 *
 * The execution counters are added first, then PC is set to the address
 * following the current packet (ctx->next_PC) and the exception helper is
 * called.  The TB is marked DISAS_NORETURN.
 */
static void gen_exception_end_tb(DisasContext *ctx, int excp)
{
    gen_exec_counters(ctx);
    tcg_gen_movi_tl(hex_gpr[HEX_REG_PC], ctx->next_PC);
    gen_exception_raw(excp);
    ctx->base.is_jmp = DISAS_NORETURN;
}
197 #define PACKET_BUFFER_LEN 1028
198 static void print_pkt(Packet
*pkt
)
200 GString
*buf
= g_string_sized_new(PACKET_BUFFER_LEN
);
201 snprint_a_pkt_debug(buf
, pkt
);
202 HEX_DEBUG_LOG("%s", buf
->str
);
203 g_string_free(buf
, true);
205 #define HEX_DEBUG_PRINT_PKT(pkt) \
212 static int read_packet_words(CPUHexagonState
*env
, DisasContext
*ctx
,
215 bool found_end
= false;
216 int nwords
, max_words
;
218 memset(words
, 0, PACKET_WORDS_MAX
* sizeof(uint32_t));
219 for (nwords
= 0; !found_end
&& nwords
< PACKET_WORDS_MAX
; nwords
++) {
221 translator_ldl(env
, &ctx
->base
,
222 ctx
->base
.pc_next
+ nwords
* sizeof(uint32_t));
223 found_end
= is_packet_end(words
[nwords
]);
226 /* Read too many words without finding the end */
230 /* Check for page boundary crossing */
231 max_words
= -(ctx
->base
.pc_next
| TARGET_PAGE_MASK
) / sizeof(uint32_t);
232 if (nwords
> max_words
) {
233 /* We can only cross a page boundary at the beginning of a TB */
234 g_assert(ctx
->base
.num_insns
== 1);
237 HEX_DEBUG_LOG("decode_packet: pc = 0x%x\n", ctx
->base
.pc_next
);
238 HEX_DEBUG_LOG(" words = { ");
239 for (int i
= 0; i
< nwords
; i
++) {
240 HEX_DEBUG_LOG("0x%x, ", words
[i
]);
242 HEX_DEBUG_LOG("}\n");
247 static bool check_for_attrib(Packet
*pkt
, int attrib
)
249 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
250 if (GET_ATTRIB(pkt
->insn
[i
].opcode
, attrib
)) {
257 static bool need_slot_cancelled(Packet
*pkt
)
259 /* We only need slot_cancelled for conditional store instructions */
260 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
261 uint16_t opcode
= pkt
->insn
[i
].opcode
;
262 if (GET_ATTRIB(opcode
, A_CONDEXEC
) &&
263 GET_ATTRIB(opcode
, A_SCALAR_STORE
)) {
270 static bool need_next_PC(DisasContext
*ctx
)
272 Packet
*pkt
= ctx
->pkt
;
274 /* Check for conditional control flow or HW loop end */
275 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
276 uint16_t opcode
= pkt
->insn
[i
].opcode
;
277 if (GET_ATTRIB(opcode
, A_CONDEXEC
) && GET_ATTRIB(opcode
, A_COF
)) {
280 if (GET_ATTRIB(opcode
, A_HWLOOP0_END
) ||
281 GET_ATTRIB(opcode
, A_HWLOOP1_END
)) {
289 * The opcode_analyze functions mark most of the writes in a packet
290 * However, there are some implicit writes marked as attributes
291 * of the applicable instructions.
293 static void mark_implicit_reg_write(DisasContext
*ctx
, int attrib
, int rnum
)
295 uint16_t opcode
= ctx
->insn
->opcode
;
296 if (GET_ATTRIB(opcode
, attrib
)) {
298 * USR is used to set overflow and FP exceptions,
299 * so treat it as conditional
301 bool is_predicated
= GET_ATTRIB(opcode
, A_CONDEXEC
) ||
304 /* LC0/LC1 is conditionally written by endloop instructions */
305 if ((rnum
== HEX_REG_LC0
|| rnum
== HEX_REG_LC1
) &&
306 (opcode
== J2_endloop0
||
307 opcode
== J2_endloop1
||
308 opcode
== J2_endloop01
)) {
309 is_predicated
= true;
312 ctx_log_reg_write(ctx
, rnum
, is_predicated
);
316 static void mark_implicit_reg_writes(DisasContext
*ctx
)
318 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_FP
, HEX_REG_FP
);
319 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_SP
, HEX_REG_SP
);
320 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_LR
, HEX_REG_LR
);
321 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_LC0
, HEX_REG_LC0
);
322 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_SA0
, HEX_REG_SA0
);
323 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_LC1
, HEX_REG_LC1
);
324 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_SA1
, HEX_REG_SA1
);
325 mark_implicit_reg_write(ctx
, A_IMPLICIT_WRITES_USR
, HEX_REG_USR
);
326 mark_implicit_reg_write(ctx
, A_FPOP
, HEX_REG_USR
);
329 static void mark_implicit_pred_write(DisasContext
*ctx
, int attrib
, int pnum
)
331 if (GET_ATTRIB(ctx
->insn
->opcode
, attrib
)) {
332 ctx_log_pred_write(ctx
, pnum
);
336 static void mark_implicit_pred_writes(DisasContext
*ctx
)
338 mark_implicit_pred_write(ctx
, A_IMPLICIT_WRITES_P0
, 0);
339 mark_implicit_pred_write(ctx
, A_IMPLICIT_WRITES_P1
, 1);
340 mark_implicit_pred_write(ctx
, A_IMPLICIT_WRITES_P2
, 2);
341 mark_implicit_pred_write(ctx
, A_IMPLICIT_WRITES_P3
, 3);
344 static bool pkt_raises_exception(Packet
*pkt
)
346 if (check_for_attrib(pkt
, A_LOAD
) ||
347 check_for_attrib(pkt
, A_STORE
)) {
353 static bool need_commit(DisasContext
*ctx
)
355 Packet
*pkt
= ctx
->pkt
;
358 * If the short-circuit property is set to false, we'll always do the commit
360 if (!ctx
->short_circuit
) {
364 if (pkt_raises_exception(pkt
)) {
368 /* Registers with immutability flags require new_value */
369 for (int i
= 0; i
< ctx
->reg_log_idx
; i
++) {
370 int rnum
= ctx
->reg_log
[i
];
371 if (reg_immut_masks
[rnum
]) {
376 /* Floating point instructions are hard-coded to use new_value */
377 if (check_for_attrib(pkt
, A_FPOP
)) {
381 if (pkt
->num_insns
== 1) {
382 if (pkt
->pkt_has_hvx
) {
384 * The HVX instructions with generated helpers use
385 * pass-by-reference, so they need the read/write overlap
387 * The HVX instructions with overrides are OK.
389 if (!ctx
->has_hvx_helper
) {
397 /* Check for overlap between register reads and writes */
398 for (int i
= 0; i
< ctx
->reg_log_idx
; i
++) {
399 int rnum
= ctx
->reg_log
[i
];
400 if (test_bit(rnum
, ctx
->regs_read
)) {
405 /* Check for overlap between predicate reads and writes */
406 for (int i
= 0; i
< ctx
->preg_log_idx
; i
++) {
407 int pnum
= ctx
->preg_log
[i
];
408 if (test_bit(pnum
, ctx
->pregs_read
)) {
413 /* Check for overlap between HVX reads and writes */
414 for (int i
= 0; i
< ctx
->vreg_log_idx
; i
++) {
415 int vnum
= ctx
->vreg_log
[i
];
416 if (test_bit(vnum
, ctx
->vregs_read
)) {
420 if (!bitmap_empty(ctx
->vregs_updated_tmp
, NUM_VREGS
)) {
421 int i
= find_first_bit(ctx
->vregs_updated_tmp
, NUM_VREGS
);
422 while (i
< NUM_VREGS
) {
423 if (test_bit(i
, ctx
->vregs_read
)) {
426 i
= find_next_bit(ctx
->vregs_updated_tmp
, NUM_VREGS
, i
+ 1);
429 if (!bitmap_empty(ctx
->vregs_select
, NUM_VREGS
)) {
430 int i
= find_first_bit(ctx
->vregs_select
, NUM_VREGS
);
431 while (i
< NUM_VREGS
) {
432 if (test_bit(i
, ctx
->vregs_read
)) {
435 i
= find_next_bit(ctx
->vregs_select
, NUM_VREGS
, i
+ 1);
439 /* Check for overlap between HVX predicate reads and writes */
440 for (int i
= 0; i
< ctx
->qreg_log_idx
; i
++) {
441 int qnum
= ctx
->qreg_log
[i
];
442 if (test_bit(qnum
, ctx
->qregs_read
)) {
450 static void mark_implicit_pred_read(DisasContext
*ctx
, int attrib
, int pnum
)
452 if (GET_ATTRIB(ctx
->insn
->opcode
, attrib
)) {
453 ctx_log_pred_read(ctx
, pnum
);
457 static void mark_implicit_pred_reads(DisasContext
*ctx
)
459 mark_implicit_pred_read(ctx
, A_IMPLICIT_READS_P0
, 0);
460 mark_implicit_pred_read(ctx
, A_IMPLICIT_READS_P1
, 1);
461 mark_implicit_pred_read(ctx
, A_IMPLICIT_READS_P3
, 2);
462 mark_implicit_pred_read(ctx
, A_IMPLICIT_READS_P3
, 3);
465 static void analyze_packet(DisasContext
*ctx
)
467 Packet
*pkt
= ctx
->pkt
;
468 ctx
->has_hvx_helper
= false;
469 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
470 Insn
*insn
= &pkt
->insn
[i
];
472 if (opcode_analyze
[insn
->opcode
]) {
473 opcode_analyze
[insn
->opcode
](ctx
);
475 mark_implicit_reg_writes(ctx
);
476 mark_implicit_pred_writes(ctx
);
477 mark_implicit_pred_reads(ctx
);
480 ctx
->need_commit
= need_commit(ctx
);
483 static void gen_start_packet(DisasContext
*ctx
)
485 Packet
*pkt
= ctx
->pkt
;
486 target_ulong next_PC
= ctx
->base
.pc_next
+ pkt
->encod_pkt_size_in_bytes
;
489 /* Clear out the disassembly context */
490 ctx
->next_PC
= next_PC
;
491 ctx
->reg_log_idx
= 0;
492 bitmap_zero(ctx
->regs_written
, TOTAL_PER_THREAD_REGS
);
493 bitmap_zero(ctx
->regs_read
, TOTAL_PER_THREAD_REGS
);
494 bitmap_zero(ctx
->predicated_regs
, TOTAL_PER_THREAD_REGS
);
495 ctx
->preg_log_idx
= 0;
496 bitmap_zero(ctx
->pregs_written
, NUM_PREGS
);
497 bitmap_zero(ctx
->pregs_read
, NUM_PREGS
);
498 ctx
->future_vregs_idx
= 0;
499 ctx
->tmp_vregs_idx
= 0;
500 ctx
->vreg_log_idx
= 0;
501 bitmap_zero(ctx
->vregs_updated_tmp
, NUM_VREGS
);
502 bitmap_zero(ctx
->vregs_updated
, NUM_VREGS
);
503 bitmap_zero(ctx
->vregs_select
, NUM_VREGS
);
504 bitmap_zero(ctx
->predicated_future_vregs
, NUM_VREGS
);
505 bitmap_zero(ctx
->predicated_tmp_vregs
, NUM_VREGS
);
506 bitmap_zero(ctx
->vregs_read
, NUM_VREGS
);
507 bitmap_zero(ctx
->qregs_read
, NUM_QREGS
);
508 ctx
->qreg_log_idx
= 0;
509 for (i
= 0; i
< STORES_MAX
; i
++) {
510 ctx
->store_width
[i
] = 0;
512 ctx
->s1_store_processed
= false;
513 ctx
->pre_commit
= true;
514 for (i
= 0; i
< TOTAL_PER_THREAD_REGS
; i
++) {
515 ctx
->new_value
[i
] = NULL
;
517 for (i
= 0; i
< NUM_PREGS
; i
++) {
518 ctx
->new_pred_value
[i
] = NULL
;
524 * pregs_written is used both in the analyze phase as well as the code
525 * gen phase, so clear it again.
527 bitmap_zero(ctx
->pregs_written
, NUM_PREGS
);
530 /* Handy place to set a breakpoint before the packet executes */
531 gen_helper_debug_start_packet(tcg_env
);
534 /* Initialize the runtime state for packet semantics */
535 if (need_slot_cancelled(pkt
)) {
536 tcg_gen_movi_tl(hex_slot_cancelled
, 0);
538 ctx
->branch_taken
= NULL
;
539 if (pkt
->pkt_has_cof
) {
540 ctx
->branch_taken
= tcg_temp_new();
541 if (pkt
->pkt_has_multi_cof
) {
542 tcg_gen_movi_tl(ctx
->branch_taken
, 0);
544 if (need_next_PC(ctx
)) {
545 tcg_gen_movi_tl(hex_gpr
[HEX_REG_PC
], next_PC
);
549 ctx
->pred_written
= tcg_temp_new();
550 tcg_gen_movi_tl(ctx
->pred_written
, 0);
553 /* Preload the predicated registers into get_result_gpr(ctx, i) */
554 if (ctx
->need_commit
&&
555 !bitmap_empty(ctx
->predicated_regs
, TOTAL_PER_THREAD_REGS
)) {
556 int i
= find_first_bit(ctx
->predicated_regs
, TOTAL_PER_THREAD_REGS
);
557 while (i
< TOTAL_PER_THREAD_REGS
) {
558 tcg_gen_mov_tl(get_result_gpr(ctx
, i
), hex_gpr
[i
]);
559 i
= find_next_bit(ctx
->predicated_regs
, TOTAL_PER_THREAD_REGS
,
565 * Preload the predicated pred registers into ctx->new_pred_value[pred_num]
566 * Only endloop instructions conditionally write to pred registers
568 if (ctx
->need_commit
&& pkt
->pkt_has_endloop
) {
569 for (int i
= 0; i
< ctx
->preg_log_idx
; i
++) {
570 int pred_num
= ctx
->preg_log
[i
];
571 ctx
->new_pred_value
[pred_num
] = tcg_temp_new();
572 tcg_gen_mov_tl(ctx
->new_pred_value
[pred_num
], hex_pred
[pred_num
]);
576 /* Preload the predicated HVX registers into future_VRegs and tmp_VRegs */
577 if (!bitmap_empty(ctx
->predicated_future_vregs
, NUM_VREGS
)) {
578 int i
= find_first_bit(ctx
->predicated_future_vregs
, NUM_VREGS
);
579 while (i
< NUM_VREGS
) {
580 const intptr_t VdV_off
=
581 ctx_future_vreg_off(ctx
, i
, 1, true);
582 intptr_t src_off
= offsetof(CPUHexagonState
, VRegs
[i
]);
583 tcg_gen_gvec_mov(MO_64
, VdV_off
,
587 i
= find_next_bit(ctx
->predicated_future_vregs
, NUM_VREGS
, i
+ 1);
590 if (!bitmap_empty(ctx
->predicated_tmp_vregs
, NUM_VREGS
)) {
591 int i
= find_first_bit(ctx
->predicated_tmp_vregs
, NUM_VREGS
);
592 while (i
< NUM_VREGS
) {
593 const intptr_t VdV_off
=
594 ctx_tmp_vreg_off(ctx
, i
, 1, true);
595 intptr_t src_off
= offsetof(CPUHexagonState
, VRegs
[i
]);
596 tcg_gen_gvec_mov(MO_64
, VdV_off
,
600 i
= find_next_bit(ctx
->predicated_tmp_vregs
, NUM_VREGS
, i
+ 1);
605 bool is_gather_store_insn(DisasContext
*ctx
)
607 Packet
*pkt
= ctx
->pkt
;
608 Insn
*insn
= ctx
->insn
;
609 if (GET_ATTRIB(insn
->opcode
, A_CVI_NEW
) &&
610 insn
->new_value_producer_slot
== 1) {
611 /* Look for gather instruction */
612 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
613 Insn
*in
= &pkt
->insn
[i
];
614 if (GET_ATTRIB(in
->opcode
, A_CVI_GATHER
) && in
->slot
== 1) {
622 static void mark_store_width(DisasContext
*ctx
)
624 uint16_t opcode
= ctx
->insn
->opcode
;
625 uint32_t slot
= ctx
->insn
->slot
;
628 if (GET_ATTRIB(opcode
, A_SCALAR_STORE
)) {
629 if (GET_ATTRIB(opcode
, A_MEMSIZE_0B
)) {
632 if (GET_ATTRIB(opcode
, A_MEMSIZE_1B
)) {
635 if (GET_ATTRIB(opcode
, A_MEMSIZE_2B
)) {
638 if (GET_ATTRIB(opcode
, A_MEMSIZE_4B
)) {
641 if (GET_ATTRIB(opcode
, A_MEMSIZE_8B
)) {
644 tcg_debug_assert(is_power_of_2(width
));
645 ctx
->store_width
[slot
] = width
;
649 static void gen_insn(DisasContext
*ctx
)
651 if (ctx
->insn
->generate
) {
652 ctx
->insn
->generate(ctx
);
653 mark_store_width(ctx
);
655 gen_exception_end_tb(ctx
, HEX_EXCP_INVALID_OPCODE
);
660 * Helpers for generating the packet commit
662 static void gen_reg_writes(DisasContext
*ctx
)
666 /* Early exit if not needed */
667 if (!ctx
->need_commit
) {
671 for (i
= 0; i
< ctx
->reg_log_idx
; i
++) {
672 int reg_num
= ctx
->reg_log
[i
];
674 tcg_gen_mov_tl(hex_gpr
[reg_num
], get_result_gpr(ctx
, reg_num
));
677 * ctx->is_tight_loop is set when SA0 points to the beginning of the TB.
678 * If we write to SA0, we have to turn off tight loop handling.
680 if (reg_num
== HEX_REG_SA0
) {
681 ctx
->is_tight_loop
= false;
686 static void gen_pred_writes(DisasContext
*ctx
)
688 /* Early exit if not needed or the log is empty */
689 if (!ctx
->need_commit
|| !ctx
->preg_log_idx
) {
693 for (int i
= 0; i
< ctx
->preg_log_idx
; i
++) {
694 int pred_num
= ctx
->preg_log
[i
];
695 tcg_gen_mov_tl(hex_pred
[pred_num
], ctx
->new_pred_value
[pred_num
]);
699 static void gen_check_store_width(DisasContext
*ctx
, int slot_num
)
702 TCGv slot
= tcg_constant_tl(slot_num
);
703 TCGv check
= tcg_constant_tl(ctx
->store_width
[slot_num
]);
704 gen_helper_debug_check_store_width(tcg_env
, slot
, check
);
708 static bool slot_is_predicated(Packet
*pkt
, int slot_num
)
710 for (int i
= 0; i
< pkt
->num_insns
; i
++) {
711 if (pkt
->insn
[i
].slot
== slot_num
) {
712 return GET_ATTRIB(pkt
->insn
[i
].opcode
, A_CONDEXEC
);
715 /* If we get to here, we didn't find an instruction in the requested slot */
716 g_assert_not_reached();
719 void process_store(DisasContext
*ctx
, int slot_num
)
721 bool is_predicated
= slot_is_predicated(ctx
->pkt
, slot_num
);
722 TCGLabel
*label_end
= NULL
;
725 * We may have already processed this store
726 * See CHECK_NOSHUF in macros.h
728 if (slot_num
== 1 && ctx
->s1_store_processed
) {
731 ctx
->s1_store_processed
= true;
734 TCGv cancelled
= tcg_temp_new();
735 label_end
= gen_new_label();
737 /* Don't do anything if the slot was cancelled */
738 tcg_gen_extract_tl(cancelled
, hex_slot_cancelled
, slot_num
, 1);
739 tcg_gen_brcondi_tl(TCG_COND_NE
, cancelled
, 0, label_end
);
742 TCGv address
= tcg_temp_new();
743 tcg_gen_mov_tl(address
, hex_store_addr
[slot_num
]);
746 * If we know the width from the DisasContext, we can
747 * generate much cleaner code.
748 * Unfortunately, not all instructions execute the fSTORE
749 * macro during code generation. Anything that uses the
750 * generic helper will have this problem. Instructions
751 * that use fWRAP to generate proper TCG code will be OK.
753 switch (ctx
->store_width
[slot_num
]) {
755 gen_check_store_width(ctx
, slot_num
);
756 tcg_gen_qemu_st_tl(hex_store_val32
[slot_num
],
757 hex_store_addr
[slot_num
],
758 ctx
->mem_idx
, MO_UB
);
761 gen_check_store_width(ctx
, slot_num
);
762 tcg_gen_qemu_st_tl(hex_store_val32
[slot_num
],
763 hex_store_addr
[slot_num
],
764 ctx
->mem_idx
, MO_TEUW
);
767 gen_check_store_width(ctx
, slot_num
);
768 tcg_gen_qemu_st_tl(hex_store_val32
[slot_num
],
769 hex_store_addr
[slot_num
],
770 ctx
->mem_idx
, MO_TEUL
);
773 gen_check_store_width(ctx
, slot_num
);
774 tcg_gen_qemu_st_i64(hex_store_val64
[slot_num
],
775 hex_store_addr
[slot_num
],
776 ctx
->mem_idx
, MO_TEUQ
);
781 * If we get to here, we don't know the width at
782 * TCG generation time, we'll use a helper to
783 * avoid branching based on the width at runtime.
785 TCGv slot
= tcg_constant_tl(slot_num
);
786 gen_helper_commit_store(tcg_env
, slot
);
791 gen_set_label(label_end
);
795 static void process_store_log(DisasContext
*ctx
)
798 * When a packet has two stores, the hardware processes
799 * slot 1 and then slot 0. This will be important when
800 * the memory accesses overlap.
802 Packet
*pkt
= ctx
->pkt
;
803 if (pkt
->pkt_has_store_s1
) {
804 g_assert(!pkt
->pkt_has_dczeroa
);
805 process_store(ctx
, 1);
807 if (pkt
->pkt_has_store_s0
) {
808 g_assert(!pkt
->pkt_has_dczeroa
);
809 process_store(ctx
, 0);
813 /* Zero out a 32-bit cache line */
814 static void process_dczeroa(DisasContext
*ctx
)
816 if (ctx
->pkt
->pkt_has_dczeroa
) {
817 /* Store 32 bytes of zero starting at (addr & ~0x1f) */
818 TCGv addr
= tcg_temp_new();
819 TCGv_i64 zero
= tcg_constant_i64(0);
821 tcg_gen_andi_tl(addr
, ctx
->dczero_addr
, ~0x1f);
822 tcg_gen_qemu_st_i64(zero
, addr
, ctx
->mem_idx
, MO_UQ
);
823 tcg_gen_addi_tl(addr
, addr
, 8);
824 tcg_gen_qemu_st_i64(zero
, addr
, ctx
->mem_idx
, MO_UQ
);
825 tcg_gen_addi_tl(addr
, addr
, 8);
826 tcg_gen_qemu_st_i64(zero
, addr
, ctx
->mem_idx
, MO_UQ
);
827 tcg_gen_addi_tl(addr
, addr
, 8);
828 tcg_gen_qemu_st_i64(zero
, addr
, ctx
->mem_idx
, MO_UQ
);
832 static bool pkt_has_hvx_store(Packet
*pkt
)
835 for (i
= 0; i
< pkt
->num_insns
; i
++) {
836 int opcode
= pkt
->insn
[i
].opcode
;
837 if (GET_ATTRIB(opcode
, A_CVI
) && GET_ATTRIB(opcode
, A_STORE
)) {
844 static void gen_commit_hvx(DisasContext
*ctx
)
848 /* Early exit if not needed */
849 if (!ctx
->need_commit
) {
850 g_assert(!pkt_has_hvx_store(ctx
->pkt
));
855 * for (i = 0; i < ctx->vreg_log_idx; i++) {
856 * int rnum = ctx->vreg_log[i];
857 * env->VRegs[rnum] = env->future_VRegs[rnum];
860 for (i
= 0; i
< ctx
->vreg_log_idx
; i
++) {
861 int rnum
= ctx
->vreg_log
[i
];
862 intptr_t dstoff
= offsetof(CPUHexagonState
, VRegs
[rnum
]);
863 intptr_t srcoff
= ctx_future_vreg_off(ctx
, rnum
, 1, false);
864 size_t size
= sizeof(MMVector
);
866 tcg_gen_gvec_mov(MO_64
, dstoff
, srcoff
, size
, size
);
870 * for (i = 0; i < ctx->qreg_log_idx; i++) {
871 * int rnum = ctx->qreg_log[i];
872 * env->QRegs[rnum] = env->future_QRegs[rnum];
875 for (i
= 0; i
< ctx
->qreg_log_idx
; i
++) {
876 int rnum
= ctx
->qreg_log
[i
];
877 intptr_t dstoff
= offsetof(CPUHexagonState
, QRegs
[rnum
]);
878 intptr_t srcoff
= offsetof(CPUHexagonState
, future_QRegs
[rnum
]);
879 size_t size
= sizeof(MMQReg
);
881 tcg_gen_gvec_mov(MO_64
, dstoff
, srcoff
, size
, size
);
884 if (pkt_has_hvx_store(ctx
->pkt
)) {
885 gen_helper_commit_hvx_stores(tcg_env
);
889 static void update_exec_counters(DisasContext
*ctx
)
891 Packet
*pkt
= ctx
->pkt
;
892 int num_insns
= pkt
->num_insns
;
893 int num_real_insns
= 0;
894 int num_hvx_insns
= 0;
896 for (int i
= 0; i
< num_insns
; i
++) {
897 if (!pkt
->insn
[i
].is_endloop
&&
898 !pkt
->insn
[i
].part1
&&
899 !GET_ATTRIB(pkt
->insn
[i
].opcode
, A_IT_NOP
)) {
902 if (GET_ATTRIB(pkt
->insn
[i
].opcode
, A_CVI
)) {
908 ctx
->num_insns
+= num_real_insns
;
909 ctx
->num_hvx_insns
+= num_hvx_insns
;
912 static void gen_commit_packet(DisasContext
*ctx
)
915 * If there is more than one store in a packet, make sure they are all OK
916 * before proceeding with the rest of the packet commit.
918 * dczeroa has to be the only store operation in the packet, so we go
919 * ahead and process that first.
921 * When there is an HVX store, there can also be a scalar store in either
922 * slot 0 or slot1, so we create a mask for the helper to indicate what
925 * When there are two scalar stores, we probe the one in slot 0.
927 * Note that we don't call the probe helper for packets with only one
928 * store. Therefore, we call process_store_log before anything else
929 * involved in committing the packet.
931 Packet
*pkt
= ctx
->pkt
;
932 bool has_store_s0
= pkt
->pkt_has_store_s0
;
933 bool has_store_s1
= (pkt
->pkt_has_store_s1
&& !ctx
->s1_store_processed
);
934 bool has_hvx_store
= pkt_has_hvx_store(pkt
);
935 if (pkt
->pkt_has_dczeroa
) {
937 * The dczeroa will be the store in slot 0, check that we don't have
938 * a store in slot 1 or an HVX store.
940 g_assert(!has_store_s1
&& !has_hvx_store
);
941 process_dczeroa(ctx
);
942 } else if (has_hvx_store
) {
943 if (!has_store_s0
&& !has_store_s1
) {
944 TCGv mem_idx
= tcg_constant_tl(ctx
->mem_idx
);
945 gen_helper_probe_hvx_stores(tcg_env
, mem_idx
);
951 FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
, HAS_ST0
, 1);
955 FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
, HAS_ST1
, 1);
959 FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
,
962 if (has_store_s0
&& slot_is_predicated(pkt
, 0)) {
964 FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
,
967 if (has_store_s1
&& slot_is_predicated(pkt
, 1)) {
969 FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
,
972 mask
= FIELD_DP32(mask
, PROBE_PKT_SCALAR_HVX_STORES
, MMU_IDX
,
974 gen_helper_probe_pkt_scalar_hvx_stores(tcg_env
,
975 tcg_constant_tl(mask
));
977 } else if (has_store_s0
&& has_store_s1
) {
979 * process_store_log will execute the slot 1 store first,
980 * so we only have to probe the store in slot 0
984 FIELD_DP32(args
, PROBE_PKT_SCALAR_STORE_S0
, MMU_IDX
, ctx
->mem_idx
);
985 if (slot_is_predicated(pkt
, 0)) {
987 FIELD_DP32(args
, PROBE_PKT_SCALAR_STORE_S0
, IS_PREDICATED
, 1);
989 TCGv args_tcgv
= tcg_constant_tl(args
);
990 gen_helper_probe_pkt_scalar_store_s0(tcg_env
, args_tcgv
);
993 process_store_log(ctx
);
996 gen_pred_writes(ctx
);
997 if (pkt
->pkt_has_hvx
) {
1000 update_exec_counters(ctx
);
1003 tcg_constant_tl(pkt
->pkt_has_store_s0
&& !pkt
->pkt_has_dczeroa
);
1005 tcg_constant_tl(pkt
->pkt_has_store_s1
&& !pkt
->pkt_has_dczeroa
);
1007 /* Handy place to set a breakpoint at the end of execution */
1008 gen_helper_debug_commit_end(tcg_env
, tcg_constant_tl(ctx
->pkt
->pc
),
1009 ctx
->pred_written
, has_st0
, has_st1
);
1012 if (pkt
->vhist_insn
!= NULL
) {
1013 ctx
->pre_commit
= false;
1014 ctx
->insn
= pkt
->vhist_insn
;
1015 pkt
->vhist_insn
->generate(ctx
);
1018 if (pkt
->pkt_has_cof
) {
1023 static void decode_and_translate_packet(CPUHexagonState
*env
, DisasContext
*ctx
)
1025 uint32_t words
[PACKET_WORDS_MAX
];
1030 nwords
= read_packet_words(env
, ctx
, words
);
1032 gen_exception_end_tb(ctx
, HEX_EXCP_INVALID_PACKET
);
1036 if (decode_packet(nwords
, words
, &pkt
, false) > 0) {
1037 pkt
.pc
= ctx
->base
.pc_next
;
1038 HEX_DEBUG_PRINT_PKT(&pkt
);
1040 gen_start_packet(ctx
);
1041 for (i
= 0; i
< pkt
.num_insns
; i
++) {
1042 ctx
->insn
= &pkt
.insn
[i
];
1045 gen_commit_packet(ctx
);
1046 ctx
->base
.pc_next
+= pkt
.encod_pkt_size_in_bytes
;
1048 gen_exception_end_tb(ctx
, HEX_EXCP_INVALID_PACKET
);
1052 static void hexagon_tr_init_disas_context(DisasContextBase
*dcbase
,
1055 DisasContext
*ctx
= container_of(dcbase
, DisasContext
, base
);
1056 HexagonCPU
*hex_cpu
= env_archcpu(cpu_env(cs
));
1057 uint32_t hex_flags
= dcbase
->tb
->flags
;
1059 ctx
->mem_idx
= MMU_USER_IDX
;
1060 ctx
->num_packets
= 0;
1062 ctx
->num_hvx_insns
= 0;
1063 ctx
->branch_cond
= TCG_COND_NEVER
;
1064 ctx
->is_tight_loop
= FIELD_EX32(hex_flags
, TB_FLAGS
, IS_TIGHT_LOOP
);
1065 ctx
->short_circuit
= hex_cpu
->short_circuit
;
/*
 * TranslatorOps .tb_start hook (see hexagon_tr_ops).
 * No work is done at the start of a TB.
 */
static void hexagon_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
1072 static void hexagon_tr_insn_start(DisasContextBase
*dcbase
, CPUState
*cpu
)
1074 DisasContext
*ctx
= container_of(dcbase
, DisasContext
, base
);
1076 tcg_gen_insn_start(ctx
->base
.pc_next
);
1079 static bool pkt_crosses_page(CPUHexagonState
*env
, DisasContext
*ctx
)
1081 target_ulong page_start
= ctx
->base
.pc_first
& TARGET_PAGE_MASK
;
1082 bool found_end
= false;
1085 for (nwords
= 0; !found_end
&& nwords
< PACKET_WORDS_MAX
; nwords
++) {
1086 uint32_t word
= cpu_ldl_code(env
,
1087 ctx
->base
.pc_next
+ nwords
* sizeof(uint32_t));
1088 found_end
= is_packet_end(word
);
1090 uint32_t next_ptr
= ctx
->base
.pc_next
+ nwords
* sizeof(uint32_t);
1091 return found_end
&& next_ptr
- page_start
>= TARGET_PAGE_SIZE
;
1094 static void hexagon_tr_translate_packet(DisasContextBase
*dcbase
, CPUState
*cpu
)
1096 DisasContext
*ctx
= container_of(dcbase
, DisasContext
, base
);
1097 CPUHexagonState
*env
= cpu_env(cpu
);
1099 decode_and_translate_packet(env
, ctx
);
1101 if (ctx
->base
.is_jmp
== DISAS_NEXT
) {
1102 target_ulong page_start
= ctx
->base
.pc_first
& TARGET_PAGE_MASK
;
1103 target_ulong bytes_max
= PACKET_WORDS_MAX
* sizeof(target_ulong
);
1105 if (ctx
->base
.pc_next
- page_start
>= TARGET_PAGE_SIZE
||
1106 (ctx
->base
.pc_next
- page_start
>= TARGET_PAGE_SIZE
- bytes_max
&&
1107 pkt_crosses_page(env
, ctx
))) {
1108 ctx
->base
.is_jmp
= DISAS_TOO_MANY
;
1112 * The CPU log is used to compare against LLDB single stepping,
1113 * so end the TLB after every packet.
1115 HexagonCPU
*hex_cpu
= env_archcpu(env
);
1116 if (hex_cpu
->lldb_compat
&& qemu_loglevel_mask(CPU_LOG_TB_CPU
)) {
1117 ctx
->base
.is_jmp
= DISAS_TOO_MANY
;
1122 static void hexagon_tr_tb_stop(DisasContextBase
*dcbase
, CPUState
*cpu
)
1124 DisasContext
*ctx
= container_of(dcbase
, DisasContext
, base
);
1126 switch (ctx
->base
.is_jmp
) {
1127 case DISAS_TOO_MANY
:
1128 gen_exec_counters(ctx
);
1129 tcg_gen_movi_tl(hex_gpr
[HEX_REG_PC
], ctx
->base
.pc_next
);
1130 tcg_gen_exit_tb(NULL
, 0);
1132 case DISAS_NORETURN
:
1135 g_assert_not_reached();
/*
 * TranslatorOps .disas_log hook: write the symbol name for the TB's first
 * PC to @logfile, followed by a disassembly of the whole TB.
 */
static void hexagon_tr_disas_log(const DisasContextBase *dcbase,
                                 CPUState *cpu, FILE *logfile)
{
    fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first));
    target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size);
}
/*
 * Translator callbacks for Hexagon, handed to translator_loop() by
 * gen_intermediate_code().  One "instruction" for the common translator
 * is one packet here (see .translate_insn).
 */
static const TranslatorOps hexagon_tr_ops = {
    .init_disas_context = hexagon_tr_init_disas_context,
    .tb_start           = hexagon_tr_tb_start,
    .insn_start         = hexagon_tr_insn_start,
    .translate_insn     = hexagon_tr_translate_packet,
    .tb_stop            = hexagon_tr_tb_stop,
    .disas_log          = hexagon_tr_disas_log,
};
1156 void gen_intermediate_code(CPUState
*cs
, TranslationBlock
*tb
, int *max_insns
,
1157 target_ulong pc
, void *host_pc
)
1161 translator_loop(cs
, tb
, max_insns
, pc
, host_pc
,
1162 &hexagon_tr_ops
, &ctx
.base
);
/*
 * Storage for the names of the TCG globals created in
 * hexagon_translate_init().  The names are formatted with snprintf() and
 * then passed to tcg_global_mem_new*(), hence the static buffers.
 */
static char reg_written_names[TOTAL_PER_THREAD_REGS][NAME_LEN];
static char store_addr_names[STORES_MAX][NAME_LEN];
static char store_width_names[STORES_MAX][NAME_LEN];
static char store_val32_names[STORES_MAX][NAME_LEN];
static char store_val64_names[STORES_MAX][NAME_LEN];
static char vstore_addr_names[VSTORES_MAX][NAME_LEN];
static char vstore_size_names[VSTORES_MAX][NAME_LEN];
static char vstore_pending_names[VSTORES_MAX][NAME_LEN];
1175 void hexagon_translate_init(void)
1181 for (i
= 0; i
< TOTAL_PER_THREAD_REGS
; i
++) {
1182 hex_gpr
[i
] = tcg_global_mem_new(tcg_env
,
1183 offsetof(CPUHexagonState
, gpr
[i
]),
1184 hexagon_regnames
[i
]);
1187 snprintf(reg_written_names
[i
], NAME_LEN
, "reg_written_%s",
1188 hexagon_regnames
[i
]);
1189 hex_reg_written
[i
] = tcg_global_mem_new(tcg_env
,
1190 offsetof(CPUHexagonState
, reg_written
[i
]),
1191 reg_written_names
[i
]);
1194 hex_new_value_usr
= tcg_global_mem_new(tcg_env
,
1195 offsetof(CPUHexagonState
, new_value_usr
), "new_value_usr");
1197 for (i
= 0; i
< NUM_PREGS
; i
++) {
1198 hex_pred
[i
] = tcg_global_mem_new(tcg_env
,
1199 offsetof(CPUHexagonState
, pred
[i
]),
1200 hexagon_prednames
[i
]);
1202 hex_slot_cancelled
= tcg_global_mem_new(tcg_env
,
1203 offsetof(CPUHexagonState
, slot_cancelled
), "slot_cancelled");
1204 hex_llsc_addr
= tcg_global_mem_new(tcg_env
,
1205 offsetof(CPUHexagonState
, llsc_addr
), "llsc_addr");
1206 hex_llsc_val
= tcg_global_mem_new(tcg_env
,
1207 offsetof(CPUHexagonState
, llsc_val
), "llsc_val");
1208 hex_llsc_val_i64
= tcg_global_mem_new_i64(tcg_env
,
1209 offsetof(CPUHexagonState
, llsc_val_i64
), "llsc_val_i64");
1210 for (i
= 0; i
< STORES_MAX
; i
++) {
1211 snprintf(store_addr_names
[i
], NAME_LEN
, "store_addr_%d", i
);
1212 hex_store_addr
[i
] = tcg_global_mem_new(tcg_env
,
1213 offsetof(CPUHexagonState
, mem_log_stores
[i
].va
),
1214 store_addr_names
[i
]);
1216 snprintf(store_width_names
[i
], NAME_LEN
, "store_width_%d", i
);
1217 hex_store_width
[i
] = tcg_global_mem_new(tcg_env
,
1218 offsetof(CPUHexagonState
, mem_log_stores
[i
].width
),
1219 store_width_names
[i
]);
1221 snprintf(store_val32_names
[i
], NAME_LEN
, "store_val32_%d", i
);
1222 hex_store_val32
[i
] = tcg_global_mem_new(tcg_env
,
1223 offsetof(CPUHexagonState
, mem_log_stores
[i
].data32
),
1224 store_val32_names
[i
]);
1226 snprintf(store_val64_names
[i
], NAME_LEN
, "store_val64_%d", i
);
1227 hex_store_val64
[i
] = tcg_global_mem_new_i64(tcg_env
,
1228 offsetof(CPUHexagonState
, mem_log_stores
[i
].data64
),
1229 store_val64_names
[i
]);
1231 for (int i
= 0; i
< VSTORES_MAX
; i
++) {
1232 snprintf(vstore_addr_names
[i
], NAME_LEN
, "vstore_addr_%d", i
);
1233 hex_vstore_addr
[i
] = tcg_global_mem_new(tcg_env
,
1234 offsetof(CPUHexagonState
, vstore
[i
].va
),
1235 vstore_addr_names
[i
]);
1237 snprintf(vstore_size_names
[i
], NAME_LEN
, "vstore_size_%d", i
);
1238 hex_vstore_size
[i
] = tcg_global_mem_new(tcg_env
,
1239 offsetof(CPUHexagonState
, vstore
[i
].size
),
1240 vstore_size_names
[i
]);
1242 snprintf(vstore_pending_names
[i
], NAME_LEN
, "vstore_pending_%d", i
);
1243 hex_vstore_pending
[i
] = tcg_global_mem_new(tcg_env
,
1244 offsetof(CPUHexagonState
, vstore_pending
[i
]),
1245 vstore_pending_names
[i
]);