tcg: Reduce copies for plugin_gen_mem_callbacks
[qemu/ar7.git] / tcg / tcg-op-ldst.c
blob b695d2954ed51ecbd016431610f3e3fd33279db6
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"
#include "exec/plugin-gen.h"
#include "tcg-internal.h"
static inline MemOp tcg_canonicalize_memop(MemOp op, bool is64, bool st)
{
    /* Trigger the asserts within as early as possible. */
    unsigned a_bits = get_alignment_bits(op);

    /* Prefer MO_ALIGN+MO_XX over MO_ALIGN_XX+MO_XX */
    if (a_bits == (op & MO_SIZE)) {
        op = (op & ~MO_AMASK) | MO_ALIGN;
    }

    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (is64) {
            op &= ~MO_SIGN;
            break;
        }
        /* fall through */
    default:
        g_assert_not_reached();
    }
    if (st) {
        op &= ~MO_SIGN;
    }
    return op;
}
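
/*
 * Worked example (illustrative, not part of the original file):
 * tcg_canonicalize_memop(MO_LEUL | MO_ALIGN_4, 0, 0) notices that the
 * requested alignment (4 bytes) equals the access size, so MO_ALIGN_4 is
 * rewritten to the generic MO_ALIGN, giving MO_LEUL | MO_ALIGN.  For a
 * store (st == true) any MO_SIGN bit is dropped as well, since sign
 * extension has no meaning on the store path.
 */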
static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    tcg_gen_op3i_i32(opc, val, addr, oi);
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3(opc, tcgv_i32_arg(val), tcgv_i64_arg(addr), oi);
    }
#endif
}

static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
                         MemOp memop, TCGArg idx)
{
    MemOpIdx oi = make_memop_idx(memop, idx);
#if TARGET_LONG_BITS == 32
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op4i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val), addr, oi);
    } else {
        tcg_gen_op3(opc, tcgv_i64_arg(val), tcgv_i32_arg(addr), oi);
    }
#else
    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_op5i_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
                         TCGV_LOW(addr), TCGV_HIGH(addr), oi);
    } else {
        tcg_gen_op3i_i64(opc, val, addr, oi);
    }
#endif
}
static void tcg_gen_req_mo(TCGBar type)
{
#ifdef TCG_GUEST_DEFAULT_MO
    type &= TCG_GUEST_DEFAULT_MO;
#endif
    type &= ~TCG_TARGET_DEFAULT_MO;
    if (type) {
        tcg_gen_mb(type | TCG_BAR_SC);
    }
}
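
/*
 * Illustrative sketch: tcg_gen_req_mo() only emits a barrier when the guest
 * requires an ordering that the host does not already guarantee.  For a load
 * on a strongly ordered guest (TCG_GUEST_DEFAULT_MO == TCG_MO_ALL) running
 * on a weakly ordered host (TCG_TARGET_DEFAULT_MO == 0), the effect is:
 *
 *     type  = TCG_MO_LD_LD | TCG_MO_ST_LD;  // ordering requested here
 *     type &= TCG_MO_ALL;                   // guest really requires it
 *     type &= ~0;                           // host guarantees none of it
 *     tcg_gen_mb(type | TCG_BAR_SC);        // type != 0, so emit a fence
 *
 * On a host whose TCG_TARGET_DEFAULT_MO covers the request, the mask clears
 * every bit and no barrier is generated.
 */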
/* Only required for loads, where value might overlap addr. */
static TCGv plugin_maybe_preserve_addr(TCGv vaddr)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        /* Save a copy of the vaddr for use after a load. */
        TCGv temp = tcg_temp_new();
        tcg_gen_mov_tl(temp, vaddr);
        return temp;
    }
#endif
    return NULL;
}

static void
plugin_gen_mem_callbacks(TCGv copy_addr, TCGv orig_addr, MemOpIdx oi,
                         enum qemu_plugin_mem_rw rw)
{
#ifdef CONFIG_PLUGIN
    if (tcg_ctx->plugin_insn != NULL) {
        qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
        plugin_gen_empty_mem_callback(copy_addr ? : orig_addr, info);
        if (copy_addr) {
            tcg_temp_free(copy_addr);
        }
    }
#endif
}
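
/*
 * The expanders below use these two helpers as a pair.  A load snapshots the
 * address *before* the memory operation, because val and addr may be the same
 * temporary and the load would clobber it; a store has no such hazard and
 * passes NULL so the callback uses orig_addr directly:
 *
 *     copy_addr = plugin_maybe_preserve_addr(addr);
 *     gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
 *     plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);
 *
 *     gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
 *     plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
 */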
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;
    TCGv copy_addr;

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 0, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input. */
        if ((memop & MO_SSIZE) == MO_SW) {
            memop &= ~MO_SIGN;
        }
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
    plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(val, val, (orig_memop & MO_SIGN
                                           ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                                           : TCG_BSWAP_IZ | TCG_BSWAP_OZ));
            break;
        case MO_32:
            tcg_gen_bswap32_i32(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}
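
/*
 * Front-end usage sketch (illustrative; "mem_idx" stands in for a target's
 * MMU index and is not defined in this file):
 *
 *     TCGv_i32 word = tcg_temp_new_i32();
 *     TCGv_i32 half = tcg_temp_new_i32();
 *
 *     // 32-bit target-endian zero-extended load
 *     tcg_gen_qemu_ld_i32(word, addr, mem_idx, MO_TEUL);
 *     // 16-bit sign-extended load that must be naturally aligned
 *     tcg_gen_qemu_ld_i32(half, addr, mem_idx, MO_TESW | MO_ALIGN);
 */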
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i32 swap = NULL;
    MemOpIdx oi;

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 0, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i32();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i32(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i32(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    if (TCG_TARGET_HAS_qemu_st8_i32 && (memop & MO_SIZE) == MO_8) {
        gen_ldst_i32(INDEX_op_qemu_st8_i32, val, addr, memop, idx);
    } else {
        gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
    }
    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i32(swap);
    }
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    MemOp orig_memop;
    MemOpIdx oi;
    TCGv copy_addr;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
    memop = tcg_canonicalize_memop(memop, 1, 0);
    oi = make_memop_idx(memop, idx);

    orig_memop = memop;
    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        memop &= ~MO_BSWAP;
        /* The bswap primitive benefits from zero-extended input. */
        if ((memop & MO_SIGN) && (memop & MO_SIZE) < MO_64) {
            memop &= ~MO_SIGN;
        }
    }

    copy_addr = plugin_maybe_preserve_addr(addr);
    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(copy_addr, addr, oi, QEMU_PLUGIN_MEM_R);

    if ((orig_memop ^ memop) & MO_BSWAP) {
        int flags = (orig_memop & MO_SIGN
                     ? TCG_BSWAP_IZ | TCG_BSWAP_OS
                     : TCG_BSWAP_IZ | TCG_BSWAP_OZ);
        switch (orig_memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(val, val, flags);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(val, val, flags);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(val, val);
            break;
        default:
            g_assert_not_reached();
        }
    }
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, MemOp memop)
{
    TCGv_i64 swap = NULL;
    MemOpIdx oi;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
        return;
    }

    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
    memop = tcg_canonicalize_memop(memop, 1, 1);
    oi = make_memop_idx(memop, idx);

    if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
        swap = tcg_temp_ebb_new_i64();
        switch (memop & MO_SIZE) {
        case MO_16:
            tcg_gen_bswap16_i64(swap, val, 0);
            break;
        case MO_32:
            tcg_gen_bswap32_i64(swap, val, 0);
            break;
        case MO_64:
            tcg_gen_bswap64_i64(swap, val);
            break;
        default:
            g_assert_not_reached();
        }
        val = swap;
        memop &= ~MO_BSWAP;
    }

    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);

    if (swap) {
        tcg_temp_free_i64(swap);
    }
}
/*
 * Return true if @mop, without knowledge of the pointer alignment,
 * does not require 16-byte atomicity, and it would be advantageous
 * to avoid a call to a helper function.
 */
static bool use_two_i64_for_i128(MemOp mop)
{
#ifdef CONFIG_SOFTMMU
    /* Two softmmu tlb lookups are larger than one function call. */
    return false;
#else
    /*
     * For user-only, two 64-bit operations may well be smaller than a call.
     * Determine if that would be legal for the requested atomicity.
     */
    switch (mop & MO_ATOM_MASK) {
    case MO_ATOM_NONE:
    case MO_ATOM_IFALIGN_PAIR:
        return true;
    case MO_ATOM_IFALIGN:
    case MO_ATOM_SUBALIGN:
    case MO_ATOM_WITHIN16:
    case MO_ATOM_WITHIN16_PAIR:
        /* In a serialized context, no atomicity is required. */
        return !(tcg_ctx->gen_tb->cflags & CF_PARALLEL);
    default:
        g_assert_not_reached();
    }
#endif
}
static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
{
    MemOp mop_1 = orig, mop_2;

    tcg_debug_assert((orig & MO_SIZE) == MO_128);
    tcg_debug_assert((orig & MO_SIGN) == 0);

    /* Reduce the size to 64-bit. */
    mop_1 = (mop_1 & ~MO_SIZE) | MO_64;

    /* Retain the alignment constraints of the original. */
    switch (orig & MO_AMASK) {
    case MO_UNALN:
    case MO_ALIGN_2:
    case MO_ALIGN_4:
        mop_2 = mop_1;
        break;
    case MO_ALIGN_8:
        /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        mop_2 = mop_1;
        break;
    case MO_ALIGN:
        /* Second has 8-byte alignment; first has 16-byte alignment. */
        mop_2 = mop_1;
        mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
        break;
    case MO_ALIGN_16:
    case MO_ALIGN_32:
    case MO_ALIGN_64:
        /* Second has 8-byte alignment; first retains original. */
        mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
        break;
    default:
        g_assert_not_reached();
    }

    /* Use a memory ordering implemented by the host. */
    if ((orig & MO_BSWAP) && !tcg_target_has_memory_bswap(mop_1)) {
        mop_1 &= ~MO_BSWAP;
        mop_2 &= ~MO_BSWAP;
    }

    ret[0] = mop_1;
    ret[1] = mop_2;
}
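
/*
 * Worked example (illustrative): a naturally aligned big-endian 16-byte
 * access, orig = MO_128 | MO_BE | MO_ALIGN, is split as
 *
 *     mop_1 = MO_64 | MO_BE | MO_ALIGN_16    (first half, 16-byte aligned)
 *     mop_2 = MO_64 | MO_BE | MO_ALIGN       (second half, 8-byte aligned)
 *
 * On a little-endian host MO_BE sets MO_BSWAP, so if the backend cannot
 * byte-swap within the memory operation the swap bit is cleared from both
 * halves and the caller performs the swap with explicit bswap64 ops.
 */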
#if TARGET_LONG_BITS == 64
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i64
#else
#define tcg_temp_ebb_new  tcg_temp_ebb_new_i32
#endif

static TCGv_i64 maybe_extend_addr64(TCGv addr)
{
#if TARGET_LONG_BITS == 32
    TCGv_i64 a64 = tcg_temp_ebb_new_i64();
    tcg_gen_extu_i32_i64(a64, addr);
    return a64;
#else
    return addr;
#endif
}

static void maybe_free_addr64(TCGv_i64 a64)
{
#if TARGET_LONG_BITS == 32
    tcg_temp_free_i64(a64);
#endif
}
void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);

    /* TODO: For now, force 32-bit hosts to use the helper. */
    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = TCGV128_HIGH(val);
            hi = TCGV128_LOW(val);
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_ld_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_gen_bswap64_i64(lo, lo);
            tcg_gen_bswap64_i64(hi, hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        /*
         * Since there are no global TCGv_i128, there is no visible state
         * changed if the second load faults.  Load directly into the two
         * subwords.
         */
        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(x, x);
        }

        addr_p8 = tcg_temp_ebb_new();
        tcg_gen_addi_tl(addr_p8, addr, 8);
        gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
        tcg_temp_free(addr_p8);

        if ((mop[0] ^ memop) & MO_BSWAP) {
            tcg_gen_bswap64_i64(y, y);
        }
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_ld_i128(val, cpu_env, a64, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_R);
}
void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
{
    const MemOpIdx oi = make_memop_idx(memop, idx);

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);

    tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);

    /* TODO: For now, force 32-bit hosts to use the helper. */

    if (TCG_TARGET_HAS_qemu_ldst_i128 && TCG_TARGET_REG_BITS == 64) {
        TCGv_i64 lo, hi;
        TCGArg addr_arg;
        MemOpIdx adj_oi;
        bool need_bswap = false;

        if ((memop & MO_BSWAP) && !tcg_target_has_memory_bswap(memop)) {
            lo = tcg_temp_new_i64();
            hi = tcg_temp_new_i64();
            tcg_gen_bswap64_i64(lo, TCGV128_HIGH(val));
            tcg_gen_bswap64_i64(hi, TCGV128_LOW(val));
            adj_oi = make_memop_idx(memop & ~MO_BSWAP, idx);
            need_bswap = true;
        } else {
            lo = TCGV128_LOW(val);
            hi = TCGV128_HIGH(val);
            adj_oi = oi;
        }

#if TARGET_LONG_BITS == 32
        addr_arg = tcgv_i32_arg(addr);
#else
        addr_arg = tcgv_i64_arg(addr);
#endif
        tcg_gen_op4ii_i64(INDEX_op_qemu_st_i128, lo, hi, addr_arg, adj_oi);

        if (need_bswap) {
            tcg_temp_free_i64(lo);
            tcg_temp_free_i64(hi);
        }
    } else if (use_two_i64_for_i128(memop)) {
        MemOp mop[2];
        TCGv addr_p8;
        TCGv_i64 x, y;

        canonicalize_memop_i128_as_i64(mop, memop);

        if ((memop & MO_BSWAP) == MO_LE) {
            x = TCGV128_LOW(val);
            y = TCGV128_HIGH(val);
        } else {
            x = TCGV128_HIGH(val);
            y = TCGV128_LOW(val);
        }

        addr_p8 = tcg_temp_ebb_new();
        if ((mop[0] ^ memop) & MO_BSWAP) {
            TCGv_i64 t = tcg_temp_ebb_new_i64();

            tcg_gen_bswap64_i64(t, x);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
            tcg_gen_bswap64_i64(t, y);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
            tcg_temp_free_i64(t);
        } else {
            gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
            tcg_gen_addi_tl(addr_p8, addr, 8);
            gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
        }
        tcg_temp_free(addr_p8);
    } else {
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen_helper_st_i128(cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    }

    plugin_gen_mem_callbacks(NULL, addr, oi, QEMU_PLUGIN_MEM_W);
}
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i32(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i32(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i32(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i32(ret, val);
        break;
    default:
        tcg_gen_mov_i32(ret, val);
        break;
    }
}

static void tcg_gen_ext_i64(TCGv_i64 ret, TCGv_i64 val, MemOp opc)
{
    switch (opc & MO_SSIZE) {
    case MO_SB:
        tcg_gen_ext8s_i64(ret, val);
        break;
    case MO_UB:
        tcg_gen_ext8u_i64(ret, val);
        break;
    case MO_SW:
        tcg_gen_ext16s_i64(ret, val);
        break;
    case MO_UW:
        tcg_gen_ext16u_i64(ret, val);
        break;
    case MO_SL:
        tcg_gen_ext32s_i64(ret, val);
        break;
    case MO_UL:
        tcg_gen_ext32u_i64(ret, val);
        break;
    default:
        tcg_gen_mov_i64(ret, val);
        break;
    }
}
typedef void (*gen_atomic_cx_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_cx_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i64, TCGv_i32);
typedef void (*gen_atomic_cx_i128)(TCGv_i128, TCGv_env, TCGv_i64,
                                   TCGv_i128, TCGv_i128, TCGv_i32);
typedef void (*gen_atomic_op_i32)(TCGv_i32, TCGv_env, TCGv_i64,
                                  TCGv_i32, TCGv_i32);
typedef void (*gen_atomic_op_i64)(TCGv_i64, TCGv_env, TCGv_i64,
                                  TCGv_i64, TCGv_i32);

#ifdef CONFIG_ATOMIC64
# define WITH_ATOMIC64(X) X,
#else
# define WITH_ATOMIC64(X)
#endif
#ifdef CONFIG_CMPXCHG128
# define WITH_ATOMIC128(X) X,
#else
# define WITH_ATOMIC128(X)
#endif

static void * const table_cmpxchg[(MO_SIZE | MO_BSWAP) + 1] = {
    [MO_8] = gen_helper_atomic_cmpxchgb,
    [MO_16 | MO_LE] = gen_helper_atomic_cmpxchgw_le,
    [MO_16 | MO_BE] = gen_helper_atomic_cmpxchgw_be,
    [MO_32 | MO_LE] = gen_helper_atomic_cmpxchgl_le,
    [MO_32 | MO_BE] = gen_helper_atomic_cmpxchgl_be,
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_cmpxchgq_le)
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_cmpxchgq_be)
    WITH_ATOMIC128([MO_128 | MO_LE] = gen_helper_atomic_cmpxchgo_le)
    WITH_ATOMIC128([MO_128 | MO_BE] = gen_helper_atomic_cmpxchgo_be)
};
void tcg_gen_nonatomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                   TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    tcg_gen_ext_i32(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);
    tcg_temp_free_i32(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, t1, memop);
    } else {
        tcg_gen_mov_i32(retv, t1);
    }
    tcg_temp_free_i32(t1);
}

void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv,
                                TCGv_i32 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i32(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    memop = tcg_canonicalize_memop(memop, 0, 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(retv, retv, memop);
    }
}
void tcg_gen_nonatomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                   TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    TCGv_i64 t1, t2;

    if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
        tcg_gen_nonatomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                      TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
        return;
    }

    t1 = tcg_temp_ebb_new_i64();
    t2 = tcg_temp_ebb_new_i64();

    tcg_gen_ext_i64(t2, cmpv, memop & MO_SIZE);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop & ~MO_SIGN);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t1, t2, newv, t1);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);
    tcg_temp_free_i64(t2);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i64(retv, t1, memop);
    } else {
        tcg_gen_mov_i64(retv, t1);
    }
    tcg_temp_free_i64(t1);
}

void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv,
                                TCGv_i64 newv, TCGArg idx, MemOp memop)
{
    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i64(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    if ((memop & MO_SIZE) == MO_64) {
        gen_atomic_cx_i64 gen;

        memop = tcg_canonicalize_memop(memop, 1, 0);
        gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];
        if (gen) {
            MemOpIdx oi = make_memop_idx(memop, idx);
            TCGv_i64 a64 = maybe_extend_addr64(addr);
            gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
            maybe_free_addr64(a64);
            return;
        }

        gen_helper_exit_atomic(cpu_env);

        /*
         * Produce a result for a well-formed opcode stream.  This satisfies
         * liveness for set before used, which happens before this dead code
         * is removed.
         */
        tcg_gen_movi_i64(retv, 0);
        return;
    }

    if (TCG_TARGET_REG_BITS == 32) {
        tcg_gen_atomic_cmpxchg_i32(TCGV_LOW(retv), addr, TCGV_LOW(cmpv),
                                   TCGV_LOW(newv), idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(retv), TCGV_LOW(retv), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(retv), 0);
        }
    } else {
        TCGv_i32 c32 = tcg_temp_ebb_new_i32();
        TCGv_i32 n32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(c32, cmpv);
        tcg_gen_extrl_i64_i32(n32, newv);
        tcg_gen_atomic_cmpxchg_i32(r32, addr, c32, n32, idx, memop & ~MO_SIGN);
        tcg_temp_free_i32(c32);
        tcg_temp_free_i32(n32);

        tcg_gen_extu_i32_i64(retv, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(retv, retv, memop);
        }
    }
}
void tcg_gen_nonatomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                    TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    if (TCG_TARGET_REG_BITS == 32) {
        /* Inline expansion below is simply too large for 32-bit hosts. */
        gen_atomic_cx_i128 gen = ((memop & MO_BSWAP) == MO_LE
                                  ? gen_helper_nonatomic_cmpxchgo_le
                                  : gen_helper_nonatomic_cmpxchgo_be);
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64;

        tcg_debug_assert((memop & MO_SIZE) == MO_128);
        tcg_debug_assert((memop & MO_SIGN) == 0);

        a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
    } else {
        TCGv_i128 oldv = tcg_temp_ebb_new_i128();
        TCGv_i128 tmpv = tcg_temp_ebb_new_i128();
        TCGv_i64 t0 = tcg_temp_ebb_new_i64();
        TCGv_i64 t1 = tcg_temp_ebb_new_i64();
        TCGv_i64 z = tcg_constant_i64(0);

        tcg_gen_qemu_ld_i128(oldv, addr, idx, memop);

        /* Compare i128 */
        tcg_gen_xor_i64(t0, TCGV128_LOW(oldv), TCGV128_LOW(cmpv));
        tcg_gen_xor_i64(t1, TCGV128_HIGH(oldv), TCGV128_HIGH(cmpv));
        tcg_gen_or_i64(t0, t0, t1);

        /* tmpv = equal ? newv : oldv */
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_LOW(tmpv), t0, z,
                            TCGV128_LOW(newv), TCGV128_LOW(oldv));
        tcg_gen_movcond_i64(TCG_COND_EQ, TCGV128_HIGH(tmpv), t0, z,
                            TCGV128_HIGH(newv), TCGV128_HIGH(oldv));

        /* Unconditional writeback. */
        tcg_gen_qemu_st_i128(tmpv, addr, idx, memop);
        tcg_gen_mov_i128(retv, oldv);

        tcg_temp_free_i64(t0);
        tcg_temp_free_i64(t1);
        tcg_temp_free_i128(tmpv);
        tcg_temp_free_i128(oldv);
    }
}
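
/*
 * Note on the inline expansion above (illustrative): the 128-bit equality
 * test relies on the identity
 *
 *     ((old_lo ^ cmp_lo) | (old_hi ^ cmp_hi)) == 0
 *         <=>  old_lo == cmp_lo  &&  old_hi == cmp_hi
 *
 * so both movcond operations test the same combined value t0 against zero,
 * selecting newv on a full match and re-writing oldv otherwise, which keeps
 * the expansion branch-free.
 */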
void tcg_gen_atomic_cmpxchg_i128(TCGv_i128 retv, TCGv addr, TCGv_i128 cmpv,
                                 TCGv_i128 newv, TCGArg idx, MemOp memop)
{
    gen_atomic_cx_i128 gen;

    if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) {
        tcg_gen_nonatomic_cmpxchg_i128(retv, addr, cmpv, newv, idx, memop);
        return;
    }

    tcg_debug_assert((memop & MO_SIZE) == MO_128);
    tcg_debug_assert((memop & MO_SIGN) == 0);
    gen = table_cmpxchg[memop & (MO_SIZE | MO_BSWAP)];

    if (gen) {
        MemOpIdx oi = make_memop_idx(memop, idx);
        TCGv_i64 a64 = maybe_extend_addr64(addr);
        gen(retv, cpu_env, a64, cmpv, newv, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
        return;
    }

    gen_helper_exit_atomic(cpu_env);

    /*
     * Produce a result for a well-formed opcode stream.  This satisfies
     * liveness for set before used, which happens before this dead code
     * is removed.
     */
    tcg_gen_movi_i64(TCGV128_LOW(retv), 0);
    tcg_gen_movi_i64(TCGV128_HIGH(retv), 0);
}
static void do_nonatomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
{
    TCGv_i32 t1 = tcg_temp_ebb_new_i32();
    TCGv_i32 t2 = tcg_temp_ebb_new_i32();

    memop = tcg_canonicalize_memop(memop, 0, 0);

    tcg_gen_qemu_ld_i32(t1, addr, idx, memop);
    tcg_gen_ext_i32(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i32(t2, addr, idx, memop);

    tcg_gen_ext_i32(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i32(t1);
    tcg_temp_free_i32(t2);
}

static void do_atomic_op_i32(TCGv_i32 ret, TCGv addr, TCGv_i32 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    gen_atomic_op_i32 gen;
    TCGv_i64 a64;
    MemOpIdx oi;

    memop = tcg_canonicalize_memop(memop, 0, 0);

    gen = table[memop & (MO_SIZE | MO_BSWAP)];
    tcg_debug_assert(gen != NULL);

    oi = make_memop_idx(memop & ~MO_SIGN, idx);
    a64 = maybe_extend_addr64(addr);
    gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
    maybe_free_addr64(a64);

    if (memop & MO_SIGN) {
        tcg_gen_ext_i32(ret, ret, memop);
    }
}

static void do_nonatomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                                TCGArg idx, MemOp memop, bool new_val,
                                void (*gen)(TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 t1 = tcg_temp_ebb_new_i64();
    TCGv_i64 t2 = tcg_temp_ebb_new_i64();

    memop = tcg_canonicalize_memop(memop, 1, 0);

    tcg_gen_qemu_ld_i64(t1, addr, idx, memop);
    tcg_gen_ext_i64(t2, val, memop);
    gen(t2, t1, t2);
    tcg_gen_qemu_st_i64(t2, addr, idx, memop);

    tcg_gen_ext_i64(ret, (new_val ? t2 : t1), memop);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);
}

static void do_atomic_op_i64(TCGv_i64 ret, TCGv addr, TCGv_i64 val,
                             TCGArg idx, MemOp memop, void * const table[])
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

    if ((memop & MO_SIZE) == MO_64) {
#ifdef CONFIG_ATOMIC64
        gen_atomic_op_i64 gen;
        TCGv_i64 a64;
        MemOpIdx oi;

        gen = table[memop & (MO_SIZE | MO_BSWAP)];
        tcg_debug_assert(gen != NULL);

        oi = make_memop_idx(memop & ~MO_SIGN, idx);
        a64 = maybe_extend_addr64(addr);
        gen(ret, cpu_env, a64, val, tcg_constant_i32(oi));
        maybe_free_addr64(a64);
#else
        gen_helper_exit_atomic(cpu_env);
        /* Produce a result, so that we have a well-formed opcode stream
           with respect to uses of the result in the (dead) code following. */
        tcg_gen_movi_i64(ret, 0);
#endif /* CONFIG_ATOMIC64 */
    } else {
        TCGv_i32 v32 = tcg_temp_ebb_new_i32();
        TCGv_i32 r32 = tcg_temp_ebb_new_i32();

        tcg_gen_extrl_i64_i32(v32, val);
        do_atomic_op_i32(r32, addr, v32, idx, memop & ~MO_SIGN, table);
        tcg_temp_free_i32(v32);

        tcg_gen_extu_i32_i64(ret, r32);
        tcg_temp_free_i32(r32);

        if (memop & MO_SIGN) {
            tcg_gen_ext_i64(ret, ret, memop);
        }
    }
}
#define GEN_ATOMIC_HELPER(NAME, OP, NEW)                                \
static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = {          \
    [MO_8] = gen_helper_atomic_##NAME##b,                               \
    [MO_16 | MO_LE] = gen_helper_atomic_##NAME##w_le,                   \
    [MO_16 | MO_BE] = gen_helper_atomic_##NAME##w_be,                   \
    [MO_32 | MO_LE] = gen_helper_atomic_##NAME##l_le,                   \
    [MO_32 | MO_BE] = gen_helper_atomic_##NAME##l_be,                   \
    WITH_ATOMIC64([MO_64 | MO_LE] = gen_helper_atomic_##NAME##q_le)     \
    WITH_ATOMIC64([MO_64 | MO_BE] = gen_helper_atomic_##NAME##q_be)     \
};                                                                      \
void tcg_gen_atomic_##NAME##_i32                                        \
    (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i32);                        \
    }                                                                   \
}                                                                       \
void tcg_gen_atomic_##NAME##_i64                                        \
    (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop)    \
{                                                                       \
    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) {                        \
        do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME);     \
    } else {                                                            \
        do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW,            \
                            tcg_gen_##OP##_i64);                        \
    }                                                                   \
}

GEN_ATOMIC_HELPER(fetch_add, add, 0)
GEN_ATOMIC_HELPER(fetch_and, and, 0)
GEN_ATOMIC_HELPER(fetch_or, or, 0)
GEN_ATOMIC_HELPER(fetch_xor, xor, 0)
GEN_ATOMIC_HELPER(fetch_smin, smin, 0)
GEN_ATOMIC_HELPER(fetch_umin, umin, 0)
GEN_ATOMIC_HELPER(fetch_smax, smax, 0)
GEN_ATOMIC_HELPER(fetch_umax, umax, 0)

GEN_ATOMIC_HELPER(add_fetch, add, 1)
GEN_ATOMIC_HELPER(and_fetch, and, 1)
GEN_ATOMIC_HELPER(or_fetch, or, 1)
GEN_ATOMIC_HELPER(xor_fetch, xor, 1)
GEN_ATOMIC_HELPER(smin_fetch, smin, 1)
GEN_ATOMIC_HELPER(umin_fetch, umin, 1)
GEN_ATOMIC_HELPER(smax_fetch, smax, 1)
GEN_ATOMIC_HELPER(umax_fetch, umax, 1)

static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mov_i32(r, b);
}

static void tcg_gen_mov2_i64(TCGv_i64 r, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mov_i64(r, b);
}

GEN_ATOMIC_HELPER(xchg, mov2, 0)

#undef GEN_ATOMIC_HELPER
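
/*
 * What one instantiation provides (illustrative): GEN_ATOMIC_HELPER(fetch_add,
 * add, 0) defines tcg_gen_atomic_fetch_add_i32 and tcg_gen_atomic_fetch_add_i64,
 * both returning the *old* memory value.  A front end might emit, with
 * "mem_idx" standing in for its MMU index:
 *
 *     // old = *addr; *addr = old + val  -- via an atomic helper under
 *     // CF_PARALLEL, or as a plain load/add/store sequence otherwise
 *     tcg_gen_atomic_fetch_add_i32(old, addr, val, mem_idx, MO_TEUL);
 */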