exec: Advise huge pages for the TCG code gen buffer
[qemu.git] / exec.c
blob0594b0705736c124857b3c1993690e7ca5848f6b
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "dma.h"
38 #include "exec-memory.h"
39 #if defined(CONFIG_USER_ONLY)
40 #include <qemu.h>
41 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
42 #include <sys/param.h>
43 #if __FreeBSD_version >= 700104
44 #define HAVE_KINFO_GETVMMAP
45 #define sigqueue sigqueue_freebsd /* avoid redefinition */
46 #include <sys/time.h>
47 #include <sys/proc.h>
48 #include <machine/profile.h>
49 #define _KERNEL
50 #include <sys/user.h>
51 #undef _KERNEL
52 #undef sigqueue
53 #include <libutil.h>
54 #endif
55 #endif
56 #else /* !CONFIG_USER_ONLY */
57 #include "xen-mapcache.h"
58 #include "trace.h"
59 #endif
61 #include "cputlb.h"
63 #include "memory-internal.h"
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_UNASSIGNED
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 uint8_t *code_gen_prologue;
90 static uint8_t *code_gen_buffer;
91 static size_t code_gen_buffer_size;
92 /* threshold to flush the translated code buffer */
93 static size_t code_gen_buffer_max_size;
94 static uint8_t *code_gen_ptr;
96 #if !defined(CONFIG_USER_ONLY)
97 int phys_ram_fd;
98 static int in_migration;
100 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
102 static MemoryRegion *system_memory;
103 static MemoryRegion *system_io;
105 AddressSpace address_space_io;
106 AddressSpace address_space_memory;
107 DMAContext dma_context_memory;
109 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
110 static MemoryRegion io_mem_subpage_ram;
112 #endif
114 CPUArchState *first_cpu;
115 /* current CPU in the current thread. It is only valid inside
116 cpu_exec() */
117 DEFINE_TLS(CPUArchState *,cpu_single_env);
118 /* 0 = Do not count executed instructions.
119 1 = Precise instruction counting.
120 2 = Adaptive rate instruction counting. */
121 int use_icount = 0;
123 typedef struct PageDesc {
124 /* list of TBs intersecting this ram page */
125 TranslationBlock *first_tb;
126 /* in order to optimize self modifying code, we count the number
127 of lookups we do to a given page to use a bitmap */
128 unsigned int code_write_count;
129 uint8_t *code_bitmap;
130 #if defined(CONFIG_USER_ONLY)
131 unsigned long flags;
132 #endif
133 } PageDesc;
135 /* In system mode we want L1_MAP to be based on ram offsets,
136 while in user mode we want it to be based on virtual addresses. */
137 #if !defined(CONFIG_USER_ONLY)
138 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
139 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
140 #else
141 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 #endif
143 #else
144 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 #endif
147 /* Size of the L2 (and L3, etc) page tables. */
148 #define L2_BITS 10
149 #define L2_SIZE (1 << L2_BITS)
151 #define P_L2_LEVELS \
152 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
154 /* The bits remaining after N lower levels of page tables. */
155 #define V_L1_BITS_REM \
156 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
158 #if V_L1_BITS_REM < 4
159 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
160 #else
161 #define V_L1_BITS V_L1_BITS_REM
162 #endif
164 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
166 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
168 uintptr_t qemu_real_host_page_size;
169 uintptr_t qemu_host_page_size;
170 uintptr_t qemu_host_page_mask;
172 /* This is a multi-level map on the virtual address space.
173 The bottom level has pointers to PageDesc. */
174 static void *l1_map[V_L1_SIZE];
176 #if !defined(CONFIG_USER_ONLY)
178 static MemoryRegionSection *phys_sections;
179 static unsigned phys_sections_nb, phys_sections_nb_alloc;
180 static uint16_t phys_section_unassigned;
181 static uint16_t phys_section_notdirty;
182 static uint16_t phys_section_rom;
183 static uint16_t phys_section_watch;
185 /* Simple allocator for PhysPageEntry nodes */
186 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
187 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
189 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
191 static void io_mem_init(void);
192 static void memory_map_init(void);
193 static void *qemu_safe_ram_ptr(ram_addr_t addr);
195 static MemoryRegion io_mem_watch;
196 #endif
197 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
198 tb_page_addr_t phys_page2);
200 /* statistics */
201 static int tb_flush_count;
202 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make the range [addr, addr + size) readable, writable and executable.
   The result of VirtualProtect() is not checked. */
static inline void map_exec(void *addr, long size)
{
    DWORD previous;

    /* The previous protection is only requested because the API needs
       somewhere to store it. */
    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous);
}
#else
/* Make every host page overlapping [addr, addr + size) readable, writable
   and executable.  The result of mprotect() is not checked. */
static inline void map_exec(void *addr, long size)
{
    unsigned long page_mask = ~((unsigned long)getpagesize() - 1);
    /* Round the start down and the end up to host page boundaries. */
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + ~page_mask) & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
230 static void page_init(void)
232 /* NOTE: we can always suppose that qemu_host_page_size >=
233 TARGET_PAGE_SIZE */
234 #ifdef _WIN32
236 SYSTEM_INFO system_info;
238 GetSystemInfo(&system_info);
239 qemu_real_host_page_size = system_info.dwPageSize;
241 #else
242 qemu_real_host_page_size = getpagesize();
243 #endif
244 if (qemu_host_page_size == 0)
245 qemu_host_page_size = qemu_real_host_page_size;
246 if (qemu_host_page_size < TARGET_PAGE_SIZE)
247 qemu_host_page_size = TARGET_PAGE_SIZE;
248 qemu_host_page_mask = ~(qemu_host_page_size - 1);
250 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
252 #ifdef HAVE_KINFO_GETVMMAP
253 struct kinfo_vmentry *freep;
254 int i, cnt;
256 freep = kinfo_getvmmap(getpid(), &cnt);
257 if (freep) {
258 mmap_lock();
259 for (i = 0; i < cnt; i++) {
260 unsigned long startaddr, endaddr;
262 startaddr = freep[i].kve_start;
263 endaddr = freep[i].kve_end;
264 if (h2g_valid(startaddr)) {
265 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
267 if (h2g_valid(endaddr)) {
268 endaddr = h2g(endaddr);
269 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
270 } else {
271 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
272 endaddr = ~0ul;
273 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
274 #endif
278 free(freep);
279 mmap_unlock();
281 #else
282 FILE *f;
284 last_brk = (unsigned long)sbrk(0);
286 f = fopen("/compat/linux/proc/self/maps", "r");
287 if (f) {
288 mmap_lock();
290 do {
291 unsigned long startaddr, endaddr;
292 int n;
294 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
296 if (n == 2 && h2g_valid(startaddr)) {
297 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
299 if (h2g_valid(endaddr)) {
300 endaddr = h2g(endaddr);
301 } else {
302 endaddr = ~0ul;
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
306 } while (!feof(f));
308 fclose(f);
309 mmap_unlock();
311 #endif
313 #endif
316 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
318 PageDesc *pd;
319 void **lp;
320 int i;
322 #if defined(CONFIG_USER_ONLY)
323 /* We can't use g_malloc because it may recurse into a locked mutex. */
324 # define ALLOC(P, SIZE) \
325 do { \
326 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
327 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
328 } while (0)
329 #else
330 # define ALLOC(P, SIZE) \
331 do { P = g_malloc0(SIZE); } while (0)
332 #endif
334 /* Level 1. Always allocated. */
335 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
337 /* Level 2..N-1. */
338 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
339 void **p = *lp;
341 if (p == NULL) {
342 if (!alloc) {
343 return NULL;
345 ALLOC(p, sizeof(void *) * L2_SIZE);
346 *lp = p;
349 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
352 pd = *lp;
353 if (pd == NULL) {
354 if (!alloc) {
355 return NULL;
357 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
358 *lp = pd;
361 #undef ALLOC
363 return pd + (index & (L2_SIZE - 1));
366 static inline PageDesc *page_find(tb_page_addr_t index)
368 return page_find_alloc(index, 0);
371 #if !defined(CONFIG_USER_ONLY)
373 static void phys_map_node_reserve(unsigned nodes)
375 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
376 typedef PhysPageEntry Node[L2_SIZE];
377 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
378 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
379 phys_map_nodes_nb + nodes);
380 phys_map_nodes = g_renew(Node, phys_map_nodes,
381 phys_map_nodes_nb_alloc);
385 static uint16_t phys_map_node_alloc(void)
387 unsigned i;
388 uint16_t ret;
390 ret = phys_map_nodes_nb++;
391 assert(ret != PHYS_MAP_NODE_NIL);
392 assert(ret != phys_map_nodes_nb_alloc);
393 for (i = 0; i < L2_SIZE; ++i) {
394 phys_map_nodes[ret][i].is_leaf = 0;
395 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
397 return ret;
400 static void phys_map_nodes_reset(void)
402 phys_map_nodes_nb = 0;
406 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
407 hwaddr *nb, uint16_t leaf,
408 int level)
410 PhysPageEntry *p;
411 int i;
412 hwaddr step = (hwaddr)1 << (level * L2_BITS);
414 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
415 lp->ptr = phys_map_node_alloc();
416 p = phys_map_nodes[lp->ptr];
417 if (level == 0) {
418 for (i = 0; i < L2_SIZE; i++) {
419 p[i].is_leaf = 1;
420 p[i].ptr = phys_section_unassigned;
423 } else {
424 p = phys_map_nodes[lp->ptr];
426 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
428 while (*nb && lp < &p[L2_SIZE]) {
429 if ((*index & (step - 1)) == 0 && *nb >= step) {
430 lp->is_leaf = true;
431 lp->ptr = leaf;
432 *index += step;
433 *nb -= step;
434 } else {
435 phys_page_set_level(lp, index, nb, leaf, level - 1);
437 ++lp;
441 static void phys_page_set(AddressSpaceDispatch *d,
442 hwaddr index, hwaddr nb,
443 uint16_t leaf)
445 /* Wildly overreserve - it doesn't matter much. */
446 phys_map_node_reserve(3 * P_L2_LEVELS);
448 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
451 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
453 PhysPageEntry lp = d->phys_map;
454 PhysPageEntry *p;
455 int i;
456 uint16_t s_index = phys_section_unassigned;
458 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
459 if (lp.ptr == PHYS_MAP_NODE_NIL) {
460 goto not_found;
462 p = phys_map_nodes[lp.ptr];
463 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
466 s_index = lp.ptr;
467 not_found:
468 return &phys_sections[s_index];
471 bool memory_region_is_unassigned(MemoryRegion *mr)
473 return mr != &io_mem_ram && mr != &io_mem_rom
474 && mr != &io_mem_notdirty && !mr->rom_device
475 && mr != &io_mem_watch;
478 #define mmap_lock() do { } while(0)
479 #define mmap_unlock() do { } while(0)
480 #endif
482 #if defined(CONFIG_USER_ONLY)
483 /* Currently it is not recommended to allocate big chunks of data in
484 user mode. It will change when a dedicated libc will be used. */
485 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
486 region in which the guest needs to run. Revisit this. */
487 #define USE_STATIC_CODE_GEN_BUFFER
488 #endif
490 /* ??? Should configure for this, not list operating systems here. */
491 #if (defined(__linux__) \
492 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
493 || defined(__DragonFly__) || defined(__OpenBSD__) \
494 || defined(__NetBSD__))
495 # define USE_MMAP
496 #endif
498 /* Minimum size of the code gen buffer. This number is randomly chosen,
499 but not so small that we can't have a fair number of TB's live. */
500 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
502 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
503 indicated, this is constrained by the range of direct branches on the
504 host cpu, as used by the TCG implementation of goto_tb. */
505 #if defined(__x86_64__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__sparc__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
509 #elif defined(__arm__)
510 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
511 #elif defined(__s390x__)
512 /* We have a +- 4GB range on the branches; leave some slop. */
513 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
514 #else
515 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
516 #endif
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
520 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
521 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
522 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
524 static inline size_t size_code_gen_buffer(size_t tb_size)
526 /* Size the buffer. */
527 if (tb_size == 0) {
528 #ifdef USE_STATIC_CODE_GEN_BUFFER
529 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
530 #else
531 /* ??? Needs adjustments. */
532 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
533 static buffer, we could size this on RESERVED_VA, on the text
534 segment size of the executable, or continue to use the default. */
535 tb_size = (unsigned long)(ram_size / 4);
536 #endif
538 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
539 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
541 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
542 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
544 code_gen_buffer_size = tb_size;
545 return tb_size;
548 #ifdef USE_STATIC_CODE_GEN_BUFFER
549 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
550 __attribute__((aligned(CODE_GEN_ALIGN)));
552 static inline void *alloc_code_gen_buffer(void)
554 map_exec(static_code_gen_buffer, code_gen_buffer_size);
555 return static_code_gen_buffer;
557 #elif defined(USE_MMAP)
558 static inline void *alloc_code_gen_buffer(void)
560 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
561 uintptr_t start = 0;
562 void *buf;
564 /* Constrain the position of the buffer based on the host cpu.
565 Note that these addresses are chosen in concert with the
566 addresses assigned in the relevant linker script file. */
567 # if defined(__PIE__) || defined(__PIC__)
568 /* Don't bother setting a preferred location if we're building
569 a position-independent executable. We're more likely to get
570 an address near the main executable if we let the kernel
571 choose the address. */
572 # elif defined(__x86_64__) && defined(MAP_32BIT)
573 /* Force the memory down into low memory with the executable.
574 Leave the choice of exact location with the kernel. */
575 flags |= MAP_32BIT;
576 /* Cannot expect to map more than 800MB in low memory. */
577 if (code_gen_buffer_size > 800u * 1024 * 1024) {
578 code_gen_buffer_size = 800u * 1024 * 1024;
580 # elif defined(__sparc__)
581 start = 0x40000000ul;
582 # elif defined(__s390x__)
583 start = 0x90000000ul;
584 # endif
586 buf = mmap((void *)start, code_gen_buffer_size,
587 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
588 return buf == MAP_FAILED ? NULL : buf;
590 #else
591 static inline void *alloc_code_gen_buffer(void)
593 void *buf = g_malloc(code_gen_buffer_size);
594 if (buf) {
595 map_exec(buf, code_gen_buffer_size);
597 return buf;
599 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
601 static inline void code_gen_alloc(size_t tb_size)
603 code_gen_buffer_size = size_code_gen_buffer(tb_size);
604 code_gen_buffer = alloc_code_gen_buffer();
605 if (code_gen_buffer == NULL) {
606 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
607 exit(1);
610 qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
612 /* Steal room for the prologue at the end of the buffer. This ensures
613 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
614 from TB's to the prologue are going to be in range. It also means
615 that we don't need to mark (additional) portions of the data segment
616 as executable. */
617 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
618 code_gen_buffer_size -= 1024;
620 code_gen_buffer_max_size = code_gen_buffer_size -
621 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
622 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
623 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
626 /* Must be called before using the QEMU cpus. 'tb_size' is the size
627 (in bytes) allocated to the translation buffer. Zero means default
628 size. */
629 void tcg_exec_init(unsigned long tb_size)
631 cpu_gen_init();
632 code_gen_alloc(tb_size);
633 code_gen_ptr = code_gen_buffer;
634 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
635 page_init();
636 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
637 /* There's no guest base to take into account, so go ahead and
638 initialize the prologue now. */
639 tcg_prologue_init(&tcg_ctx);
640 #endif
643 bool tcg_enabled(void)
645 return code_gen_buffer != NULL;
/* One-time global initialisation.  In system-mode builds this sets up the
   memory map and then the I/O memory regions; in user-mode builds it does
   nothing. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

/* post_load hook for the "cpu_common" section: clear the legacy 0x01
   interrupt bit and flush the TLB of the CPU passed in 'opaque'.
   Always returns 0. */
static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *cpu_env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu_env->interrupt_request &= ~0x01;
    tlb_flush(cpu_env, 1);

    return 0;
}

/* Saved state shared by all CPU types: 'halted' and 'interrupt_request'. */
static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif
684 CPUArchState *qemu_get_cpu(int cpu)
686 CPUArchState *env = first_cpu;
688 while (env) {
689 if (env->cpu_index == cpu)
690 break;
691 env = env->next_cpu;
694 return env;
697 void cpu_exec_init(CPUArchState *env)
699 #ifndef CONFIG_USER_ONLY
700 CPUState *cpu = ENV_GET_CPU(env);
701 #endif
702 CPUArchState **penv;
703 int cpu_index;
705 #if defined(CONFIG_USER_ONLY)
706 cpu_list_lock();
707 #endif
708 env->next_cpu = NULL;
709 penv = &first_cpu;
710 cpu_index = 0;
711 while (*penv != NULL) {
712 penv = &(*penv)->next_cpu;
713 cpu_index++;
715 env->cpu_index = cpu_index;
716 env->numa_node = 0;
717 QTAILQ_INIT(&env->breakpoints);
718 QTAILQ_INIT(&env->watchpoints);
719 #ifndef CONFIG_USER_ONLY
720 cpu->thread_id = qemu_get_thread_id();
721 #endif
722 *penv = env;
723 #if defined(CONFIG_USER_ONLY)
724 cpu_list_unlock();
725 #endif
726 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
727 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
728 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
729 cpu_save, cpu_load, env);
730 #endif
733 /* Allocate a new translation block. Flush the translation buffer if
734 too many translation blocks or too much generated code. */
735 static TranslationBlock *tb_alloc(target_ulong pc)
737 TranslationBlock *tb;
739 if (nb_tbs >= code_gen_max_blocks ||
740 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
741 return NULL;
742 tb = &tbs[nb_tbs++];
743 tb->pc = pc;
744 tb->cflags = 0;
745 return tb;
748 void tb_free(TranslationBlock *tb)
750 /* In practice this is mostly used for single use temporary TB
751 Ignore the hard cases and just back up if this TB happens to
752 be the last one generated. */
753 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
754 code_gen_ptr = tb->tc_ptr;
755 nb_tbs--;
759 static inline void invalidate_page_bitmap(PageDesc *p)
761 if (p->code_bitmap) {
762 g_free(p->code_bitmap);
763 p->code_bitmap = NULL;
765 p->code_write_count = 0;
768 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
770 static void page_flush_tb_1 (int level, void **lp)
772 int i;
774 if (*lp == NULL) {
775 return;
777 if (level == 0) {
778 PageDesc *pd = *lp;
779 for (i = 0; i < L2_SIZE; ++i) {
780 pd[i].first_tb = NULL;
781 invalidate_page_bitmap(pd + i);
783 } else {
784 void **pp = *lp;
785 for (i = 0; i < L2_SIZE; ++i) {
786 page_flush_tb_1 (level - 1, pp + i);
791 static void page_flush_tb(void)
793 int i;
794 for (i = 0; i < V_L1_SIZE; i++) {
795 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
799 /* flush all the translation blocks */
800 /* XXX: tb_flush is currently not thread safe */
801 void tb_flush(CPUArchState *env1)
803 CPUArchState *env;
804 #if defined(DEBUG_FLUSH)
805 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
806 (unsigned long)(code_gen_ptr - code_gen_buffer),
807 nb_tbs, nb_tbs > 0 ?
808 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
809 #endif
810 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
811 cpu_abort(env1, "Internal error: code buffer overflow\n");
813 nb_tbs = 0;
815 for(env = first_cpu; env != NULL; env = env->next_cpu) {
816 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
819 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
820 page_flush_tb();
822 code_gen_ptr = code_gen_buffer;
823 /* XXX: flush processor icache at this point if cache flush is
824 expensive */
825 tb_flush_count++;
#ifdef DEBUG_TB_CHECK

/* Debug check: report every TB still in the physical hash table whose
   [pc, pc + size) range overlaps the target page containing 'address'. */
static void tb_invalidate_check(target_ulong address)
{
    TranslationBlock *tb;
    int bucket;

    address &= TARGET_PAGE_MASK;
    for (bucket = 0; bucket < CODE_GEN_PHYS_HASH_SIZE; bucket++) {
        for (tb = tb_phys_hash[bucket]; tb != NULL; tb = tb->phys_hash_next) {
            if (address + TARGET_PAGE_SIZE > tb->pc &&
                address < tb->pc + tb->size) {
                printf("ERROR invalidate: address=" TARGET_FMT_lx
                       " PC=%08lx size=%04x\n",
                       address, (long)tb->pc, tb->size);
            }
        }
    }
}

/* verify that all the pages have correct rights for code */
/* Reports every hashed TB whose first or last byte lies on a page that
   has PAGE_WRITE set. */
static void tb_page_check(void)
{
    TranslationBlock *tb;
    int bucket, first_flags, last_flags;

    for (bucket = 0; bucket < CODE_GEN_PHYS_HASH_SIZE; bucket++) {
        for (tb = tb_phys_hash[bucket]; tb != NULL; tb = tb->phys_hash_next) {
            first_flags = page_get_flags(tb->pc);
            last_flags = page_get_flags(tb->pc + tb->size - 1);
            if ((first_flags & PAGE_WRITE) || (last_flags & PAGE_WRITE)) {
                printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
                       (long)tb->pc, tb->size, first_flags, last_flags);
            }
        }
    }
}

#endif
867 /* invalidate one TB */
868 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
869 int next_offset)
871 TranslationBlock *tb1;
872 for(;;) {
873 tb1 = *ptb;
874 if (tb1 == tb) {
875 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
876 break;
878 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
882 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
884 TranslationBlock *tb1;
885 unsigned int n1;
887 for(;;) {
888 tb1 = *ptb;
889 n1 = (uintptr_t)tb1 & 3;
890 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
891 if (tb1 == tb) {
892 *ptb = tb1->page_next[n1];
893 break;
895 ptb = &tb1->page_next[n1];
899 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
901 TranslationBlock *tb1, **ptb;
902 unsigned int n1;
904 ptb = &tb->jmp_next[n];
905 tb1 = *ptb;
906 if (tb1) {
907 /* find tb(n) in circular list */
908 for(;;) {
909 tb1 = *ptb;
910 n1 = (uintptr_t)tb1 & 3;
911 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
912 if (n1 == n && tb1 == tb)
913 break;
914 if (n1 == 2) {
915 ptb = &tb1->jmp_first;
916 } else {
917 ptb = &tb1->jmp_next[n1];
920 /* now we can suppress tb(n) from the list */
921 *ptb = tb->jmp_next[n];
923 tb->jmp_next[n] = NULL;
927 /* reset the jump entry 'n' of a TB so that it is not chained to
928 another TB */
929 static inline void tb_reset_jump(TranslationBlock *tb, int n)
931 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
934 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
936 CPUArchState *env;
937 PageDesc *p;
938 unsigned int h, n1;
939 tb_page_addr_t phys_pc;
940 TranslationBlock *tb1, *tb2;
942 /* remove the TB from the hash list */
943 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
944 h = tb_phys_hash_func(phys_pc);
945 tb_remove(&tb_phys_hash[h], tb,
946 offsetof(TranslationBlock, phys_hash_next));
948 /* remove the TB from the page list */
949 if (tb->page_addr[0] != page_addr) {
950 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
951 tb_page_remove(&p->first_tb, tb);
952 invalidate_page_bitmap(p);
954 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
955 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
956 tb_page_remove(&p->first_tb, tb);
957 invalidate_page_bitmap(p);
960 tb_invalidated_flag = 1;
962 /* remove the TB from the hash list */
963 h = tb_jmp_cache_hash_func(tb->pc);
964 for(env = first_cpu; env != NULL; env = env->next_cpu) {
965 if (env->tb_jmp_cache[h] == tb)
966 env->tb_jmp_cache[h] = NULL;
969 /* suppress this TB from the two jump lists */
970 tb_jmp_remove(tb, 0);
971 tb_jmp_remove(tb, 1);
973 /* suppress any remaining jumps to this TB */
974 tb1 = tb->jmp_first;
975 for(;;) {
976 n1 = (uintptr_t)tb1 & 3;
977 if (n1 == 2)
978 break;
979 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
980 tb2 = tb1->jmp_next[n1];
981 tb_reset_jump(tb1, n1);
982 tb1->jmp_next[n1] = NULL;
983 tb1 = tb2;
985 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
987 tb_phys_invalidate_count++;
/* Set 'len' consecutive bits in the bitmap 'tab', starting at bit index
   'start'.  Bit k lives in byte k / 8 at bit position k % 8. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int stop = start + len;
    uint8_t *byte = tab + (start >> 3);
    /* Bits from 'start' upwards in the first byte, and bits below 'stop'
       in the last byte; only the low 8 bits reach the bitmap. */
    int head_mask = 0xff << (start & 7);
    int tail_mask = ~(0xff << (stop & 7));
    int cursor, last_full;

    if ((start & ~7) == (stop & ~7)) {
        /* The whole run lies inside a single byte. */
        if (start < stop) {
            *byte |= head_mask & tail_mask;
        }
        return;
    }

    /* Partial first byte, then whole bytes, then a partial last byte. */
    *byte++ |= head_mask;
    cursor = (start + 8) & ~7;
    last_full = stop & ~7;
    for (; cursor < last_full; cursor += 8) {
        *byte++ = 0xff;
    }
    if (cursor < stop) {
        *byte |= tail_mask;
    }
}
1017 static void build_page_bitmap(PageDesc *p)
1019 int n, tb_start, tb_end;
1020 TranslationBlock *tb;
1022 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1024 tb = p->first_tb;
1025 while (tb != NULL) {
1026 n = (uintptr_t)tb & 3;
1027 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1028 /* NOTE: this is subtle as a TB may span two physical pages */
1029 if (n == 0) {
1030 /* NOTE: tb_end may be after the end of the page, but
1031 it is not a problem */
1032 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1033 tb_end = tb_start + tb->size;
1034 if (tb_end > TARGET_PAGE_SIZE)
1035 tb_end = TARGET_PAGE_SIZE;
1036 } else {
1037 tb_start = 0;
1038 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1040 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1041 tb = tb->page_next[n];
1045 TranslationBlock *tb_gen_code(CPUArchState *env,
1046 target_ulong pc, target_ulong cs_base,
1047 int flags, int cflags)
1049 TranslationBlock *tb;
1050 uint8_t *tc_ptr;
1051 tb_page_addr_t phys_pc, phys_page2;
1052 target_ulong virt_page2;
1053 int code_gen_size;
1055 phys_pc = get_page_addr_code(env, pc);
1056 tb = tb_alloc(pc);
1057 if (!tb) {
1058 /* flush must be done */
1059 tb_flush(env);
1060 /* cannot fail at this point */
1061 tb = tb_alloc(pc);
1062 /* Don't forget to invalidate previous TB info. */
1063 tb_invalidated_flag = 1;
1065 tc_ptr = code_gen_ptr;
1066 tb->tc_ptr = tc_ptr;
1067 tb->cs_base = cs_base;
1068 tb->flags = flags;
1069 tb->cflags = cflags;
1070 cpu_gen_code(env, tb, &code_gen_size);
1071 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1072 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1074 /* check next page if needed */
1075 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1076 phys_page2 = -1;
1077 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1078 phys_page2 = get_page_addr_code(env, virt_page2);
1080 tb_link_page(tb, phys_pc, phys_page2);
1081 return tb;
1085 * Invalidate all TBs which intersect with the target physical address range
1086 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1087 * 'is_cpu_write_access' should be true if called from a real cpu write
1088 * access: the virtual CPU will exit the current TB if code is modified inside
1089 * this TB.
1091 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1092 int is_cpu_write_access)
1094 while (start < end) {
1095 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1096 start &= TARGET_PAGE_MASK;
1097 start += TARGET_PAGE_SIZE;
1102 * Invalidate all TBs which intersect with the target physical address range
1103 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1104 * 'is_cpu_write_access' should be true if called from a real cpu write
1105 * access: the virtual CPU will exit the current TB if code is modified inside
1106 * this TB.
1108 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1109 int is_cpu_write_access)
1111 TranslationBlock *tb, *tb_next, *saved_tb;
1112 CPUArchState *env = cpu_single_env;
1113 tb_page_addr_t tb_start, tb_end;
1114 PageDesc *p;
1115 int n;
1116 #ifdef TARGET_HAS_PRECISE_SMC
1117 int current_tb_not_found = is_cpu_write_access;
1118 TranslationBlock *current_tb = NULL;
1119 int current_tb_modified = 0;
1120 target_ulong current_pc = 0;
1121 target_ulong current_cs_base = 0;
1122 int current_flags = 0;
1123 #endif /* TARGET_HAS_PRECISE_SMC */
1125 p = page_find(start >> TARGET_PAGE_BITS);
1126 if (!p)
1127 return;
1128 if (!p->code_bitmap &&
1129 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1130 is_cpu_write_access) {
1131 /* build code bitmap */
1132 build_page_bitmap(p);
1135 /* we remove all the TBs in the range [start, end[ */
1136 /* XXX: see if in some cases it could be faster to invalidate all the code */
1137 tb = p->first_tb;
1138 while (tb != NULL) {
1139 n = (uintptr_t)tb & 3;
1140 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1141 tb_next = tb->page_next[n];
1142 /* NOTE: this is subtle as a TB may span two physical pages */
1143 if (n == 0) {
1144 /* NOTE: tb_end may be after the end of the page, but
1145 it is not a problem */
1146 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1147 tb_end = tb_start + tb->size;
1148 } else {
1149 tb_start = tb->page_addr[1];
1150 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1152 if (!(tb_end <= start || tb_start >= end)) {
1153 #ifdef TARGET_HAS_PRECISE_SMC
1154 if (current_tb_not_found) {
1155 current_tb_not_found = 0;
1156 current_tb = NULL;
1157 if (env->mem_io_pc) {
1158 /* now we have a real cpu fault */
1159 current_tb = tb_find_pc(env->mem_io_pc);
1162 if (current_tb == tb &&
1163 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1164 /* If we are modifying the current TB, we must stop
1165 its execution. We could be more precise by checking
1166 that the modification is after the current PC, but it
1167 would require a specialized function to partially
1168 restore the CPU state */
1170 current_tb_modified = 1;
1171 cpu_restore_state(current_tb, env, env->mem_io_pc);
1172 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1173 &current_flags);
1175 #endif /* TARGET_HAS_PRECISE_SMC */
1176 /* we need to do that to handle the case where a signal
1177 occurs while doing tb_phys_invalidate() */
1178 saved_tb = NULL;
1179 if (env) {
1180 saved_tb = env->current_tb;
1181 env->current_tb = NULL;
1183 tb_phys_invalidate(tb, -1);
1184 if (env) {
1185 env->current_tb = saved_tb;
1186 if (env->interrupt_request && env->current_tb)
1187 cpu_interrupt(env, env->interrupt_request);
1190 tb = tb_next;
1192 #if !defined(CONFIG_USER_ONLY)
1193 /* if no code remaining, no need to continue to use slow writes */
1194 if (!p->first_tb) {
1195 invalidate_page_bitmap(p);
1196 if (is_cpu_write_access) {
1197 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1200 #endif
1201 #ifdef TARGET_HAS_PRECISE_SMC
1202 if (current_tb_modified) {
1203 /* we generate a block containing just the instruction
1204 modifying the memory. It will ensure that it cannot modify
1205 itself */
1206 env->current_tb = NULL;
1207 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1208 cpu_resume_from_signal(env, NULL);
1210 #endif
1213 /* len must be <= 8 and start must be a multiple of len */
1214 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1216 PageDesc *p;
1217 int offset, b;
1218 #if 0
1219 if (1) {
1220 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1221 cpu_single_env->mem_io_vaddr, len,
1222 cpu_single_env->eip,
1223 cpu_single_env->eip +
1224 (intptr_t)cpu_single_env->segs[R_CS].base);
1226 #endif
1227 p = page_find(start >> TARGET_PAGE_BITS);
1228 if (!p)
1229 return;
1230 if (p->code_bitmap) {
1231 offset = start & ~TARGET_PAGE_MASK;
1232 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1233 if (b & ((1 << len) - 1))
1234 goto do_invalidate;
1235 } else {
1236 do_invalidate:
1237 tb_invalidate_phys_page_range(start, start + len, 1);
1241 #if !defined(CONFIG_SOFTMMU)
1242 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1243 uintptr_t pc, void *puc)
1245 TranslationBlock *tb;
1246 PageDesc *p;
1247 int n;
1248 #ifdef TARGET_HAS_PRECISE_SMC
1249 TranslationBlock *current_tb = NULL;
1250 CPUArchState *env = cpu_single_env;
1251 int current_tb_modified = 0;
1252 target_ulong current_pc = 0;
1253 target_ulong current_cs_base = 0;
1254 int current_flags = 0;
1255 #endif
1257 addr &= TARGET_PAGE_MASK;
1258 p = page_find(addr >> TARGET_PAGE_BITS);
1259 if (!p)
1260 return;
1261 tb = p->first_tb;
1262 #ifdef TARGET_HAS_PRECISE_SMC
1263 if (tb && pc != 0) {
1264 current_tb = tb_find_pc(pc);
1266 #endif
1267 while (tb != NULL) {
1268 n = (uintptr_t)tb & 3;
1269 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1270 #ifdef TARGET_HAS_PRECISE_SMC
1271 if (current_tb == tb &&
1272 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1273 /* If we are modifying the current TB, we must stop
1274 its execution. We could be more precise by checking
1275 that the modification is after the current PC, but it
1276 would require a specialized function to partially
1277 restore the CPU state */
1279 current_tb_modified = 1;
1280 cpu_restore_state(current_tb, env, pc);
1281 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1282 &current_flags);
1284 #endif /* TARGET_HAS_PRECISE_SMC */
1285 tb_phys_invalidate(tb, addr);
1286 tb = tb->page_next[n];
1288 p->first_tb = NULL;
1289 #ifdef TARGET_HAS_PRECISE_SMC
1290 if (current_tb_modified) {
1291 /* we generate a block containing just the instruction
1292 modifying the memory. It will ensure that it cannot modify
1293 itself */
1294 env->current_tb = NULL;
1295 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1296 cpu_resume_from_signal(env, puc);
1298 #endif
1300 #endif
1302 /* add the tb in the target page and protect it if necessary */
1303 static inline void tb_alloc_page(TranslationBlock *tb,
1304 unsigned int n, tb_page_addr_t page_addr)
1306 PageDesc *p;
1307 #ifndef CONFIG_USER_ONLY
1308 bool page_already_protected;
1309 #endif
1311 tb->page_addr[n] = page_addr;
1312 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1313 tb->page_next[n] = p->first_tb;
1314 #ifndef CONFIG_USER_ONLY
1315 page_already_protected = p->first_tb != NULL;
1316 #endif
1317 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1318 invalidate_page_bitmap(p);
1320 #if defined(TARGET_HAS_SMC) || 1
1322 #if defined(CONFIG_USER_ONLY)
1323 if (p->flags & PAGE_WRITE) {
1324 target_ulong addr;
1325 PageDesc *p2;
1326 int prot;
1328 /* force the host page as non writable (writes will have a
1329 page fault + mprotect overhead) */
1330 page_addr &= qemu_host_page_mask;
1331 prot = 0;
1332 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1333 addr += TARGET_PAGE_SIZE) {
1335 p2 = page_find (addr >> TARGET_PAGE_BITS);
1336 if (!p2)
1337 continue;
1338 prot |= p2->flags;
1339 p2->flags &= ~PAGE_WRITE;
1341 mprotect(g2h(page_addr), qemu_host_page_size,
1342 (prot & PAGE_BITS) & ~PAGE_WRITE);
1343 #ifdef DEBUG_TB_INVALIDATE
1344 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1345 page_addr);
1346 #endif
1348 #else
1349 /* if some code is already present, then the pages are already
1350 protected. So we handle the case where only the first TB is
1351 allocated in a physical page */
1352 if (!page_already_protected) {
1353 tlb_protect_code(page_addr);
1355 #endif
1357 #endif /* TARGET_HAS_SMC */
1360 /* add a new TB and link it to the physical page tables. phys_page2 is
1361 (-1) to indicate that only one page contains the TB. */
1362 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1363 tb_page_addr_t phys_page2)
1365 unsigned int h;
1366 TranslationBlock **ptb;
1368 /* Grab the mmap lock to stop another thread invalidating this TB
1369 before we are done. */
1370 mmap_lock();
1371 /* add in the physical hash table */
1372 h = tb_phys_hash_func(phys_pc);
1373 ptb = &tb_phys_hash[h];
1374 tb->phys_hash_next = *ptb;
1375 *ptb = tb;
1377 /* add in the page list */
1378 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1379 if (phys_page2 != -1)
1380 tb_alloc_page(tb, 1, phys_page2);
1381 else
1382 tb->page_addr[1] = -1;
1384 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1385 tb->jmp_next[0] = NULL;
1386 tb->jmp_next[1] = NULL;
1388 /* init original jump addresses */
1389 if (tb->tb_next_offset[0] != 0xffff)
1390 tb_reset_jump(tb, 0);
1391 if (tb->tb_next_offset[1] != 0xffff)
1392 tb_reset_jump(tb, 1);
1394 #ifdef DEBUG_TB_CHECK
1395 tb_page_check();
1396 #endif
1397 mmap_unlock();
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
/* Tell whether 'tc_ptr' points into the TCG generated code buffer */
bool is_tcg_gen_code(uintptr_t tc_ptr)
{
    /* This can be called during code generation, code_gen_buffer_max_size
       is used instead of code_gen_ptr for upper boundary checking */
    uintptr_t buf_start = (uintptr_t)code_gen_buffer;
    uintptr_t buf_limit = (uintptr_t)(code_gen_buffer +
                                      code_gen_buffer_max_size);

    return tc_ptr >= buf_start && tc_ptr < buf_limit;
}
#endif
1411 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1412 tb[1].tc_ptr. Return NULL if not found */
1413 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1415 int m_min, m_max, m;
1416 uintptr_t v;
1417 TranslationBlock *tb;
1419 if (nb_tbs <= 0)
1420 return NULL;
1421 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1422 tc_ptr >= (uintptr_t)code_gen_ptr) {
1423 return NULL;
1425 /* binary search (cf Knuth) */
1426 m_min = 0;
1427 m_max = nb_tbs - 1;
1428 while (m_min <= m_max) {
1429 m = (m_min + m_max) >> 1;
1430 tb = &tbs[m];
1431 v = (uintptr_t)tb->tc_ptr;
1432 if (v == tc_ptr)
1433 return tb;
1434 else if (tc_ptr < v) {
1435 m_max = m - 1;
1436 } else {
1437 m_min = m + 1;
1440 return &tbs[m_max];
1443 static void tb_reset_jump_recursive(TranslationBlock *tb);
1445 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1447 TranslationBlock *tb1, *tb_next, **ptb;
1448 unsigned int n1;
1450 tb1 = tb->jmp_next[n];
1451 if (tb1 != NULL) {
1452 /* find head of list */
1453 for(;;) {
1454 n1 = (uintptr_t)tb1 & 3;
1455 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1456 if (n1 == 2)
1457 break;
1458 tb1 = tb1->jmp_next[n1];
1460 /* we are now sure now that tb jumps to tb1 */
1461 tb_next = tb1;
1463 /* remove tb from the jmp_first list */
1464 ptb = &tb_next->jmp_first;
1465 for(;;) {
1466 tb1 = *ptb;
1467 n1 = (uintptr_t)tb1 & 3;
1468 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1469 if (n1 == n && tb1 == tb)
1470 break;
1471 ptb = &tb1->jmp_next[n1];
1473 *ptb = tb->jmp_next[n];
1474 tb->jmp_next[n] = NULL;
1476 /* suppress the jump to next tb in generated code */
1477 tb_reset_jump(tb, n);
1479 /* suppress jumps in the tb on which we could have jumped */
1480 tb_reset_jump_recursive(tb_next);
1484 static void tb_reset_jump_recursive(TranslationBlock *tb)
1486 tb_reset_jump_recursive2(tb, 0);
1487 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Drop the translated code covering the byte at 'pc' */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Drop the translated code covering the byte at physical address 'addr'.
   Does nothing unless the address resolves to RAM or to a readable
   ROM device. */
void tb_invalidate_phys_addr(hwaddr addr)
{
    MemoryRegionSection *section;
    ram_addr_t ram_addr;

    section = phys_page_find(address_space_memory.dispatch,
                             addr >> TARGET_PAGE_BITS);
    if (!memory_region_is_ram(section->mr)
        && !(section->mr->rom_device && section->mr->readable)) {
        return;
    }
    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
        + memory_region_section_addr(section, addr);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}

/* Translate 'pc' with cpu_get_phys_page_debug() and drop the translated
   code covering that byte */
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
                            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */
1520 #if defined(CONFIG_USER_ONLY)
1521 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1526 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1527 int flags, CPUWatchpoint **watchpoint)
1529 return -ENOSYS;
1531 #else
1532 /* Add a watchpoint. */
1533 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1534 int flags, CPUWatchpoint **watchpoint)
1536 target_ulong len_mask = ~(len - 1);
1537 CPUWatchpoint *wp;
1539 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1540 if ((len & (len - 1)) || (addr & ~len_mask) ||
1541 len == 0 || len > TARGET_PAGE_SIZE) {
1542 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1543 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1544 return -EINVAL;
1546 wp = g_malloc(sizeof(*wp));
1548 wp->vaddr = addr;
1549 wp->len_mask = len_mask;
1550 wp->flags = flags;
1552 /* keep all GDB-injected watchpoints in front */
1553 if (flags & BP_GDB)
1554 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1555 else
1556 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1558 tlb_flush_page(env, addr);
1560 if (watchpoint)
1561 *watchpoint = wp;
1562 return 0;
1565 /* Remove a specific watchpoint. */
1566 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1567 int flags)
1569 target_ulong len_mask = ~(len - 1);
1570 CPUWatchpoint *wp;
1572 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1573 if (addr == wp->vaddr && len_mask == wp->len_mask
1574 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1575 cpu_watchpoint_remove_by_ref(env, wp);
1576 return 0;
1579 return -ENOENT;
1582 /* Remove a specific watchpoint by reference. */
1583 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1585 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1587 tlb_flush_page(env, watchpoint->vaddr);
1589 g_free(watchpoint);
1592 /* Remove all matching watchpoints. */
1593 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1595 CPUWatchpoint *wp, *next;
1597 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1598 if (wp->flags & mask)
1599 cpu_watchpoint_remove_by_ref(env, wp);
1602 #endif
1604 /* Add a breakpoint. */
1605 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1606 CPUBreakpoint **breakpoint)
1608 #if defined(TARGET_HAS_ICE)
1609 CPUBreakpoint *bp;
1611 bp = g_malloc(sizeof(*bp));
1613 bp->pc = pc;
1614 bp->flags = flags;
1616 /* keep all GDB-injected breakpoints in front */
1617 if (flags & BP_GDB)
1618 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1619 else
1620 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1622 breakpoint_invalidate(env, pc);
1624 if (breakpoint)
1625 *breakpoint = bp;
1626 return 0;
1627 #else
1628 return -ENOSYS;
1629 #endif
1632 /* Remove a specific breakpoint. */
1633 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1635 #if defined(TARGET_HAS_ICE)
1636 CPUBreakpoint *bp;
1638 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1639 if (bp->pc == pc && bp->flags == flags) {
1640 cpu_breakpoint_remove_by_ref(env, bp);
1641 return 0;
1644 return -ENOENT;
1645 #else
1646 return -ENOSYS;
1647 #endif
1650 /* Remove a specific breakpoint by reference. */
1651 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1653 #if defined(TARGET_HAS_ICE)
1654 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1656 breakpoint_invalidate(env, breakpoint->pc);
1658 g_free(breakpoint);
1659 #endif
1662 /* Remove all matching breakpoints. */
1663 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1665 #if defined(TARGET_HAS_ICE)
1666 CPUBreakpoint *bp, *next;
1668 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1669 if (bp->flags & mask)
1670 cpu_breakpoint_remove_by_ref(env, bp);
1672 #endif
1675 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1676 CPU loop after each instruction */
1677 void cpu_single_step(CPUArchState *env, int enabled)
1679 #if defined(TARGET_HAS_ICE)
1680 if (env->singlestep_enabled != enabled) {
1681 env->singlestep_enabled = enabled;
1682 if (kvm_enabled())
1683 kvm_update_guest_debug(env, 0);
1684 else {
1685 /* must flush all the translated code to avoid inconsistencies */
1686 /* XXX: only flush what is necessary */
1687 tb_flush(env);
1690 #endif
1693 static void cpu_unlink_tb(CPUArchState *env)
1695 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1696 problem and hope the cpu will stop of its own accord. For userspace
1697 emulation this often isn't actually as bad as it sounds. Often
1698 signals are used primarily to interrupt blocking syscalls. */
1699 TranslationBlock *tb;
1700 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1702 spin_lock(&interrupt_lock);
1703 tb = env->current_tb;
1704 /* if the cpu is currently executing code, we must unlink it and
1705 all the potentially executing TB */
1706 if (tb) {
1707 env->current_tb = NULL;
1708 tb_reset_jump_recursive(tb);
1710 spin_unlock(&interrupt_lock);
1713 #ifndef CONFIG_USER_ONLY
1714 /* mask must never be zero, except for A20 change call */
1715 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1717 CPUState *cpu = ENV_GET_CPU(env);
1718 int old_mask;
1720 old_mask = env->interrupt_request;
1721 env->interrupt_request |= mask;
1724 * If called from iothread context, wake the target cpu in
1725 * case its halted.
1727 if (!qemu_cpu_is_self(cpu)) {
1728 qemu_cpu_kick(cpu);
1729 return;
1732 if (use_icount) {
1733 env->icount_decr.u16.high = 0xffff;
1734 if (!can_do_io(env)
1735 && (mask & ~old_mask) != 0) {
1736 cpu_abort(env, "Raised interrupt while not in I/O function");
1738 } else {
1739 cpu_unlink_tb(env);
1743 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1745 #else /* CONFIG_USER_ONLY */
1747 void cpu_interrupt(CPUArchState *env, int mask)
1749 env->interrupt_request |= mask;
1750 cpu_unlink_tb(env);
1752 #endif /* CONFIG_USER_ONLY */
1754 void cpu_reset_interrupt(CPUArchState *env, int mask)
1756 env->interrupt_request &= ~mask;
1759 void cpu_exit(CPUArchState *env)
1761 env->exit_request = 1;
1762 cpu_unlink_tb(env);
1765 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1767 va_list ap;
1768 va_list ap2;
1770 va_start(ap, fmt);
1771 va_copy(ap2, ap);
1772 fprintf(stderr, "qemu: fatal: ");
1773 vfprintf(stderr, fmt, ap);
1774 fprintf(stderr, "\n");
1775 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1776 if (qemu_log_enabled()) {
1777 qemu_log("qemu: fatal: ");
1778 qemu_log_vprintf(fmt, ap2);
1779 qemu_log("\n");
1780 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1781 qemu_log_flush();
1782 qemu_log_close();
1784 va_end(ap2);
1785 va_end(ap);
1786 #if defined(CONFIG_USER_ONLY)
1788 struct sigaction act;
1789 sigfillset(&act.sa_mask);
1790 act.sa_handler = SIG_DFL;
1791 sigaction(SIGABRT, &act, NULL);
1793 #endif
1794 abort();
1797 CPUArchState *cpu_copy(CPUArchState *env)
1799 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1800 CPUArchState *next_cpu = new_env->next_cpu;
1801 int cpu_index = new_env->cpu_index;
1802 #if defined(TARGET_HAS_ICE)
1803 CPUBreakpoint *bp;
1804 CPUWatchpoint *wp;
1805 #endif
1807 memcpy(new_env, env, sizeof(CPUArchState));
1809 /* Preserve chaining and index. */
1810 new_env->next_cpu = next_cpu;
1811 new_env->cpu_index = cpu_index;
1813 /* Clone all break/watchpoints.
1814 Note: Once we support ptrace with hw-debug register access, make sure
1815 BP_CPU break/watchpoints are handled correctly on clone. */
1816 QTAILQ_INIT(&env->breakpoints);
1817 QTAILQ_INIT(&env->watchpoints);
1818 #if defined(TARGET_HAS_ICE)
1819 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1820 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1822 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1823 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1824 wp->flags, NULL);
1826 #endif
1828 return new_env;
1831 #if !defined(CONFIG_USER_ONLY)
1832 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1834 unsigned int i;
1836 /* Discard jump cache entries for any tb which might potentially
1837 overlap the flushed page. */
1838 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1839 memset (&env->tb_jmp_cache[i], 0,
1840 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1842 i = tb_jmp_cache_hash_page(addr);
1843 memset (&env->tb_jmp_cache[i], 0,
1844 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1847 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1848 uintptr_t length)
1850 uintptr_t start1;
1852 /* we modify the TLB cache so that the dirty bit will be set again
1853 when accessing the range */
1854 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1855 /* Check that we don't span multiple blocks - this breaks the
1856 address comparisons below. */
1857 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1858 != (end - 1) - start) {
1859 abort();
1861 cpu_tlb_reset_dirty_all(start1, length);
1865 /* Note: start and end must be within the same ram block. */
1866 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1867 int dirty_flags)
1869 uintptr_t length;
1871 start &= TARGET_PAGE_MASK;
1872 end = TARGET_PAGE_ALIGN(end);
1874 length = end - start;
1875 if (length == 0)
1876 return;
1877 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1879 if (tcg_enabled()) {
1880 tlb_reset_dirty_range_all(start, end, length);
1884 static int cpu_physical_memory_set_dirty_tracking(int enable)
1886 int ret = 0;
1887 in_migration = enable;
1888 return ret;
1891 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1892 MemoryRegionSection *section,
1893 target_ulong vaddr,
1894 hwaddr paddr,
1895 int prot,
1896 target_ulong *address)
1898 hwaddr iotlb;
1899 CPUWatchpoint *wp;
1901 if (memory_region_is_ram(section->mr)) {
1902 /* Normal RAM. */
1903 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1904 + memory_region_section_addr(section, paddr);
1905 if (!section->readonly) {
1906 iotlb |= phys_section_notdirty;
1907 } else {
1908 iotlb |= phys_section_rom;
1910 } else {
1911 /* IO handlers are currently passed a physical address.
1912 It would be nice to pass an offset from the base address
1913 of that region. This would avoid having to special case RAM,
1914 and avoid full address decoding in every device.
1915 We can't use the high bits of pd for this because
1916 IO_MEM_ROMD uses these as a ram address. */
1917 iotlb = section - phys_sections;
1918 iotlb += memory_region_section_addr(section, paddr);
1921 /* Make accesses to pages with watchpoints go via the
1922 watchpoint trap routines. */
1923 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1924 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1925 /* Avoid trapping reads of pages with a write breakpoint. */
1926 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1927 iotlb = phys_section_watch + paddr;
1928 *address |= TLB_MMIO;
1929 break;
1934 return iotlb;
1937 #else
1939 * Walks guest process memory "regions" one by one
1940 * and calls callback function 'fn' for each region.
1943 struct walk_memory_regions_data
1945 walk_memory_regions_fn fn;
1946 void *priv;
1947 uintptr_t start;
1948 int prot;
1951 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1952 abi_ulong end, int new_prot)
1954 if (data->start != -1ul) {
1955 int rc = data->fn(data->priv, data->start, end, data->prot);
1956 if (rc != 0) {
1957 return rc;
1961 data->start = (new_prot ? end : -1ul);
1962 data->prot = new_prot;
1964 return 0;
1967 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1968 abi_ulong base, int level, void **lp)
1970 abi_ulong pa;
1971 int i, rc;
1973 if (*lp == NULL) {
1974 return walk_memory_regions_end(data, base, 0);
1977 if (level == 0) {
1978 PageDesc *pd = *lp;
1979 for (i = 0; i < L2_SIZE; ++i) {
1980 int prot = pd[i].flags;
1982 pa = base | (i << TARGET_PAGE_BITS);
1983 if (prot != data->prot) {
1984 rc = walk_memory_regions_end(data, pa, prot);
1985 if (rc != 0) {
1986 return rc;
1990 } else {
1991 void **pp = *lp;
1992 for (i = 0; i < L2_SIZE; ++i) {
1993 pa = base | ((abi_ulong)i <<
1994 (TARGET_PAGE_BITS + L2_BITS * level));
1995 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1996 if (rc != 0) {
1997 return rc;
2002 return 0;
2005 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2007 struct walk_memory_regions_data data;
2008 uintptr_t i;
2010 data.fn = fn;
2011 data.priv = priv;
2012 data.start = -1ul;
2013 data.prot = 0;
2015 for (i = 0; i < V_L1_SIZE; i++) {
2016 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2017 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2018 if (rc != 0) {
2019 return rc;
2023 return walk_memory_regions_end(&data, 0, 0);
2026 static int dump_region(void *priv, abi_ulong start,
2027 abi_ulong end, unsigned long prot)
2029 FILE *f = (FILE *)priv;
2031 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2032 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2033 start, end, end - start,
2034 ((prot & PAGE_READ) ? 'r' : '-'),
2035 ((prot & PAGE_WRITE) ? 'w' : '-'),
2036 ((prot & PAGE_EXEC) ? 'x' : '-'));
2038 return (0);
2041 /* dump memory mappings */
2042 void page_dump(FILE *f)
2044 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2045 "start", "end", "size", "prot");
2046 walk_memory_regions(f, dump_region);
2049 int page_get_flags(target_ulong address)
2051 PageDesc *p;
2053 p = page_find(address >> TARGET_PAGE_BITS);
2054 if (!p)
2055 return 0;
2056 return p->flags;
2059 /* Modify the flags of a page and invalidate the code if necessary.
2060 The flag PAGE_WRITE_ORG is positioned automatically depending
2061 on PAGE_WRITE. The mmap_lock should already be held. */
2062 void page_set_flags(target_ulong start, target_ulong end, int flags)
2064 target_ulong addr, len;
2066 /* This function should never be called with addresses outside the
2067 guest address space. If this assert fires, it probably indicates
2068 a missing call to h2g_valid. */
2069 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2070 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2071 #endif
2072 assert(start < end);
2074 start = start & TARGET_PAGE_MASK;
2075 end = TARGET_PAGE_ALIGN(end);
2077 if (flags & PAGE_WRITE) {
2078 flags |= PAGE_WRITE_ORG;
2081 for (addr = start, len = end - start;
2082 len != 0;
2083 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2084 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2086 /* If the write protection bit is set, then we invalidate
2087 the code inside. */
2088 if (!(p->flags & PAGE_WRITE) &&
2089 (flags & PAGE_WRITE) &&
2090 p->first_tb) {
2091 tb_invalidate_phys_page(addr, 0, NULL);
2093 p->flags = flags;
2097 int page_check_range(target_ulong start, target_ulong len, int flags)
2099 PageDesc *p;
2100 target_ulong end;
2101 target_ulong addr;
2103 /* This function should never be called with addresses outside the
2104 guest address space. If this assert fires, it probably indicates
2105 a missing call to h2g_valid. */
2106 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2107 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2108 #endif
2110 if (len == 0) {
2111 return 0;
2113 if (start + len - 1 < start) {
2114 /* We've wrapped around. */
2115 return -1;
2118 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2119 start = start & TARGET_PAGE_MASK;
2121 for (addr = start, len = end - start;
2122 len != 0;
2123 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2124 p = page_find(addr >> TARGET_PAGE_BITS);
2125 if( !p )
2126 return -1;
2127 if( !(p->flags & PAGE_VALID) )
2128 return -1;
2130 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2131 return -1;
2132 if (flags & PAGE_WRITE) {
2133 if (!(p->flags & PAGE_WRITE_ORG))
2134 return -1;
2135 /* unprotect the page if it was put read-only because it
2136 contains translated code */
2137 if (!(p->flags & PAGE_WRITE)) {
2138 if (!page_unprotect(addr, 0, NULL))
2139 return -1;
2141 return 0;
2144 return 0;
2147 /* called from signal handler: invalidate the code and unprotect the
2148 page. Return TRUE if the fault was successfully handled. */
2149 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2151 unsigned int prot;
2152 PageDesc *p;
2153 target_ulong host_start, host_end, addr;
2155 /* Technically this isn't safe inside a signal handler. However we
2156 know this only ever happens in a synchronous SEGV handler, so in
2157 practice it seems to be ok. */
2158 mmap_lock();
2160 p = page_find(address >> TARGET_PAGE_BITS);
2161 if (!p) {
2162 mmap_unlock();
2163 return 0;
2166 /* if the page was really writable, then we change its
2167 protection back to writable */
2168 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2169 host_start = address & qemu_host_page_mask;
2170 host_end = host_start + qemu_host_page_size;
2172 prot = 0;
2173 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2174 p = page_find(addr >> TARGET_PAGE_BITS);
2175 p->flags |= PAGE_WRITE;
2176 prot |= p->flags;
2178 /* and since the content will be modified, we must invalidate
2179 the corresponding translated code. */
2180 tb_invalidate_phys_page(addr, pc, puc);
2181 #ifdef DEBUG_TB_CHECK
2182 tb_invalidate_check(addr);
2183 #endif
2185 mprotect((void *)g2h(host_start), qemu_host_page_size,
2186 prot & PAGE_BITS);
2188 mmap_unlock();
2189 return 1;
2191 mmap_unlock();
2192 return 0;
2194 #endif /* defined(CONFIG_USER_ONLY) */
2196 #if !defined(CONFIG_USER_ONLY)
2198 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2199 typedef struct subpage_t {
2200 MemoryRegion iomem;
2201 hwaddr base;
2202 uint16_t sub_section[TARGET_PAGE_SIZE];
2203 } subpage_t;
2205 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2206 uint16_t section);
2207 static subpage_t *subpage_init(hwaddr base);
2208 static void destroy_page_desc(uint16_t section_index)
2210 MemoryRegionSection *section = &phys_sections[section_index];
2211 MemoryRegion *mr = section->mr;
2213 if (mr->subpage) {
2214 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2215 memory_region_destroy(&subpage->iomem);
2216 g_free(subpage);
2220 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2222 unsigned i;
2223 PhysPageEntry *p;
2225 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2226 return;
2229 p = phys_map_nodes[lp->ptr];
2230 for (i = 0; i < L2_SIZE; ++i) {
2231 if (!p[i].is_leaf) {
2232 destroy_l2_mapping(&p[i], level - 1);
2233 } else {
2234 destroy_page_desc(p[i].ptr);
2237 lp->is_leaf = 0;
2238 lp->ptr = PHYS_MAP_NODE_NIL;
2241 static void destroy_all_mappings(AddressSpaceDispatch *d)
2243 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2244 phys_map_nodes_reset();
2247 static uint16_t phys_section_add(MemoryRegionSection *section)
2249 if (phys_sections_nb == phys_sections_nb_alloc) {
2250 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2251 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2252 phys_sections_nb_alloc);
2254 phys_sections[phys_sections_nb] = *section;
2255 return phys_sections_nb++;
2258 static void phys_sections_clear(void)
2260 phys_sections_nb = 0;
2263 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2265 subpage_t *subpage;
2266 hwaddr base = section->offset_within_address_space
2267 & TARGET_PAGE_MASK;
2268 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2269 MemoryRegionSection subsection = {
2270 .offset_within_address_space = base,
2271 .size = TARGET_PAGE_SIZE,
2273 hwaddr start, end;
2275 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2277 if (!(existing->mr->subpage)) {
2278 subpage = subpage_init(base);
2279 subsection.mr = &subpage->iomem;
2280 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2281 phys_section_add(&subsection));
2282 } else {
2283 subpage = container_of(existing->mr, subpage_t, iomem);
2285 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2286 end = start + section->size - 1;
2287 subpage_register(subpage, start, end, phys_section_add(section));
2291 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2293 hwaddr start_addr = section->offset_within_address_space;
2294 ram_addr_t size = section->size;
2295 hwaddr addr;
2296 uint16_t section_index = phys_section_add(section);
2298 assert(size);
2300 addr = start_addr;
2301 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2302 section_index);
2305 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2307 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2308 MemoryRegionSection now = *section, remain = *section;
2310 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2311 || (now.size < TARGET_PAGE_SIZE)) {
2312 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2313 - now.offset_within_address_space,
2314 now.size);
2315 register_subpage(d, &now);
2316 remain.size -= now.size;
2317 remain.offset_within_address_space += now.size;
2318 remain.offset_within_region += now.size;
2320 while (remain.size >= TARGET_PAGE_SIZE) {
2321 now = remain;
2322 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2323 now.size = TARGET_PAGE_SIZE;
2324 register_subpage(d, &now);
2325 } else {
2326 now.size &= TARGET_PAGE_MASK;
2327 register_multipage(d, &now);
2329 remain.size -= now.size;
2330 remain.offset_within_address_space += now.size;
2331 remain.offset_within_region += now.size;
2333 now = remain;
2334 if (now.size) {
2335 register_subpage(d, &now);
/* Flush the KVM coalesced MMIO buffer; does nothing when KVM is not
   enabled */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2345 #if defined(__linux__) && !defined(TARGET_S390X)
2347 #include <sys/vfs.h>
2349 #define HUGETLBFS_MAGIC 0x958458f6
2351 static long gethugepagesize(const char *path)
2353 struct statfs fs;
2354 int ret;
2356 do {
2357 ret = statfs(path, &fs);
2358 } while (ret != 0 && errno == EINTR);
2360 if (ret != 0) {
2361 perror(path);
2362 return 0;
2365 if (fs.f_type != HUGETLBFS_MAGIC)
2366 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2368 return fs.f_bsize;
2371 static void *file_ram_alloc(RAMBlock *block,
2372 ram_addr_t memory,
2373 const char *path)
2375 char *filename;
2376 void *area;
2377 int fd;
2378 #ifdef MAP_POPULATE
2379 int flags;
2380 #endif
2381 unsigned long hpagesize;
2383 hpagesize = gethugepagesize(path);
2384 if (!hpagesize) {
2385 return NULL;
2388 if (memory < hpagesize) {
2389 return NULL;
2392 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2393 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2394 return NULL;
2397 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2398 return NULL;
2401 fd = mkstemp(filename);
2402 if (fd < 0) {
2403 perror("unable to create backing store for hugepages");
2404 free(filename);
2405 return NULL;
2407 unlink(filename);
2408 free(filename);
2410 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2413 * ftruncate is not supported by hugetlbfs in older
2414 * hosts, so don't bother bailing out on errors.
2415 * If anything goes wrong with it under other filesystems,
2416 * mmap will fail.
2418 if (ftruncate(fd, memory))
2419 perror("ftruncate");
2421 #ifdef MAP_POPULATE
2422 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2423 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2424 * to sidestep this quirk.
2426 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2427 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2428 #else
2429 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2430 #endif
2431 if (area == MAP_FAILED) {
2432 perror("file_ram_alloc: can't mmap RAM pages");
2433 close(fd);
2434 return (NULL);
2436 block->fd = fd;
2437 return area;
2439 #endif
2441 static ram_addr_t find_ram_offset(ram_addr_t size)
2443 RAMBlock *block, *next_block;
2444 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2446 if (QLIST_EMPTY(&ram_list.blocks))
2447 return 0;
2449 QLIST_FOREACH(block, &ram_list.blocks, next) {
2450 ram_addr_t end, next = RAM_ADDR_MAX;
2452 end = block->offset + block->length;
2454 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2455 if (next_block->offset >= end) {
2456 next = MIN(next, next_block->offset);
2459 if (next - end >= size && next - end < mingap) {
2460 offset = end;
2461 mingap = next - end;
2465 if (offset == RAM_ADDR_MAX) {
2466 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2467 (uint64_t)size);
2468 abort();
2471 return offset;
2474 ram_addr_t last_ram_offset(void)
2476 RAMBlock *block;
2477 ram_addr_t last = 0;
2479 QLIST_FOREACH(block, &ram_list.blocks, next)
2480 last = MAX(last, block->offset + block->length);
2482 return last;
2485 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2487 int ret;
2488 QemuOpts *machine_opts;
2490 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2491 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2492 if (machine_opts &&
2493 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2494 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2495 if (ret) {
2496 perror("qemu_madvise");
2497 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2498 "but dump_guest_core=off specified\n");
2503 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2505 RAMBlock *new_block, *block;
2507 new_block = NULL;
2508 QLIST_FOREACH(block, &ram_list.blocks, next) {
2509 if (block->offset == addr) {
2510 new_block = block;
2511 break;
2514 assert(new_block);
2515 assert(!new_block->idstr[0]);
2517 if (dev) {
2518 char *id = qdev_get_dev_path(dev);
2519 if (id) {
2520 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2521 g_free(id);
2524 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2526 QLIST_FOREACH(block, &ram_list.blocks, next) {
2527 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2528 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2529 new_block->idstr);
2530 abort();
2535 static int memory_try_enable_merging(void *addr, size_t len)
2537 QemuOpts *opts;
2539 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2540 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2541 /* disabled by the user */
2542 return 0;
2545 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2548 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2549 MemoryRegion *mr)
2551 RAMBlock *new_block;
2553 size = TARGET_PAGE_ALIGN(size);
2554 new_block = g_malloc0(sizeof(*new_block));
2556 new_block->mr = mr;
2557 new_block->offset = find_ram_offset(size);
2558 if (host) {
2559 new_block->host = host;
2560 new_block->flags |= RAM_PREALLOC_MASK;
2561 } else {
2562 if (mem_path) {
2563 #if defined (__linux__) && !defined(TARGET_S390X)
2564 new_block->host = file_ram_alloc(new_block, size, mem_path);
2565 if (!new_block->host) {
2566 new_block->host = qemu_vmalloc(size);
2567 memory_try_enable_merging(new_block->host, size);
2569 #else
2570 fprintf(stderr, "-mem-path option unsupported\n");
2571 exit(1);
2572 #endif
2573 } else {
2574 if (xen_enabled()) {
2575 xen_ram_alloc(new_block->offset, size, mr);
2576 } else if (kvm_enabled()) {
2577 /* some s390/kvm configurations have special constraints */
2578 new_block->host = kvm_vmalloc(size);
2579 } else {
2580 new_block->host = qemu_vmalloc(size);
2582 memory_try_enable_merging(new_block->host, size);
2585 new_block->length = size;
2587 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2589 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2590 last_ram_offset() >> TARGET_PAGE_BITS);
2591 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2592 0, size >> TARGET_PAGE_BITS);
2593 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2595 qemu_ram_setup_dump(new_block->host, size);
2596 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2598 if (kvm_enabled())
2599 kvm_setup_guest_memory(new_block->host, size);
2601 return new_block->offset;
2604 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2606 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2609 void qemu_ram_free_from_ptr(ram_addr_t addr)
2611 RAMBlock *block;
2613 QLIST_FOREACH(block, &ram_list.blocks, next) {
2614 if (addr == block->offset) {
2615 QLIST_REMOVE(block, next);
2616 g_free(block);
2617 return;
2622 void qemu_ram_free(ram_addr_t addr)
2624 RAMBlock *block;
2626 QLIST_FOREACH(block, &ram_list.blocks, next) {
2627 if (addr == block->offset) {
2628 QLIST_REMOVE(block, next);
2629 if (block->flags & RAM_PREALLOC_MASK) {
2631 } else if (mem_path) {
2632 #if defined (__linux__) && !defined(TARGET_S390X)
2633 if (block->fd) {
2634 munmap(block->host, block->length);
2635 close(block->fd);
2636 } else {
2637 qemu_vfree(block->host);
2639 #else
2640 abort();
2641 #endif
2642 } else {
2643 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2644 munmap(block->host, block->length);
2645 #else
2646 if (xen_enabled()) {
2647 xen_invalidate_map_cache_entry(block->host);
2648 } else {
2649 qemu_vfree(block->host);
2651 #endif
2653 g_free(block);
2654 return;
2660 #ifndef _WIN32
2661 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2663 RAMBlock *block;
2664 ram_addr_t offset;
2665 int flags;
2666 void *area, *vaddr;
2668 QLIST_FOREACH(block, &ram_list.blocks, next) {
2669 offset = addr - block->offset;
2670 if (offset < block->length) {
2671 vaddr = block->host + offset;
2672 if (block->flags & RAM_PREALLOC_MASK) {
2674 } else {
2675 flags = MAP_FIXED;
2676 munmap(vaddr, length);
2677 if (mem_path) {
2678 #if defined(__linux__) && !defined(TARGET_S390X)
2679 if (block->fd) {
2680 #ifdef MAP_POPULATE
2681 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2682 MAP_PRIVATE;
2683 #else
2684 flags |= MAP_PRIVATE;
2685 #endif
2686 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2687 flags, block->fd, offset);
2688 } else {
2689 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2690 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2691 flags, -1, 0);
2693 #else
2694 abort();
2695 #endif
2696 } else {
2697 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2698 flags |= MAP_SHARED | MAP_ANONYMOUS;
2699 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2700 flags, -1, 0);
2701 #else
2702 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2703 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2704 flags, -1, 0);
2705 #endif
2707 if (area != vaddr) {
2708 fprintf(stderr, "Could not remap addr: "
2709 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2710 length, addr);
2711 exit(1);
2713 memory_try_enable_merging(vaddr, length);
2714 qemu_ram_setup_dump(vaddr, length);
2716 return;
2720 #endif /* !_WIN32 */
2722 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2723 With the exception of the softmmu code in this file, this should
2724 only be used for local memory (e.g. video ram) that the device owns,
2725 and knows it isn't going to access beyond the end of the block.
2727 It should not be used for general purpose DMA.
2728 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2730 void *qemu_get_ram_ptr(ram_addr_t addr)
2732 RAMBlock *block;
2734 QLIST_FOREACH(block, &ram_list.blocks, next) {
2735 if (addr - block->offset < block->length) {
2736 /* Move this entry to to start of the list. */
2737 if (block != QLIST_FIRST(&ram_list.blocks)) {
2738 QLIST_REMOVE(block, next);
2739 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2741 if (xen_enabled()) {
2742 /* We need to check if the requested address is in the RAM
2743 * because we don't want to map the entire memory in QEMU.
2744 * In that case just map until the end of the page.
2746 if (block->offset == 0) {
2747 return xen_map_cache(addr, 0, 0);
2748 } else if (block->host == NULL) {
2749 block->host =
2750 xen_map_cache(block->offset, block->length, 1);
2753 return block->host + (addr - block->offset);
2757 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2758 abort();
2760 return NULL;
2763 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2764 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2766 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2768 RAMBlock *block;
2770 QLIST_FOREACH(block, &ram_list.blocks, next) {
2771 if (addr - block->offset < block->length) {
2772 if (xen_enabled()) {
2773 /* We need to check if the requested address is in the RAM
2774 * because we don't want to map the entire memory in QEMU.
2775 * In that case just map until the end of the page.
2777 if (block->offset == 0) {
2778 return xen_map_cache(addr, 0, 0);
2779 } else if (block->host == NULL) {
2780 block->host =
2781 xen_map_cache(block->offset, block->length, 1);
2784 return block->host + (addr - block->offset);
2788 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2789 abort();
2791 return NULL;
2794 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2795 * but takes a size argument */
2796 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2798 if (*size == 0) {
2799 return NULL;
2801 if (xen_enabled()) {
2802 return xen_map_cache(addr, *size, 1);
2803 } else {
2804 RAMBlock *block;
2806 QLIST_FOREACH(block, &ram_list.blocks, next) {
2807 if (addr - block->offset < block->length) {
2808 if (addr - block->offset + *size > block->length)
2809 *size = block->length - addr + block->offset;
2810 return block->host + (addr - block->offset);
2814 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2815 abort();
/*
 * Counterpart of qemu_get_ram_ptr().  The only thing done here is to emit
 * the qemu_put_ram_ptr trace event for @addr.
 */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2824 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2826 RAMBlock *block;
2827 uint8_t *host = ptr;
2829 if (xen_enabled()) {
2830 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2831 return 0;
2834 QLIST_FOREACH(block, &ram_list.blocks, next) {
2835 /* This case append when the block is not mapped. */
2836 if (block->host == NULL) {
2837 continue;
2839 if (host - block->host < block->length) {
2840 *ram_addr = block->offset + (host - block->host);
2841 return 0;
2845 return -1;
2848 /* Some of the softmmu routines need to translate from a host pointer
2849 (typically a TLB entry) back to a ram offset. */
2850 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2852 ram_addr_t ram_addr;
2854 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2855 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2856 abort();
2858 return ram_addr;
2861 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2862 unsigned size)
2864 #ifdef DEBUG_UNASSIGNED
2865 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2866 #endif
2867 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2868 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2869 #endif
2870 return 0;
2873 static void unassigned_mem_write(void *opaque, hwaddr addr,
2874 uint64_t val, unsigned size)
2876 #ifdef DEBUG_UNASSIGNED
2877 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2878 #endif
2879 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2880 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2881 #endif
2884 static const MemoryRegionOps unassigned_mem_ops = {
2885 .read = unassigned_mem_read,
2886 .write = unassigned_mem_write,
2887 .endianness = DEVICE_NATIVE_ENDIAN,
2890 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2891 unsigned size)
2893 abort();
2896 static void error_mem_write(void *opaque, hwaddr addr,
2897 uint64_t value, unsigned size)
2899 abort();
2902 static const MemoryRegionOps error_mem_ops = {
2903 .read = error_mem_read,
2904 .write = error_mem_write,
2905 .endianness = DEVICE_NATIVE_ENDIAN,
2908 static const MemoryRegionOps rom_mem_ops = {
2909 .read = error_mem_read,
2910 .write = unassigned_mem_write,
2911 .endianness = DEVICE_NATIVE_ENDIAN,
2914 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2915 uint64_t val, unsigned size)
2917 int dirty_flags;
2918 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2919 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2920 #if !defined(CONFIG_USER_ONLY)
2921 tb_invalidate_phys_page_fast(ram_addr, size);
2922 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2923 #endif
2925 switch (size) {
2926 case 1:
2927 stb_p(qemu_get_ram_ptr(ram_addr), val);
2928 break;
2929 case 2:
2930 stw_p(qemu_get_ram_ptr(ram_addr), val);
2931 break;
2932 case 4:
2933 stl_p(qemu_get_ram_ptr(ram_addr), val);
2934 break;
2935 default:
2936 abort();
2938 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2939 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2940 /* we remove the notdirty callback only if the code has been
2941 flushed */
2942 if (dirty_flags == 0xff)
2943 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2946 static const MemoryRegionOps notdirty_mem_ops = {
2947 .read = error_mem_read,
2948 .write = notdirty_mem_write,
2949 .endianness = DEVICE_NATIVE_ENDIAN,
2952 /* Generate a debug exception if a watchpoint has been hit. */
2953 static void check_watchpoint(int offset, int len_mask, int flags)
2955 CPUArchState *env = cpu_single_env;
2956 target_ulong pc, cs_base;
2957 TranslationBlock *tb;
2958 target_ulong vaddr;
2959 CPUWatchpoint *wp;
2960 int cpu_flags;
2962 if (env->watchpoint_hit) {
2963 /* We re-entered the check after replacing the TB. Now raise
2964 * the debug interrupt so that is will trigger after the
2965 * current instruction. */
2966 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2967 return;
2969 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2970 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2971 if ((vaddr == (wp->vaddr & len_mask) ||
2972 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2973 wp->flags |= BP_WATCHPOINT_HIT;
2974 if (!env->watchpoint_hit) {
2975 env->watchpoint_hit = wp;
2976 tb = tb_find_pc(env->mem_io_pc);
2977 if (!tb) {
2978 cpu_abort(env, "check_watchpoint: could not find TB for "
2979 "pc=%p", (void *)env->mem_io_pc);
2981 cpu_restore_state(tb, env, env->mem_io_pc);
2982 tb_phys_invalidate(tb, -1);
2983 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2984 env->exception_index = EXCP_DEBUG;
2985 cpu_loop_exit(env);
2986 } else {
2987 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2988 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2989 cpu_resume_from_signal(env, NULL);
2992 } else {
2993 wp->flags &= ~BP_WATCHPOINT_HIT;
2998 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2999 so these check for a hit then pass through to the normal out-of-line
3000 phys routines. */
3001 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
3002 unsigned size)
3004 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3005 switch (size) {
3006 case 1: return ldub_phys(addr);
3007 case 2: return lduw_phys(addr);
3008 case 4: return ldl_phys(addr);
3009 default: abort();
3013 static void watch_mem_write(void *opaque, hwaddr addr,
3014 uint64_t val, unsigned size)
3016 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3017 switch (size) {
3018 case 1:
3019 stb_phys(addr, val);
3020 break;
3021 case 2:
3022 stw_phys(addr, val);
3023 break;
3024 case 4:
3025 stl_phys(addr, val);
3026 break;
3027 default: abort();
3031 static const MemoryRegionOps watch_mem_ops = {
3032 .read = watch_mem_read,
3033 .write = watch_mem_write,
3034 .endianness = DEVICE_NATIVE_ENDIAN,
3037 static uint64_t subpage_read(void *opaque, hwaddr addr,
3038 unsigned len)
3040 subpage_t *mmio = opaque;
3041 unsigned int idx = SUBPAGE_IDX(addr);
3042 MemoryRegionSection *section;
3043 #if defined(DEBUG_SUBPAGE)
3044 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3045 mmio, len, addr, idx);
3046 #endif
3048 section = &phys_sections[mmio->sub_section[idx]];
3049 addr += mmio->base;
3050 addr -= section->offset_within_address_space;
3051 addr += section->offset_within_region;
3052 return io_mem_read(section->mr, addr, len);
3055 static void subpage_write(void *opaque, hwaddr addr,
3056 uint64_t value, unsigned len)
3058 subpage_t *mmio = opaque;
3059 unsigned int idx = SUBPAGE_IDX(addr);
3060 MemoryRegionSection *section;
3061 #if defined(DEBUG_SUBPAGE)
3062 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3063 " idx %d value %"PRIx64"\n",
3064 __func__, mmio, len, addr, idx, value);
3065 #endif
3067 section = &phys_sections[mmio->sub_section[idx]];
3068 addr += mmio->base;
3069 addr -= section->offset_within_address_space;
3070 addr += section->offset_within_region;
3071 io_mem_write(section->mr, addr, value, len);
3074 static const MemoryRegionOps subpage_ops = {
3075 .read = subpage_read,
3076 .write = subpage_write,
3077 .endianness = DEVICE_NATIVE_ENDIAN,
3080 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3081 unsigned size)
3083 ram_addr_t raddr = addr;
3084 void *ptr = qemu_get_ram_ptr(raddr);
3085 switch (size) {
3086 case 1: return ldub_p(ptr);
3087 case 2: return lduw_p(ptr);
3088 case 4: return ldl_p(ptr);
3089 default: abort();
3093 static void subpage_ram_write(void *opaque, hwaddr addr,
3094 uint64_t value, unsigned size)
3096 ram_addr_t raddr = addr;
3097 void *ptr = qemu_get_ram_ptr(raddr);
3098 switch (size) {
3099 case 1: return stb_p(ptr, value);
3100 case 2: return stw_p(ptr, value);
3101 case 4: return stl_p(ptr, value);
3102 default: abort();
3106 static const MemoryRegionOps subpage_ram_ops = {
3107 .read = subpage_ram_read,
3108 .write = subpage_ram_write,
3109 .endianness = DEVICE_NATIVE_ENDIAN,
3112 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3113 uint16_t section)
3115 int idx, eidx;
3117 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3118 return -1;
3119 idx = SUBPAGE_IDX(start);
3120 eidx = SUBPAGE_IDX(end);
3121 #if defined(DEBUG_SUBPAGE)
3122 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3123 mmio, start, end, idx, eidx, memory);
3124 #endif
3125 if (memory_region_is_ram(phys_sections[section].mr)) {
3126 MemoryRegionSection new_section = phys_sections[section];
3127 new_section.mr = &io_mem_subpage_ram;
3128 section = phys_section_add(&new_section);
3130 for (; idx <= eidx; idx++) {
3131 mmio->sub_section[idx] = section;
3134 return 0;
3137 static subpage_t *subpage_init(hwaddr base)
3139 subpage_t *mmio;
3141 mmio = g_malloc0(sizeof(subpage_t));
3143 mmio->base = base;
3144 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3145 "subpage", TARGET_PAGE_SIZE);
3146 mmio->iomem.subpage = true;
3147 #if defined(DEBUG_SUBPAGE)
3148 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3149 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3150 #endif
3151 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3153 return mmio;
3156 static uint16_t dummy_section(MemoryRegion *mr)
3158 MemoryRegionSection section = {
3159 .mr = mr,
3160 .offset_within_address_space = 0,
3161 .offset_within_region = 0,
3162 .size = UINT64_MAX,
3165 return phys_section_add(&section);
3168 MemoryRegion *iotlb_to_region(hwaddr index)
3170 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3173 static void io_mem_init(void)
3175 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3176 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3177 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3178 "unassigned", UINT64_MAX);
3179 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3180 "notdirty", UINT64_MAX);
3181 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3182 "subpage-ram", UINT64_MAX);
3183 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3184 "watch", UINT64_MAX);
3187 static void mem_begin(MemoryListener *listener)
3189 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3191 destroy_all_mappings(d);
3192 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3195 static void core_begin(MemoryListener *listener)
3197 phys_sections_clear();
3198 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3199 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3200 phys_section_rom = dummy_section(&io_mem_rom);
3201 phys_section_watch = dummy_section(&io_mem_watch);
3204 static void tcg_commit(MemoryListener *listener)
3206 CPUArchState *env;
3208 /* since each CPU stores ram addresses in its TLB cache, we must
3209 reset the modified entries */
3210 /* XXX: slow ! */
3211 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3212 tlb_flush(env, 1);
3216 static void core_log_global_start(MemoryListener *listener)
3218 cpu_physical_memory_set_dirty_tracking(1);
3221 static void core_log_global_stop(MemoryListener *listener)
3223 cpu_physical_memory_set_dirty_tracking(0);
3226 static void io_region_add(MemoryListener *listener,
3227 MemoryRegionSection *section)
3229 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3231 mrio->mr = section->mr;
3232 mrio->offset = section->offset_within_region;
3233 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3234 section->offset_within_address_space, section->size);
3235 ioport_register(&mrio->iorange);
3238 static void io_region_del(MemoryListener *listener,
3239 MemoryRegionSection *section)
3241 isa_unassign_ioport(section->offset_within_address_space, section->size);
3244 static MemoryListener core_memory_listener = {
3245 .begin = core_begin,
3246 .log_global_start = core_log_global_start,
3247 .log_global_stop = core_log_global_stop,
3248 .priority = 1,
3251 static MemoryListener io_memory_listener = {
3252 .region_add = io_region_add,
3253 .region_del = io_region_del,
3254 .priority = 0,
3257 static MemoryListener tcg_memory_listener = {
3258 .commit = tcg_commit,
3261 void address_space_init_dispatch(AddressSpace *as)
3263 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3265 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3266 d->listener = (MemoryListener) {
3267 .begin = mem_begin,
3268 .region_add = mem_add,
3269 .region_nop = mem_add,
3270 .priority = 0,
3272 as->dispatch = d;
3273 memory_listener_register(&d->listener, as);
3276 void address_space_destroy_dispatch(AddressSpace *as)
3278 AddressSpaceDispatch *d = as->dispatch;
3280 memory_listener_unregister(&d->listener);
3281 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3282 g_free(d);
3283 as->dispatch = NULL;
3286 static void memory_map_init(void)
3288 system_memory = g_malloc(sizeof(*system_memory));
3289 memory_region_init(system_memory, "system", INT64_MAX);
3290 address_space_init(&address_space_memory, system_memory);
3291 address_space_memory.name = "memory";
3293 system_io = g_malloc(sizeof(*system_io));
3294 memory_region_init(system_io, "io", 65536);
3295 address_space_init(&address_space_io, system_io);
3296 address_space_io.name = "I/O";
3298 memory_listener_register(&core_memory_listener, &address_space_memory);
3299 memory_listener_register(&io_memory_listener, &address_space_io);
3300 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3302 dma_context_init(&dma_context_memory, &address_space_memory,
3303 NULL, NULL, NULL);
3306 MemoryRegion *get_system_memory(void)
3308 return system_memory;
3311 MemoryRegion *get_system_io(void)
3313 return system_io;
3316 #endif /* !defined(CONFIG_USER_ONLY) */
3318 /* physical memory access (slow version, mainly for debug) */
3319 #if defined(CONFIG_USER_ONLY)
3320 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3321 uint8_t *buf, int len, int is_write)
3323 int l, flags;
3324 target_ulong page;
3325 void * p;
3327 while (len > 0) {
3328 page = addr & TARGET_PAGE_MASK;
3329 l = (page + TARGET_PAGE_SIZE) - addr;
3330 if (l > len)
3331 l = len;
3332 flags = page_get_flags(page);
3333 if (!(flags & PAGE_VALID))
3334 return -1;
3335 if (is_write) {
3336 if (!(flags & PAGE_WRITE))
3337 return -1;
3338 /* XXX: this code should not depend on lock_user */
3339 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3340 return -1;
3341 memcpy(p, buf, l);
3342 unlock_user(p, addr, l);
3343 } else {
3344 if (!(flags & PAGE_READ))
3345 return -1;
3346 /* XXX: this code should not depend on lock_user */
3347 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3348 return -1;
3349 memcpy(buf, p, l);
3350 unlock_user(p, addr, 0);
3352 len -= l;
3353 buf += l;
3354 addr += l;
3356 return 0;
3359 #else
3361 static void invalidate_and_set_dirty(hwaddr addr,
3362 hwaddr length)
3364 if (!cpu_physical_memory_is_dirty(addr)) {
3365 /* invalidate code */
3366 tb_invalidate_phys_page_range(addr, addr + length, 0);
3367 /* set dirty bit */
3368 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3370 xen_modified_memory(addr, length);
3373 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3374 int len, bool is_write)
3376 AddressSpaceDispatch *d = as->dispatch;
3377 int l;
3378 uint8_t *ptr;
3379 uint32_t val;
3380 hwaddr page;
3381 MemoryRegionSection *section;
3383 while (len > 0) {
3384 page = addr & TARGET_PAGE_MASK;
3385 l = (page + TARGET_PAGE_SIZE) - addr;
3386 if (l > len)
3387 l = len;
3388 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3390 if (is_write) {
3391 if (!memory_region_is_ram(section->mr)) {
3392 hwaddr addr1;
3393 addr1 = memory_region_section_addr(section, addr);
3394 /* XXX: could force cpu_single_env to NULL to avoid
3395 potential bugs */
3396 if (l >= 4 && ((addr1 & 3) == 0)) {
3397 /* 32 bit write access */
3398 val = ldl_p(buf);
3399 io_mem_write(section->mr, addr1, val, 4);
3400 l = 4;
3401 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3402 /* 16 bit write access */
3403 val = lduw_p(buf);
3404 io_mem_write(section->mr, addr1, val, 2);
3405 l = 2;
3406 } else {
3407 /* 8 bit write access */
3408 val = ldub_p(buf);
3409 io_mem_write(section->mr, addr1, val, 1);
3410 l = 1;
3412 } else if (!section->readonly) {
3413 ram_addr_t addr1;
3414 addr1 = memory_region_get_ram_addr(section->mr)
3415 + memory_region_section_addr(section, addr);
3416 /* RAM case */
3417 ptr = qemu_get_ram_ptr(addr1);
3418 memcpy(ptr, buf, l);
3419 invalidate_and_set_dirty(addr1, l);
3420 qemu_put_ram_ptr(ptr);
3422 } else {
3423 if (!(memory_region_is_ram(section->mr) ||
3424 memory_region_is_romd(section->mr))) {
3425 hwaddr addr1;
3426 /* I/O case */
3427 addr1 = memory_region_section_addr(section, addr);
3428 if (l >= 4 && ((addr1 & 3) == 0)) {
3429 /* 32 bit read access */
3430 val = io_mem_read(section->mr, addr1, 4);
3431 stl_p(buf, val);
3432 l = 4;
3433 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3434 /* 16 bit read access */
3435 val = io_mem_read(section->mr, addr1, 2);
3436 stw_p(buf, val);
3437 l = 2;
3438 } else {
3439 /* 8 bit read access */
3440 val = io_mem_read(section->mr, addr1, 1);
3441 stb_p(buf, val);
3442 l = 1;
3444 } else {
3445 /* RAM case */
3446 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3447 + memory_region_section_addr(section,
3448 addr));
3449 memcpy(buf, ptr, l);
3450 qemu_put_ram_ptr(ptr);
3453 len -= l;
3454 buf += l;
3455 addr += l;
3459 void address_space_write(AddressSpace *as, hwaddr addr,
3460 const uint8_t *buf, int len)
3462 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3466 * address_space_read: read from an address space.
3468 * @as: #AddressSpace to be accessed
3469 * @addr: address within that address space
3470 * @buf: buffer with the data transferred
3472 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3474 address_space_rw(as, addr, buf, len, false);
3478 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3479 int len, int is_write)
3481 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3484 /* used for ROM loading : can write in RAM and ROM */
3485 void cpu_physical_memory_write_rom(hwaddr addr,
3486 const uint8_t *buf, int len)
3488 AddressSpaceDispatch *d = address_space_memory.dispatch;
3489 int l;
3490 uint8_t *ptr;
3491 hwaddr page;
3492 MemoryRegionSection *section;
3494 while (len > 0) {
3495 page = addr & TARGET_PAGE_MASK;
3496 l = (page + TARGET_PAGE_SIZE) - addr;
3497 if (l > len)
3498 l = len;
3499 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3501 if (!(memory_region_is_ram(section->mr) ||
3502 memory_region_is_romd(section->mr))) {
3503 /* do nothing */
3504 } else {
3505 unsigned long addr1;
3506 addr1 = memory_region_get_ram_addr(section->mr)
3507 + memory_region_section_addr(section, addr);
3508 /* ROM/RAM case */
3509 ptr = qemu_get_ram_ptr(addr1);
3510 memcpy(ptr, buf, l);
3511 invalidate_and_set_dirty(addr1, l);
3512 qemu_put_ram_ptr(ptr);
3514 len -= l;
3515 buf += l;
3516 addr += l;
3520 typedef struct {
3521 void *buffer;
3522 hwaddr addr;
3523 hwaddr len;
3524 } BounceBuffer;
3526 static BounceBuffer bounce;
3528 typedef struct MapClient {
3529 void *opaque;
3530 void (*callback)(void *opaque);
3531 QLIST_ENTRY(MapClient) link;
3532 } MapClient;
3534 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3535 = QLIST_HEAD_INITIALIZER(map_client_list);
3537 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3539 MapClient *client = g_malloc(sizeof(*client));
3541 client->opaque = opaque;
3542 client->callback = callback;
3543 QLIST_INSERT_HEAD(&map_client_list, client, link);
3544 return client;
3547 static void cpu_unregister_map_client(void *_client)
3549 MapClient *client = (MapClient *)_client;
3551 QLIST_REMOVE(client, link);
3552 g_free(client);
3555 static void cpu_notify_map_clients(void)
3557 MapClient *client;
3559 while (!QLIST_EMPTY(&map_client_list)) {
3560 client = QLIST_FIRST(&map_client_list);
3561 client->callback(client->opaque);
3562 cpu_unregister_map_client(client);
3566 /* Map a physical memory region into a host virtual address.
3567 * May map a subset of the requested range, given by and returned in *plen.
3568 * May return NULL if resources needed to perform the mapping are exhausted.
3569 * Use only for reads OR writes - not for read-modify-write operations.
3570 * Use cpu_register_map_client() to know when retrying the map operation is
3571 * likely to succeed.
3573 void *address_space_map(AddressSpace *as,
3574 hwaddr addr,
3575 hwaddr *plen,
3576 bool is_write)
3578 AddressSpaceDispatch *d = as->dispatch;
3579 hwaddr len = *plen;
3580 hwaddr todo = 0;
3581 int l;
3582 hwaddr page;
3583 MemoryRegionSection *section;
3584 ram_addr_t raddr = RAM_ADDR_MAX;
3585 ram_addr_t rlen;
3586 void *ret;
3588 while (len > 0) {
3589 page = addr & TARGET_PAGE_MASK;
3590 l = (page + TARGET_PAGE_SIZE) - addr;
3591 if (l > len)
3592 l = len;
3593 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3595 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3596 if (todo || bounce.buffer) {
3597 break;
3599 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3600 bounce.addr = addr;
3601 bounce.len = l;
3602 if (!is_write) {
3603 address_space_read(as, addr, bounce.buffer, l);
3606 *plen = l;
3607 return bounce.buffer;
3609 if (!todo) {
3610 raddr = memory_region_get_ram_addr(section->mr)
3611 + memory_region_section_addr(section, addr);
3614 len -= l;
3615 addr += l;
3616 todo += l;
3618 rlen = todo;
3619 ret = qemu_ram_ptr_length(raddr, &rlen);
3620 *plen = rlen;
3621 return ret;
3624 /* Unmaps a memory region previously mapped by address_space_map().
3625 * Will also mark the memory as dirty if is_write == 1. access_len gives
3626 * the amount of memory that was actually read or written by the caller.
3628 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3629 int is_write, hwaddr access_len)
3631 if (buffer != bounce.buffer) {
3632 if (is_write) {
3633 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3634 while (access_len) {
3635 unsigned l;
3636 l = TARGET_PAGE_SIZE;
3637 if (l > access_len)
3638 l = access_len;
3639 invalidate_and_set_dirty(addr1, l);
3640 addr1 += l;
3641 access_len -= l;
3644 if (xen_enabled()) {
3645 xen_invalidate_map_cache_entry(buffer);
3647 return;
3649 if (is_write) {
3650 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3652 qemu_vfree(bounce.buffer);
3653 bounce.buffer = NULL;
3654 cpu_notify_map_clients();
3657 void *cpu_physical_memory_map(hwaddr addr,
3658 hwaddr *plen,
3659 int is_write)
3661 return address_space_map(&address_space_memory, addr, plen, is_write);
3664 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3665 int is_write, hwaddr access_len)
3667 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3670 /* warning: addr must be aligned */
3671 static inline uint32_t ldl_phys_internal(hwaddr addr,
3672 enum device_endian endian)
3674 uint8_t *ptr;
3675 uint32_t val;
3676 MemoryRegionSection *section;
3678 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3680 if (!(memory_region_is_ram(section->mr) ||
3681 memory_region_is_romd(section->mr))) {
3682 /* I/O case */
3683 addr = memory_region_section_addr(section, addr);
3684 val = io_mem_read(section->mr, addr, 4);
3685 #if defined(TARGET_WORDS_BIGENDIAN)
3686 if (endian == DEVICE_LITTLE_ENDIAN) {
3687 val = bswap32(val);
3689 #else
3690 if (endian == DEVICE_BIG_ENDIAN) {
3691 val = bswap32(val);
3693 #endif
3694 } else {
3695 /* RAM case */
3696 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3697 & TARGET_PAGE_MASK)
3698 + memory_region_section_addr(section, addr));
3699 switch (endian) {
3700 case DEVICE_LITTLE_ENDIAN:
3701 val = ldl_le_p(ptr);
3702 break;
3703 case DEVICE_BIG_ENDIAN:
3704 val = ldl_be_p(ptr);
3705 break;
3706 default:
3707 val = ldl_p(ptr);
3708 break;
3711 return val;
3714 uint32_t ldl_phys(hwaddr addr)
3716 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3719 uint32_t ldl_le_phys(hwaddr addr)
3721 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3724 uint32_t ldl_be_phys(hwaddr addr)
3726 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3729 /* warning: addr must be aligned */
3730 static inline uint64_t ldq_phys_internal(hwaddr addr,
3731 enum device_endian endian)
3733 uint8_t *ptr;
3734 uint64_t val;
3735 MemoryRegionSection *section;
3737 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3739 if (!(memory_region_is_ram(section->mr) ||
3740 memory_region_is_romd(section->mr))) {
3741 /* I/O case */
3742 addr = memory_region_section_addr(section, addr);
3744 /* XXX This is broken when device endian != cpu endian.
3745 Fix and add "endian" variable check */
3746 #ifdef TARGET_WORDS_BIGENDIAN
3747 val = io_mem_read(section->mr, addr, 4) << 32;
3748 val |= io_mem_read(section->mr, addr + 4, 4);
3749 #else
3750 val = io_mem_read(section->mr, addr, 4);
3751 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3752 #endif
3753 } else {
3754 /* RAM case */
3755 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3756 & TARGET_PAGE_MASK)
3757 + memory_region_section_addr(section, addr));
3758 switch (endian) {
3759 case DEVICE_LITTLE_ENDIAN:
3760 val = ldq_le_p(ptr);
3761 break;
3762 case DEVICE_BIG_ENDIAN:
3763 val = ldq_be_p(ptr);
3764 break;
3765 default:
3766 val = ldq_p(ptr);
3767 break;
3770 return val;
3773 uint64_t ldq_phys(hwaddr addr)
3775 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3778 uint64_t ldq_le_phys(hwaddr addr)
3780 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3783 uint64_t ldq_be_phys(hwaddr addr)
3785 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3788 /* XXX: optimize */
3789 uint32_t ldub_phys(hwaddr addr)
3791 uint8_t val;
3792 cpu_physical_memory_read(addr, &val, 1);
3793 return val;
3796 /* warning: addr must be aligned */
3797 static inline uint32_t lduw_phys_internal(hwaddr addr,
3798 enum device_endian endian)
3800 uint8_t *ptr;
3801 uint64_t val;
3802 MemoryRegionSection *section;
3804 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3806 if (!(memory_region_is_ram(section->mr) ||
3807 memory_region_is_romd(section->mr))) {
3808 /* I/O case */
3809 addr = memory_region_section_addr(section, addr);
3810 val = io_mem_read(section->mr, addr, 2);
3811 #if defined(TARGET_WORDS_BIGENDIAN)
3812 if (endian == DEVICE_LITTLE_ENDIAN) {
3813 val = bswap16(val);
3815 #else
3816 if (endian == DEVICE_BIG_ENDIAN) {
3817 val = bswap16(val);
3819 #endif
3820 } else {
3821 /* RAM case */
3822 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3823 & TARGET_PAGE_MASK)
3824 + memory_region_section_addr(section, addr));
3825 switch (endian) {
3826 case DEVICE_LITTLE_ENDIAN:
3827 val = lduw_le_p(ptr);
3828 break;
3829 case DEVICE_BIG_ENDIAN:
3830 val = lduw_be_p(ptr);
3831 break;
3832 default:
3833 val = lduw_p(ptr);
3834 break;
3837 return val;
3840 uint32_t lduw_phys(hwaddr addr)
3842 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3845 uint32_t lduw_le_phys(hwaddr addr)
3847 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3850 uint32_t lduw_be_phys(hwaddr addr)
3852 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3855 /* warning: addr must be aligned. The ram page is not masked as dirty
3856 and the code inside is not invalidated. It is useful if the dirty
3857 bits are used to track modified PTEs */
3858 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3860 uint8_t *ptr;
3861 MemoryRegionSection *section;
3863 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3865 if (!memory_region_is_ram(section->mr) || section->readonly) {
3866 addr = memory_region_section_addr(section, addr);
3867 if (memory_region_is_ram(section->mr)) {
3868 section = &phys_sections[phys_section_rom];
3870 io_mem_write(section->mr, addr, val, 4);
3871 } else {
3872 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3873 & TARGET_PAGE_MASK)
3874 + memory_region_section_addr(section, addr);
3875 ptr = qemu_get_ram_ptr(addr1);
3876 stl_p(ptr, val);
3878 if (unlikely(in_migration)) {
3879 if (!cpu_physical_memory_is_dirty(addr1)) {
3880 /* invalidate code */
3881 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3882 /* set dirty bit */
3883 cpu_physical_memory_set_dirty_flags(
3884 addr1, (0xff & ~CODE_DIRTY_FLAG));
3890 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3892 uint8_t *ptr;
3893 MemoryRegionSection *section;
3895 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3897 if (!memory_region_is_ram(section->mr) || section->readonly) {
3898 addr = memory_region_section_addr(section, addr);
3899 if (memory_region_is_ram(section->mr)) {
3900 section = &phys_sections[phys_section_rom];
3902 #ifdef TARGET_WORDS_BIGENDIAN
3903 io_mem_write(section->mr, addr, val >> 32, 4);
3904 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3905 #else
3906 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3907 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3908 #endif
3909 } else {
3910 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3911 & TARGET_PAGE_MASK)
3912 + memory_region_section_addr(section, addr));
3913 stq_p(ptr, val);
3917 /* warning: addr must be aligned */
3918 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3919 enum device_endian endian)
3921 uint8_t *ptr;
3922 MemoryRegionSection *section;
3924 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3926 if (!memory_region_is_ram(section->mr) || section->readonly) {
3927 addr = memory_region_section_addr(section, addr);
3928 if (memory_region_is_ram(section->mr)) {
3929 section = &phys_sections[phys_section_rom];
3931 #if defined(TARGET_WORDS_BIGENDIAN)
3932 if (endian == DEVICE_LITTLE_ENDIAN) {
3933 val = bswap32(val);
3935 #else
3936 if (endian == DEVICE_BIG_ENDIAN) {
3937 val = bswap32(val);
3939 #endif
3940 io_mem_write(section->mr, addr, val, 4);
3941 } else {
3942 unsigned long addr1;
3943 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3944 + memory_region_section_addr(section, addr);
3945 /* RAM case */
3946 ptr = qemu_get_ram_ptr(addr1);
3947 switch (endian) {
3948 case DEVICE_LITTLE_ENDIAN:
3949 stl_le_p(ptr, val);
3950 break;
3951 case DEVICE_BIG_ENDIAN:
3952 stl_be_p(ptr, val);
3953 break;
3954 default:
3955 stl_p(ptr, val);
3956 break;
3958 invalidate_and_set_dirty(addr1, 4);
3962 void stl_phys(hwaddr addr, uint32_t val)
3964 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3967 void stl_le_phys(hwaddr addr, uint32_t val)
3969 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3972 void stl_be_phys(hwaddr addr, uint32_t val)
3974 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3977 /* XXX: optimize */
3978 void stb_phys(hwaddr addr, uint32_t val)
3980 uint8_t v = val;
3981 cpu_physical_memory_write(addr, &v, 1);
3984 /* warning: addr must be aligned */
3985 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3986 enum device_endian endian)
3988 uint8_t *ptr;
3989 MemoryRegionSection *section;
3991 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3993 if (!memory_region_is_ram(section->mr) || section->readonly) {
3994 addr = memory_region_section_addr(section, addr);
3995 if (memory_region_is_ram(section->mr)) {
3996 section = &phys_sections[phys_section_rom];
3998 #if defined(TARGET_WORDS_BIGENDIAN)
3999 if (endian == DEVICE_LITTLE_ENDIAN) {
4000 val = bswap16(val);
4002 #else
4003 if (endian == DEVICE_BIG_ENDIAN) {
4004 val = bswap16(val);
4006 #endif
4007 io_mem_write(section->mr, addr, val, 2);
4008 } else {
4009 unsigned long addr1;
4010 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4011 + memory_region_section_addr(section, addr);
4012 /* RAM case */
4013 ptr = qemu_get_ram_ptr(addr1);
4014 switch (endian) {
4015 case DEVICE_LITTLE_ENDIAN:
4016 stw_le_p(ptr, val);
4017 break;
4018 case DEVICE_BIG_ENDIAN:
4019 stw_be_p(ptr, val);
4020 break;
4021 default:
4022 stw_p(ptr, val);
4023 break;
4025 invalidate_and_set_dirty(addr1, 2);
4029 void stw_phys(hwaddr addr, uint32_t val)
4031 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4034 void stw_le_phys(hwaddr addr, uint32_t val)
4036 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4039 void stw_be_phys(hwaddr addr, uint32_t val)
4041 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4044 /* XXX: optimize */
4045 void stq_phys(hwaddr addr, uint64_t val)
4047 val = tswap64(val);
4048 cpu_physical_memory_write(addr, &val, 8);
4051 void stq_le_phys(hwaddr addr, uint64_t val)
4053 val = cpu_to_le64(val);
4054 cpu_physical_memory_write(addr, &val, 8);
4057 void stq_be_phys(hwaddr addr, uint64_t val)
4059 val = cpu_to_be64(val);
4060 cpu_physical_memory_write(addr, &val, 8);
4063 /* virtual memory access for debug (includes writing to ROM) */
4064 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4065 uint8_t *buf, int len, int is_write)
4067 int l;
4068 hwaddr phys_addr;
4069 target_ulong page;
4071 while (len > 0) {
4072 page = addr & TARGET_PAGE_MASK;
4073 phys_addr = cpu_get_phys_page_debug(env, page);
4074 /* if no physical page mapped, return an error */
4075 if (phys_addr == -1)
4076 return -1;
4077 l = (page + TARGET_PAGE_SIZE) - addr;
4078 if (l > len)
4079 l = len;
4080 phys_addr += (addr & ~TARGET_PAGE_MASK);
4081 if (is_write)
4082 cpu_physical_memory_write_rom(phys_addr, buf, l);
4083 else
4084 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4085 len -= l;
4086 buf += l;
4087 addr += l;
4089 return 0;
4091 #endif
4093 /* in deterministic execution mode, instructions doing device I/Os
4094 must be at the end of the TB */
4095 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4097 TranslationBlock *tb;
4098 uint32_t n, cflags;
4099 target_ulong pc, cs_base;
4100 uint64_t flags;
4102 tb = tb_find_pc(retaddr);
4103 if (!tb) {
4104 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4105 (void *)retaddr);
4107 n = env->icount_decr.u16.low + tb->icount;
4108 cpu_restore_state(tb, env, retaddr);
4109 /* Calculate how many instructions had been executed before the fault
4110 occurred. */
4111 n = n - env->icount_decr.u16.low;
4112 /* Generate a new TB ending on the I/O insn. */
4113 n++;
4114 /* On MIPS and SH, delay slot instructions can only be restarted if
4115 they were already the first instruction in the TB. If this is not
4116 the first instruction in a TB then re-execute the preceding
4117 branch. */
4118 #if defined(TARGET_MIPS)
4119 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4120 env->active_tc.PC -= 4;
4121 env->icount_decr.u16.low++;
4122 env->hflags &= ~MIPS_HFLAG_BMASK;
4124 #elif defined(TARGET_SH4)
4125 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4126 && n > 1) {
4127 env->pc -= 2;
4128 env->icount_decr.u16.low++;
4129 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4131 #endif
4132 /* This should never happen. */
4133 if (n > CF_COUNT_MASK)
4134 cpu_abort(env, "TB too big during recompile");
4136 cflags = n | CF_LAST_IO;
4137 pc = tb->pc;
4138 cs_base = tb->cs_base;
4139 flags = tb->flags;
4140 tb_phys_invalidate(tb, -1);
4141 /* FIXME: In theory this could raise an exception. In practice
4142 we have already translated the block once so it's probably ok. */
4143 tb_gen_code(env, pc, cs_base, flags, cflags);
4144 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4145 the first in the TB) then we end up generating a whole new TB and
4146 repeating the fault, which is horribly inefficient.
4147 Better would be to execute just this insn uncached, or generate a
4148 second new TB. */
4149 cpu_resume_from_signal(env, NULL);
4152 #if !defined(CONFIG_USER_ONLY)
4154 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4156 int i, target_code_size, max_target_code_size;
4157 int direct_jmp_count, direct_jmp2_count, cross_page;
4158 TranslationBlock *tb;
4160 target_code_size = 0;
4161 max_target_code_size = 0;
4162 cross_page = 0;
4163 direct_jmp_count = 0;
4164 direct_jmp2_count = 0;
4165 for(i = 0; i < nb_tbs; i++) {
4166 tb = &tbs[i];
4167 target_code_size += tb->size;
4168 if (tb->size > max_target_code_size)
4169 max_target_code_size = tb->size;
4170 if (tb->page_addr[1] != -1)
4171 cross_page++;
4172 if (tb->tb_next_offset[0] != 0xffff) {
4173 direct_jmp_count++;
4174 if (tb->tb_next_offset[1] != 0xffff) {
4175 direct_jmp2_count++;
4179 /* XXX: avoid using doubles ? */
4180 cpu_fprintf(f, "Translation buffer state:\n");
4181 cpu_fprintf(f, "gen code size %td/%zd\n",
4182 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4183 cpu_fprintf(f, "TB count %d/%d\n",
4184 nb_tbs, code_gen_max_blocks);
4185 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4186 nb_tbs ? target_code_size / nb_tbs : 0,
4187 max_target_code_size);
4188 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4189 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4190 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4191 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4192 cross_page,
4193 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4194 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4195 direct_jmp_count,
4196 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4197 direct_jmp2_count,
4198 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4199 cpu_fprintf(f, "\nStatistics:\n");
4200 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4201 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4202 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4203 tcg_dump_info(f, cpu_fprintf);
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    const bool big_endian = true;
#else
    const bool big_endian = false;
#endif

    return big_endian;
}
4220 #endif
4222 #ifndef CONFIG_USER_ONLY
4223 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4225 MemoryRegionSection *section;
4227 section = phys_page_find(address_space_memory.dispatch,
4228 phys_addr >> TARGET_PAGE_BITS);
4230 return !(memory_region_is_ram(section->mr) ||
4231 memory_region_is_romd(section->mr));
4233 #endif