memory: per-AddressSpace dispatch
[qemu/ar7.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #include "cputlb.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 #if defined(__arm__) || defined(__sparc__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91 section close to the code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32) && !defined(_WIN64)
96 #define code_gen_section \
97 __attribute__((aligned (16)))
98 #else
99 #define code_gen_section \
100 __attribute__((aligned (32)))
101 #endif
103 uint8_t code_gen_prologue[1024] code_gen_section;
104 static uint8_t *code_gen_buffer;
105 static unsigned long code_gen_buffer_size;
106 /* threshold to flush the translated code buffer */
107 static unsigned long code_gen_buffer_max_size;
108 static uint8_t *code_gen_ptr;
110 #if !defined(CONFIG_USER_ONLY)
111 int phys_ram_fd;
112 static int in_migration;
114 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
116 static MemoryRegion *system_memory;
117 static MemoryRegion *system_io;
119 AddressSpace address_space_io;
120 AddressSpace address_space_memory;
122 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
123 static MemoryRegion io_mem_subpage_ram;
125 #endif
127 CPUArchState *first_cpu;
128 /* current CPU in the current thread. It is only valid inside
129 cpu_exec() */
130 DEFINE_TLS(CPUArchState *,cpu_single_env);
131 /* 0 = Do not count executed instructions.
132 1 = Precise instruction counting.
133 2 = Adaptive rate instruction counting. */
134 int use_icount = 0;
136 typedef struct PageDesc {
137 /* list of TBs intersecting this ram page */
138 TranslationBlock *first_tb;
139 /* in order to optimize self modifying code, we count the number
140 of lookups we do to a given page to use a bitmap */
141 unsigned int code_write_count;
142 uint8_t *code_bitmap;
143 #if defined(CONFIG_USER_ONLY)
144 unsigned long flags;
145 #endif
146 } PageDesc;
148 /* In system mode we want L1_MAP to be based on ram offsets,
149 while in user mode we want it to be based on virtual addresses. */
150 #if !defined(CONFIG_USER_ONLY)
151 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
152 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
153 #else
154 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
155 #endif
156 #else
157 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
158 #endif
160 /* Size of the L2 (and L3, etc) page tables. */
161 #define L2_BITS 10
162 #define L2_SIZE (1 << L2_BITS)
164 #define P_L2_LEVELS \
165 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
167 /* The bits remaining after N lower levels of page tables. */
168 #define V_L1_BITS_REM \
169 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
171 #if V_L1_BITS_REM < 4
172 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
173 #else
174 #define V_L1_BITS V_L1_BITS_REM
175 #endif
177 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
179 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
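/*
 * Worked example (illustrative, not from the original source): with
 * TARGET_PAGE_BITS == 12 and L1_MAP_ADDR_SPACE_BITS == 32, the page index
 * has 32 - 12 = 20 bits.  20 % 10 == 0, so V_L1_BITS_REM is 0 (< 4) and
 * V_L1_BITS becomes 10; V_L1_SIZE is then 1024 entries and V_L1_SHIFT is
 * 32 - 12 - 10 = 10.  A page index is split as bits [19:10] -> l1_map slot
 * and bits [9:0] -> PageDesc slot inside the single L2 leaf, which is
 * exactly the walk that page_find_alloc() performs below.
 */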
181 uintptr_t qemu_real_host_page_size;
182 uintptr_t qemu_host_page_size;
183 uintptr_t qemu_host_page_mask;
185 /* This is a multi-level map on the virtual address space.
186 The bottom level has pointers to PageDesc. */
187 static void *l1_map[V_L1_SIZE];
189 #if !defined(CONFIG_USER_ONLY)
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
194 static uint16_t phys_section_notdirty;
195 static uint16_t phys_section_rom;
196 static uint16_t phys_section_watch;
198 /* Simple allocator for PhysPageEntry nodes */
199 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
200 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
202 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 static MemoryRegion io_mem_watch;
208 #endif
210 /* statistics */
211 static int tb_flush_count;
212 static int tb_phys_invalidate_count;
214 #ifdef _WIN32
215 static void map_exec(void *addr, long size)
217 DWORD old_protect;
218 VirtualProtect(addr, size,
219 PAGE_EXECUTE_READWRITE, &old_protect);
222 #else
223 static void map_exec(void *addr, long size)
225 unsigned long start, end, page_size;
227 page_size = getpagesize();
228 start = (unsigned long)addr;
229 start &= ~(page_size - 1);
231 end = (unsigned long)addr + size;
232 end += page_size - 1;
233 end &= ~(page_size - 1);
235 mprotect((void *)start, end - start,
236 PROT_READ | PROT_WRITE | PROT_EXEC);
238 #endif
240 static void page_init(void)
242 /* NOTE: we can always suppose that qemu_host_page_size >=
243 TARGET_PAGE_SIZE */
244 #ifdef _WIN32
246 SYSTEM_INFO system_info;
248 GetSystemInfo(&system_info);
249 qemu_real_host_page_size = system_info.dwPageSize;
251 #else
252 qemu_real_host_page_size = getpagesize();
253 #endif
254 if (qemu_host_page_size == 0)
255 qemu_host_page_size = qemu_real_host_page_size;
256 if (qemu_host_page_size < TARGET_PAGE_SIZE)
257 qemu_host_page_size = TARGET_PAGE_SIZE;
258 qemu_host_page_mask = ~(qemu_host_page_size - 1);
260 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
262 #ifdef HAVE_KINFO_GETVMMAP
263 struct kinfo_vmentry *freep;
264 int i, cnt;
266 freep = kinfo_getvmmap(getpid(), &cnt);
267 if (freep) {
268 mmap_lock();
269 for (i = 0; i < cnt; i++) {
270 unsigned long startaddr, endaddr;
272 startaddr = freep[i].kve_start;
273 endaddr = freep[i].kve_end;
274 if (h2g_valid(startaddr)) {
275 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
277 if (h2g_valid(endaddr)) {
278 endaddr = h2g(endaddr);
279 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
280 } else {
281 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
282 endaddr = ~0ul;
283 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
284 #endif
288 free(freep);
289 mmap_unlock();
291 #else
292 FILE *f;
294 last_brk = (unsigned long)sbrk(0);
296 f = fopen("/compat/linux/proc/self/maps", "r");
297 if (f) {
298 mmap_lock();
300 do {
301 unsigned long startaddr, endaddr;
302 int n;
304 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
306 if (n == 2 && h2g_valid(startaddr)) {
307 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
309 if (h2g_valid(endaddr)) {
310 endaddr = h2g(endaddr);
311 } else {
312 endaddr = ~0ul;
314 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
316 } while (!feof(f));
318 fclose(f);
319 mmap_unlock();
321 #endif
323 #endif
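/*
 * Illustrative arithmetic (assuming a 4 KiB host page): with
 * qemu_host_page_size == 0x1000, qemu_host_page_mask is ~0xfff, so
 * "addr & qemu_host_page_mask" rounds a host address down to the start of
 * its host page; tb_alloc_page() and page_unprotect() below rely on this
 * when protecting or unprotecting whole host pages.
 */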
326 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
328 PageDesc *pd;
329 void **lp;
330 int i;
332 #if defined(CONFIG_USER_ONLY)
333 /* We can't use g_malloc because it may recurse into a locked mutex. */
334 # define ALLOC(P, SIZE) \
335 do { \
336 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
337 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
338 } while (0)
339 #else
340 # define ALLOC(P, SIZE) \
341 do { P = g_malloc0(SIZE); } while (0)
342 #endif
344 /* Level 1. Always allocated. */
345 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
347 /* Level 2..N-1. */
348 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
349 void **p = *lp;
351 if (p == NULL) {
352 if (!alloc) {
353 return NULL;
355 ALLOC(p, sizeof(void *) * L2_SIZE);
356 *lp = p;
359 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
362 pd = *lp;
363 if (pd == NULL) {
364 if (!alloc) {
365 return NULL;
367 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
368 *lp = pd;
371 #undef ALLOC
373 return pd + (index & (L2_SIZE - 1));
376 static inline PageDesc *page_find(tb_page_addr_t index)
378 return page_find_alloc(index, 0);
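/*
 * Illustrative sketch (not part of the original file): typical use of the
 * two helpers above.  page_find() is the read-only lookup used on hot
 * paths; page_find_alloc(..., 1) may allocate intermediate levels and is
 * what tb_alloc_page() uses when registering a new TB.
 */
#if 0
static void example_page_lookup(tb_page_addr_t page_addr)
{
    /* read-only lookup: NULL if no TB ever touched this page */
    PageDesc *p = page_find(page_addr >> TARGET_PAGE_BITS);

    if (p && p->first_tb) {
        /* at least one translated block intersects this page */
    }

    /* allocating variant, as used by tb_alloc_page() */
    p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
}
#endif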
381 #if !defined(CONFIG_USER_ONLY)
383 static void phys_map_node_reserve(unsigned nodes)
385 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
386 typedef PhysPageEntry Node[L2_SIZE];
387 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
388 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
389 phys_map_nodes_nb + nodes);
390 phys_map_nodes = g_renew(Node, phys_map_nodes,
391 phys_map_nodes_nb_alloc);
395 static uint16_t phys_map_node_alloc(void)
397 unsigned i;
398 uint16_t ret;
400 ret = phys_map_nodes_nb++;
401 assert(ret != PHYS_MAP_NODE_NIL);
402 assert(ret != phys_map_nodes_nb_alloc);
403 for (i = 0; i < L2_SIZE; ++i) {
404 phys_map_nodes[ret][i].is_leaf = 0;
405 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
407 return ret;
410 static void phys_map_nodes_reset(void)
412 phys_map_nodes_nb = 0;
416 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
417 target_phys_addr_t *nb, uint16_t leaf,
418 int level)
420 PhysPageEntry *p;
421 int i;
422 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
424 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
425 lp->ptr = phys_map_node_alloc();
426 p = phys_map_nodes[lp->ptr];
427 if (level == 0) {
428 for (i = 0; i < L2_SIZE; i++) {
429 p[i].is_leaf = 1;
430 p[i].ptr = phys_section_unassigned;
433 } else {
434 p = phys_map_nodes[lp->ptr];
436 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
438 while (*nb && lp < &p[L2_SIZE]) {
439 if ((*index & (step - 1)) == 0 && *nb >= step) {
440 lp->is_leaf = true;
441 lp->ptr = leaf;
442 *index += step;
443 *nb -= step;
444 } else {
445 phys_page_set_level(lp, index, nb, leaf, level - 1);
447 ++lp;
451 static void phys_page_set(AddressSpaceDispatch *d,
452 target_phys_addr_t index, target_phys_addr_t nb,
453 uint16_t leaf)
455 /* Wildly overreserve - it doesn't matter much. */
456 phys_map_node_reserve(3 * P_L2_LEVELS);
458 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
461 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, target_phys_addr_t index)
463 PhysPageEntry lp = d->phys_map;
464 PhysPageEntry *p;
465 int i;
466 uint16_t s_index = phys_section_unassigned;
468 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
469 if (lp.ptr == PHYS_MAP_NODE_NIL) {
470 goto not_found;
472 p = phys_map_nodes[lp.ptr];
473 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
476 s_index = lp.ptr;
477 not_found:
478 return &phys_sections[s_index];
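/*
 * Illustrative sketch (assumes "sec" is a valid index obtained from
 * phys_section_add(), defined further down): the radix tree above maps
 * page-sized chunks of the physical address space to MemoryRegionSection
 * indices.
 */
#if 0
static void example_phys_map(AddressSpaceDispatch *d, uint16_t sec)
{
    /* map 16 physical pages starting at page index 0x1000 to "sec" */
    phys_page_set(d, 0x1000, 16, sec);

    /* lookups inside the range return &phys_sections[sec] ... */
    MemoryRegionSection *s = phys_page_find(d, 0x1005);

    /* ... and unmapped pages fall back to phys_section_unassigned */
    MemoryRegionSection *u = phys_page_find(d, 0x2000);
    (void)s;
    (void)u;
}
#endif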
481 bool memory_region_is_unassigned(MemoryRegion *mr)
483 return mr != &io_mem_ram && mr != &io_mem_rom
484 && mr != &io_mem_notdirty && !mr->rom_device
485 && mr != &io_mem_watch;
488 #define mmap_lock() do { } while(0)
489 #define mmap_unlock() do { } while(0)
490 #endif
492 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
494 #if defined(CONFIG_USER_ONLY)
495 /* Currently it is not recommended to allocate big chunks of data in
496 user mode. It will change when a dedicated libc is used. */
497 #define USE_STATIC_CODE_GEN_BUFFER
498 #endif
500 #ifdef USE_STATIC_CODE_GEN_BUFFER
501 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
502 __attribute__((aligned (CODE_GEN_ALIGN)));
503 #endif
505 static void code_gen_alloc(unsigned long tb_size)
507 #ifdef USE_STATIC_CODE_GEN_BUFFER
508 code_gen_buffer = static_code_gen_buffer;
509 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
510 map_exec(code_gen_buffer, code_gen_buffer_size);
511 #else
512 code_gen_buffer_size = tb_size;
513 if (code_gen_buffer_size == 0) {
514 #if defined(CONFIG_USER_ONLY)
515 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
516 #else
517 /* XXX: needs adjustments */
518 code_gen_buffer_size = (unsigned long)(ram_size / 4);
519 #endif
521 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
522 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
523 /* The code gen buffer location may have constraints depending on
524 the host cpu and OS */
525 #if defined(__linux__)
527 int flags;
528 void *start = NULL;
530 flags = MAP_PRIVATE | MAP_ANONYMOUS;
531 #if defined(__x86_64__)
532 flags |= MAP_32BIT;
533 /* Cannot map more than that */
534 if (code_gen_buffer_size > (800 * 1024 * 1024))
535 code_gen_buffer_size = (800 * 1024 * 1024);
536 #elif defined(__sparc__) && HOST_LONG_BITS == 64
537 // Map the buffer below 2G, so we can use direct calls and branches
538 start = (void *) 0x40000000UL;
539 if (code_gen_buffer_size > (512 * 1024 * 1024))
540 code_gen_buffer_size = (512 * 1024 * 1024);
541 #elif defined(__arm__)
542 /* Keep the buffer no bigger than 16MB to branch between blocks */
543 if (code_gen_buffer_size > 16 * 1024 * 1024)
544 code_gen_buffer_size = 16 * 1024 * 1024;
545 #elif defined(__s390x__)
546 /* Map the buffer so that we can use direct calls and branches. */
547 /* We have a +- 4GB range on the branches; leave some slop. */
548 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
549 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
551 start = (void *)0x90000000UL;
552 #endif
553 code_gen_buffer = mmap(start, code_gen_buffer_size,
554 PROT_WRITE | PROT_READ | PROT_EXEC,
555 flags, -1, 0);
556 if (code_gen_buffer == MAP_FAILED) {
557 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
558 exit(1);
561 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
562 || defined(__DragonFly__) || defined(__OpenBSD__) \
563 || defined(__NetBSD__)
565 int flags;
566 void *addr = NULL;
567 flags = MAP_PRIVATE | MAP_ANONYMOUS;
568 #if defined(__x86_64__)
569 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
570 * 0x40000000 is free */
571 flags |= MAP_FIXED;
572 addr = (void *)0x40000000;
573 /* Cannot map more than that */
574 if (code_gen_buffer_size > (800 * 1024 * 1024))
575 code_gen_buffer_size = (800 * 1024 * 1024);
576 #elif defined(__sparc__) && HOST_LONG_BITS == 64
577 // Map the buffer below 2G, so we can use direct calls and branches
578 addr = (void *) 0x40000000UL;
579 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
580 code_gen_buffer_size = (512 * 1024 * 1024);
582 #endif
583 code_gen_buffer = mmap(addr, code_gen_buffer_size,
584 PROT_WRITE | PROT_READ | PROT_EXEC,
585 flags, -1, 0);
586 if (code_gen_buffer == MAP_FAILED) {
587 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
588 exit(1);
591 #else
592 code_gen_buffer = g_malloc(code_gen_buffer_size);
593 map_exec(code_gen_buffer, code_gen_buffer_size);
594 #endif
595 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
596 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
597 code_gen_buffer_max_size = code_gen_buffer_size -
598 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
599 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
600 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
603 /* Must be called before using the QEMU cpus. 'tb_size' is the size
604 (in bytes) allocated to the translation buffer. Zero means default
605 size. */
606 void tcg_exec_init(unsigned long tb_size)
608 cpu_gen_init();
609 code_gen_alloc(tb_size);
610 code_gen_ptr = code_gen_buffer;
611 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
612 page_init();
613 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
614 /* There's no guest base to take into account, so go ahead and
615 initialize the prologue now. */
616 tcg_prologue_init(&tcg_ctx);
617 #endif
620 bool tcg_enabled(void)
622 return code_gen_buffer != NULL;
625 void cpu_exec_init_all(void)
627 #if !defined(CONFIG_USER_ONLY)
628 memory_map_init();
629 io_mem_init();
630 #endif
633 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
635 static int cpu_common_post_load(void *opaque, int version_id)
637 CPUArchState *env = opaque;
639 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
640 version_id is increased. */
641 env->interrupt_request &= ~0x01;
642 tlb_flush(env, 1);
644 return 0;
647 static const VMStateDescription vmstate_cpu_common = {
648 .name = "cpu_common",
649 .version_id = 1,
650 .minimum_version_id = 1,
651 .minimum_version_id_old = 1,
652 .post_load = cpu_common_post_load,
653 .fields = (VMStateField []) {
654 VMSTATE_UINT32(halted, CPUArchState),
655 VMSTATE_UINT32(interrupt_request, CPUArchState),
656 VMSTATE_END_OF_LIST()
659 #endif
661 CPUArchState *qemu_get_cpu(int cpu)
663 CPUArchState *env = first_cpu;
665 while (env) {
666 if (env->cpu_index == cpu)
667 break;
668 env = env->next_cpu;
671 return env;
674 void cpu_exec_init(CPUArchState *env)
676 CPUArchState **penv;
677 int cpu_index;
679 #if defined(CONFIG_USER_ONLY)
680 cpu_list_lock();
681 #endif
682 env->next_cpu = NULL;
683 penv = &first_cpu;
684 cpu_index = 0;
685 while (*penv != NULL) {
686 penv = &(*penv)->next_cpu;
687 cpu_index++;
689 env->cpu_index = cpu_index;
690 env->numa_node = 0;
691 QTAILQ_INIT(&env->breakpoints);
692 QTAILQ_INIT(&env->watchpoints);
693 #ifndef CONFIG_USER_ONLY
694 env->thread_id = qemu_get_thread_id();
695 #endif
696 *penv = env;
697 #if defined(CONFIG_USER_ONLY)
698 cpu_list_unlock();
699 #endif
700 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
701 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
702 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
703 cpu_save, cpu_load, env);
704 #endif
707 /* Allocate a new translation block. Flush the translation buffer if
708 too many translation blocks or too much generated code. */
709 static TranslationBlock *tb_alloc(target_ulong pc)
711 TranslationBlock *tb;
713 if (nb_tbs >= code_gen_max_blocks ||
714 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
715 return NULL;
716 tb = &tbs[nb_tbs++];
717 tb->pc = pc;
718 tb->cflags = 0;
719 return tb;
722 void tb_free(TranslationBlock *tb)
724 /* In practice this is mostly used for single-use temporary TBs.
725 Ignore the hard cases and just back up if this TB happens to
726 be the last one generated. */
727 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
728 code_gen_ptr = tb->tc_ptr;
729 nb_tbs--;
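/*
 * Illustrative sketch (not part of the original file): tbs[] is a simple
 * bump allocator, so tb_free() can only reclaim the most recently
 * allocated block.  tb_gen_code() below shows the real usage pattern,
 * including the tb_flush() fallback when tb_alloc() returns NULL.
 */
#if 0
static void example_tb_alloc(CPUArchState *env, target_ulong pc)
{
    TranslationBlock *tb = tb_alloc(pc);

    if (!tb) {
        /* code buffer or tbs[] exhausted: flush everything and retry */
        tb_flush(env);
        tb = tb_alloc(pc);
    }
    /* ... generate code; if the block turns out to be single-use: */
    tb_free(tb);
}
#endif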
733 static inline void invalidate_page_bitmap(PageDesc *p)
735 if (p->code_bitmap) {
736 g_free(p->code_bitmap);
737 p->code_bitmap = NULL;
739 p->code_write_count = 0;
742 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
744 static void page_flush_tb_1 (int level, void **lp)
746 int i;
748 if (*lp == NULL) {
749 return;
751 if (level == 0) {
752 PageDesc *pd = *lp;
753 for (i = 0; i < L2_SIZE; ++i) {
754 pd[i].first_tb = NULL;
755 invalidate_page_bitmap(pd + i);
757 } else {
758 void **pp = *lp;
759 for (i = 0; i < L2_SIZE; ++i) {
760 page_flush_tb_1 (level - 1, pp + i);
765 static void page_flush_tb(void)
767 int i;
768 for (i = 0; i < V_L1_SIZE; i++) {
769 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
773 /* flush all the translation blocks */
774 /* XXX: tb_flush is currently not thread safe */
775 void tb_flush(CPUArchState *env1)
777 CPUArchState *env;
778 #if defined(DEBUG_FLUSH)
779 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
780 (unsigned long)(code_gen_ptr - code_gen_buffer),
781 nb_tbs, nb_tbs > 0 ?
782 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
783 #endif
784 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
785 cpu_abort(env1, "Internal error: code buffer overflow\n");
787 nb_tbs = 0;
789 for(env = first_cpu; env != NULL; env = env->next_cpu) {
790 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
793 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
794 page_flush_tb();
796 code_gen_ptr = code_gen_buffer;
797 /* XXX: flush processor icache at this point if cache flush is
798 expensive */
799 tb_flush_count++;
802 #ifdef DEBUG_TB_CHECK
804 static void tb_invalidate_check(target_ulong address)
806 TranslationBlock *tb;
807 int i;
808 address &= TARGET_PAGE_MASK;
809 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
810 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
811 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
812 address >= tb->pc + tb->size)) {
813 printf("ERROR invalidate: address=" TARGET_FMT_lx
814 " PC=%08lx size=%04x\n",
815 address, (long)tb->pc, tb->size);
821 /* verify that all the pages have correct rights for code */
822 static void tb_page_check(void)
824 TranslationBlock *tb;
825 int i, flags1, flags2;
827 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
828 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
829 flags1 = page_get_flags(tb->pc);
830 flags2 = page_get_flags(tb->pc + tb->size - 1);
831 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
832 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
833 (long)tb->pc, tb->size, flags1, flags2);
839 #endif
841 /* invalidate one TB */
842 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
843 int next_offset)
845 TranslationBlock *tb1;
846 for(;;) {
847 tb1 = *ptb;
848 if (tb1 == tb) {
849 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
850 break;
852 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
856 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
858 TranslationBlock *tb1;
859 unsigned int n1;
861 for(;;) {
862 tb1 = *ptb;
863 n1 = (uintptr_t)tb1 & 3;
864 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
865 if (tb1 == tb) {
866 *ptb = tb1->page_next[n1];
867 break;
869 ptb = &tb1->page_next[n1];
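/*
 * Explanatory note (added, not from the original file): pointers stored in
 * p->first_tb and tb->page_next[] carry a tag in their two low bits -- 0
 * or 1 selects which of the TB's (at most two) pages the link belongs to,
 * hence the recurring "n = (uintptr_t)tb & 3; tb = (TranslationBlock *)
 * ((uintptr_t)tb & ~3)" idiom.  The jump lists use the same trick, with
 * tag value 2 marking the list head (see tb_link_page() and
 * tb_phys_invalidate()).
 */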
873 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
875 TranslationBlock *tb1, **ptb;
876 unsigned int n1;
878 ptb = &tb->jmp_next[n];
879 tb1 = *ptb;
880 if (tb1) {
881 /* find tb(n) in circular list */
882 for(;;) {
883 tb1 = *ptb;
884 n1 = (uintptr_t)tb1 & 3;
885 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
886 if (n1 == n && tb1 == tb)
887 break;
888 if (n1 == 2) {
889 ptb = &tb1->jmp_first;
890 } else {
891 ptb = &tb1->jmp_next[n1];
894 /* now we can suppress tb(n) from the list */
895 *ptb = tb->jmp_next[n];
897 tb->jmp_next[n] = NULL;
901 /* reset the jump entry 'n' of a TB so that it is not chained to
902 another TB */
903 static inline void tb_reset_jump(TranslationBlock *tb, int n)
905 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
908 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
910 CPUArchState *env;
911 PageDesc *p;
912 unsigned int h, n1;
913 tb_page_addr_t phys_pc;
914 TranslationBlock *tb1, *tb2;
916 /* remove the TB from the hash list */
917 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
918 h = tb_phys_hash_func(phys_pc);
919 tb_remove(&tb_phys_hash[h], tb,
920 offsetof(TranslationBlock, phys_hash_next));
922 /* remove the TB from the page list */
923 if (tb->page_addr[0] != page_addr) {
924 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
925 tb_page_remove(&p->first_tb, tb);
926 invalidate_page_bitmap(p);
928 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
929 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
930 tb_page_remove(&p->first_tb, tb);
931 invalidate_page_bitmap(p);
934 tb_invalidated_flag = 1;
936 /* remove the TB from the hash list */
937 h = tb_jmp_cache_hash_func(tb->pc);
938 for(env = first_cpu; env != NULL; env = env->next_cpu) {
939 if (env->tb_jmp_cache[h] == tb)
940 env->tb_jmp_cache[h] = NULL;
943 /* suppress this TB from the two jump lists */
944 tb_jmp_remove(tb, 0);
945 tb_jmp_remove(tb, 1);
947 /* suppress any remaining jumps to this TB */
948 tb1 = tb->jmp_first;
949 for(;;) {
950 n1 = (uintptr_t)tb1 & 3;
951 if (n1 == 2)
952 break;
953 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
954 tb2 = tb1->jmp_next[n1];
955 tb_reset_jump(tb1, n1);
956 tb1->jmp_next[n1] = NULL;
957 tb1 = tb2;
959 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
961 tb_phys_invalidate_count++;
964 static inline void set_bits(uint8_t *tab, int start, int len)
966 int end, mask, end1;
968 end = start + len;
969 tab += start >> 3;
970 mask = 0xff << (start & 7);
971 if ((start & ~7) == (end & ~7)) {
972 if (start < end) {
973 mask &= ~(0xff << (end & 7));
974 *tab |= mask;
976 } else {
977 *tab++ |= mask;
978 start = (start + 8) & ~7;
979 end1 = end & ~7;
980 while (start < end1) {
981 *tab++ = 0xff;
982 start += 8;
984 if (start < end) {
985 mask = ~(0xff << (end & 7));
986 *tab |= mask;
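/*
 * Worked example (illustrative): set_bits(tab, 10, 6) sets bits 10..15,
 * i.e. tab[1] |= 0xfc; set_bits(tab, 6, 4) spans a byte boundary and sets
 * tab[0] |= 0xc0 and tab[1] |= 0x03 (bits 6..9).  build_page_bitmap()
 * below uses this to record, one bit per byte of the guest page, which
 * bytes are covered by translated code.
 */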
991 static void build_page_bitmap(PageDesc *p)
993 int n, tb_start, tb_end;
994 TranslationBlock *tb;
996 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
998 tb = p->first_tb;
999 while (tb != NULL) {
1000 n = (uintptr_t)tb & 3;
1001 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1002 /* NOTE: this is subtle as a TB may span two physical pages */
1003 if (n == 0) {
1004 /* NOTE: tb_end may be after the end of the page, but
1005 it is not a problem */
1006 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1007 tb_end = tb_start + tb->size;
1008 if (tb_end > TARGET_PAGE_SIZE)
1009 tb_end = TARGET_PAGE_SIZE;
1010 } else {
1011 tb_start = 0;
1012 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1014 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1015 tb = tb->page_next[n];
1019 TranslationBlock *tb_gen_code(CPUArchState *env,
1020 target_ulong pc, target_ulong cs_base,
1021 int flags, int cflags)
1023 TranslationBlock *tb;
1024 uint8_t *tc_ptr;
1025 tb_page_addr_t phys_pc, phys_page2;
1026 target_ulong virt_page2;
1027 int code_gen_size;
1029 phys_pc = get_page_addr_code(env, pc);
1030 tb = tb_alloc(pc);
1031 if (!tb) {
1032 /* flush must be done */
1033 tb_flush(env);
1034 /* cannot fail at this point */
1035 tb = tb_alloc(pc);
1036 /* Don't forget to invalidate previous TB info. */
1037 tb_invalidated_flag = 1;
1039 tc_ptr = code_gen_ptr;
1040 tb->tc_ptr = tc_ptr;
1041 tb->cs_base = cs_base;
1042 tb->flags = flags;
1043 tb->cflags = cflags;
1044 cpu_gen_code(env, tb, &code_gen_size);
1045 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1046 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1048 /* check next page if needed */
1049 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1050 phys_page2 = -1;
1051 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1052 phys_page2 = get_page_addr_code(env, virt_page2);
1054 tb_link_page(tb, phys_pc, phys_page2);
1055 return tb;
1059 * Invalidate all TBs which intersect with the target physical address range
1060 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1061 * 'is_cpu_write_access' should be true if called from a real cpu write
1062 * access: the virtual CPU will exit the current TB if code is modified inside
1063 * this TB.
1065 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1066 int is_cpu_write_access)
1068 while (start < end) {
1069 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1070 start &= TARGET_PAGE_MASK;
1071 start += TARGET_PAGE_SIZE;
1076 * Invalidate all TBs which intersect with the target physical address range
1077 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1078 * 'is_cpu_write_access' should be true if called from a real cpu write
1079 * access: the virtual CPU will exit the current TB if code is modified inside
1080 * this TB.
1082 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1083 int is_cpu_write_access)
1085 TranslationBlock *tb, *tb_next, *saved_tb;
1086 CPUArchState *env = cpu_single_env;
1087 tb_page_addr_t tb_start, tb_end;
1088 PageDesc *p;
1089 int n;
1090 #ifdef TARGET_HAS_PRECISE_SMC
1091 int current_tb_not_found = is_cpu_write_access;
1092 TranslationBlock *current_tb = NULL;
1093 int current_tb_modified = 0;
1094 target_ulong current_pc = 0;
1095 target_ulong current_cs_base = 0;
1096 int current_flags = 0;
1097 #endif /* TARGET_HAS_PRECISE_SMC */
1099 p = page_find(start >> TARGET_PAGE_BITS);
1100 if (!p)
1101 return;
1102 if (!p->code_bitmap &&
1103 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1104 is_cpu_write_access) {
1105 /* build code bitmap */
1106 build_page_bitmap(p);
1109 /* we remove all the TBs in the range [start, end[ */
1110 /* XXX: see if in some cases it could be faster to invalidate all the code */
1111 tb = p->first_tb;
1112 while (tb != NULL) {
1113 n = (uintptr_t)tb & 3;
1114 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1115 tb_next = tb->page_next[n];
1116 /* NOTE: this is subtle as a TB may span two physical pages */
1117 if (n == 0) {
1118 /* NOTE: tb_end may be after the end of the page, but
1119 it is not a problem */
1120 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1121 tb_end = tb_start + tb->size;
1122 } else {
1123 tb_start = tb->page_addr[1];
1124 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1126 if (!(tb_end <= start || tb_start >= end)) {
1127 #ifdef TARGET_HAS_PRECISE_SMC
1128 if (current_tb_not_found) {
1129 current_tb_not_found = 0;
1130 current_tb = NULL;
1131 if (env->mem_io_pc) {
1132 /* now we have a real cpu fault */
1133 current_tb = tb_find_pc(env->mem_io_pc);
1136 if (current_tb == tb &&
1137 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1138 /* If we are modifying the current TB, we must stop
1139 its execution. We could be more precise by checking
1140 that the modification is after the current PC, but it
1141 would require a specialized function to partially
1142 restore the CPU state */
1144 current_tb_modified = 1;
1145 cpu_restore_state(current_tb, env, env->mem_io_pc);
1146 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1147 &current_flags);
1149 #endif /* TARGET_HAS_PRECISE_SMC */
1150 /* we need to do that to handle the case where a signal
1151 occurs while doing tb_phys_invalidate() */
1152 saved_tb = NULL;
1153 if (env) {
1154 saved_tb = env->current_tb;
1155 env->current_tb = NULL;
1157 tb_phys_invalidate(tb, -1);
1158 if (env) {
1159 env->current_tb = saved_tb;
1160 if (env->interrupt_request && env->current_tb)
1161 cpu_interrupt(env, env->interrupt_request);
1164 tb = tb_next;
1166 #if !defined(CONFIG_USER_ONLY)
1167 /* if no code remaining, no need to continue to use slow writes */
1168 if (!p->first_tb) {
1169 invalidate_page_bitmap(p);
1170 if (is_cpu_write_access) {
1171 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1174 #endif
1175 #ifdef TARGET_HAS_PRECISE_SMC
1176 if (current_tb_modified) {
1177 /* we generate a block containing just the instruction
1178 modifying the memory. It will ensure that it cannot modify
1179 itself */
1180 env->current_tb = NULL;
1181 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1182 cpu_resume_from_signal(env, NULL);
1184 #endif
1187 /* len must be <= 8 and start must be a multiple of len */
1188 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1190 PageDesc *p;
1191 int offset, b;
1192 #if 0
1193 if (1) {
1194 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1195 cpu_single_env->mem_io_vaddr, len,
1196 cpu_single_env->eip,
1197 cpu_single_env->eip +
1198 (intptr_t)cpu_single_env->segs[R_CS].base);
1200 #endif
1201 p = page_find(start >> TARGET_PAGE_BITS);
1202 if (!p)
1203 return;
1204 if (p->code_bitmap) {
1205 offset = start & ~TARGET_PAGE_MASK;
1206 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1207 if (b & ((1 << len) - 1))
1208 goto do_invalidate;
1209 } else {
1210 do_invalidate:
1211 tb_invalidate_phys_page_range(start, start + len, 1);
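/*
 * Worked example (illustrative): for a write of len == 4 at page offset
 * 0x104, "b = code_bitmap[0x104 >> 3] >> (0x104 & 7)" brings bit 0x104
 * down to bit 0, and "b & ((1 << 4) - 1)" is non-zero iff any of offsets
 * 0x104..0x107 belong to translated code, in which case the slow
 * tb_invalidate_phys_page_range() path is taken.
 */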
1215 #if !defined(CONFIG_SOFTMMU)
1216 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1217 uintptr_t pc, void *puc)
1219 TranslationBlock *tb;
1220 PageDesc *p;
1221 int n;
1222 #ifdef TARGET_HAS_PRECISE_SMC
1223 TranslationBlock *current_tb = NULL;
1224 CPUArchState *env = cpu_single_env;
1225 int current_tb_modified = 0;
1226 target_ulong current_pc = 0;
1227 target_ulong current_cs_base = 0;
1228 int current_flags = 0;
1229 #endif
1231 addr &= TARGET_PAGE_MASK;
1232 p = page_find(addr >> TARGET_PAGE_BITS);
1233 if (!p)
1234 return;
1235 tb = p->first_tb;
1236 #ifdef TARGET_HAS_PRECISE_SMC
1237 if (tb && pc != 0) {
1238 current_tb = tb_find_pc(pc);
1240 #endif
1241 while (tb != NULL) {
1242 n = (uintptr_t)tb & 3;
1243 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1244 #ifdef TARGET_HAS_PRECISE_SMC
1245 if (current_tb == tb &&
1246 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1247 /* If we are modifying the current TB, we must stop
1248 its execution. We could be more precise by checking
1249 that the modification is after the current PC, but it
1250 would require a specialized function to partially
1251 restore the CPU state */
1253 current_tb_modified = 1;
1254 cpu_restore_state(current_tb, env, pc);
1255 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1256 &current_flags);
1258 #endif /* TARGET_HAS_PRECISE_SMC */
1259 tb_phys_invalidate(tb, addr);
1260 tb = tb->page_next[n];
1262 p->first_tb = NULL;
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb_modified) {
1265 /* we generate a block containing just the instruction
1266 modifying the memory. It will ensure that it cannot modify
1267 itself */
1268 env->current_tb = NULL;
1269 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1270 cpu_resume_from_signal(env, puc);
1272 #endif
1274 #endif
1276 /* add the tb in the target page and protect it if necessary */
1277 static inline void tb_alloc_page(TranslationBlock *tb,
1278 unsigned int n, tb_page_addr_t page_addr)
1280 PageDesc *p;
1281 #ifndef CONFIG_USER_ONLY
1282 bool page_already_protected;
1283 #endif
1285 tb->page_addr[n] = page_addr;
1286 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1287 tb->page_next[n] = p->first_tb;
1288 #ifndef CONFIG_USER_ONLY
1289 page_already_protected = p->first_tb != NULL;
1290 #endif
1291 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1292 invalidate_page_bitmap(p);
1294 #if defined(TARGET_HAS_SMC) || 1
1296 #if defined(CONFIG_USER_ONLY)
1297 if (p->flags & PAGE_WRITE) {
1298 target_ulong addr;
1299 PageDesc *p2;
1300 int prot;
1302 /* force the host page as non writable (writes will have a
1303 page fault + mprotect overhead) */
1304 page_addr &= qemu_host_page_mask;
1305 prot = 0;
1306 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1307 addr += TARGET_PAGE_SIZE) {
1309 p2 = page_find (addr >> TARGET_PAGE_BITS);
1310 if (!p2)
1311 continue;
1312 prot |= p2->flags;
1313 p2->flags &= ~PAGE_WRITE;
1315 mprotect(g2h(page_addr), qemu_host_page_size,
1316 (prot & PAGE_BITS) & ~PAGE_WRITE);
1317 #ifdef DEBUG_TB_INVALIDATE
1318 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1319 page_addr);
1320 #endif
1322 #else
1323 /* if some code is already present, then the pages are already
1324 protected. So we handle the case where only the first TB is
1325 allocated in a physical page */
1326 if (!page_already_protected) {
1327 tlb_protect_code(page_addr);
1329 #endif
1331 #endif /* TARGET_HAS_SMC */
1334 /* add a new TB and link it to the physical page tables. phys_page2 is
1335 (-1) to indicate that only one page contains the TB. */
1336 void tb_link_page(TranslationBlock *tb,
1337 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1339 unsigned int h;
1340 TranslationBlock **ptb;
1342 /* Grab the mmap lock to stop another thread invalidating this TB
1343 before we are done. */
1344 mmap_lock();
1345 /* add in the physical hash table */
1346 h = tb_phys_hash_func(phys_pc);
1347 ptb = &tb_phys_hash[h];
1348 tb->phys_hash_next = *ptb;
1349 *ptb = tb;
1351 /* add in the page list */
1352 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1353 if (phys_page2 != -1)
1354 tb_alloc_page(tb, 1, phys_page2);
1355 else
1356 tb->page_addr[1] = -1;
1358 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1359 tb->jmp_next[0] = NULL;
1360 tb->jmp_next[1] = NULL;
1362 /* init original jump addresses */
1363 if (tb->tb_next_offset[0] != 0xffff)
1364 tb_reset_jump(tb, 0);
1365 if (tb->tb_next_offset[1] != 0xffff)
1366 tb_reset_jump(tb, 1);
1368 #ifdef DEBUG_TB_CHECK
1369 tb_page_check();
1370 #endif
1371 mmap_unlock();
1374 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1375 tb[1].tc_ptr. Return NULL if not found */
1376 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1378 int m_min, m_max, m;
1379 uintptr_t v;
1380 TranslationBlock *tb;
1382 if (nb_tbs <= 0)
1383 return NULL;
1384 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1385 tc_ptr >= (uintptr_t)code_gen_ptr) {
1386 return NULL;
1388 /* binary search (cf Knuth) */
1389 m_min = 0;
1390 m_max = nb_tbs - 1;
1391 while (m_min <= m_max) {
1392 m = (m_min + m_max) >> 1;
1393 tb = &tbs[m];
1394 v = (uintptr_t)tb->tc_ptr;
1395 if (v == tc_ptr)
1396 return tb;
1397 else if (tc_ptr < v) {
1398 m_max = m - 1;
1399 } else {
1400 m_min = m + 1;
1403 return &tbs[m_max];
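/*
 * Illustrative sketch (not part of the original file): tb_find_pc() maps a
 * host PC inside the generated-code buffer back to the TB containing it;
 * this is how a fault or MMIO access in the middle of a block recovers the
 * precise guest state.
 */
#if 0
static void example_tb_find_pc(CPUArchState *env, uintptr_t host_pc)
{
    TranslationBlock *tb = tb_find_pc(host_pc);

    if (tb) {
        /* re-derive the guest CPU state for the faulting host pc */
        cpu_restore_state(tb, env, host_pc);
    }
}
#endif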
1406 static void tb_reset_jump_recursive(TranslationBlock *tb);
1408 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1410 TranslationBlock *tb1, *tb_next, **ptb;
1411 unsigned int n1;
1413 tb1 = tb->jmp_next[n];
1414 if (tb1 != NULL) {
1415 /* find head of list */
1416 for(;;) {
1417 n1 = (uintptr_t)tb1 & 3;
1418 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1419 if (n1 == 2)
1420 break;
1421 tb1 = tb1->jmp_next[n1];
1423 /* we are now sure that tb jumps to tb1 */
1424 tb_next = tb1;
1426 /* remove tb from the jmp_first list */
1427 ptb = &tb_next->jmp_first;
1428 for(;;) {
1429 tb1 = *ptb;
1430 n1 = (uintptr_t)tb1 & 3;
1431 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1432 if (n1 == n && tb1 == tb)
1433 break;
1434 ptb = &tb1->jmp_next[n1];
1436 *ptb = tb->jmp_next[n];
1437 tb->jmp_next[n] = NULL;
1439 /* suppress the jump to next tb in generated code */
1440 tb_reset_jump(tb, n);
1442 /* suppress jumps in the tb on which we could have jumped */
1443 tb_reset_jump_recursive(tb_next);
1447 static void tb_reset_jump_recursive(TranslationBlock *tb)
1449 tb_reset_jump_recursive2(tb, 0);
1450 tb_reset_jump_recursive2(tb, 1);
1453 #if defined(TARGET_HAS_ICE)
1454 #if defined(CONFIG_USER_ONLY)
1455 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1457 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1459 #else
1460 void tb_invalidate_phys_addr(target_phys_addr_t addr)
1462 ram_addr_t ram_addr;
1463 MemoryRegionSection *section;
1465 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1466 if (!(memory_region_is_ram(section->mr)
1467 || (section->mr->rom_device && section->mr->readable))) {
1468 return;
1470 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1471 + memory_region_section_addr(section, addr);
1472 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1475 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1477 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1478 (pc & ~TARGET_PAGE_MASK));
1480 #endif
1481 #endif /* TARGET_HAS_ICE */
1483 #if defined(CONFIG_USER_ONLY)
1484 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1489 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1490 int flags, CPUWatchpoint **watchpoint)
1492 return -ENOSYS;
1494 #else
1495 /* Add a watchpoint. */
1496 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1497 int flags, CPUWatchpoint **watchpoint)
1499 target_ulong len_mask = ~(len - 1);
1500 CPUWatchpoint *wp;
1502 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1503 if ((len & (len - 1)) || (addr & ~len_mask) ||
1504 len == 0 || len > TARGET_PAGE_SIZE) {
1505 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1506 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1507 return -EINVAL;
1509 wp = g_malloc(sizeof(*wp));
1511 wp->vaddr = addr;
1512 wp->len_mask = len_mask;
1513 wp->flags = flags;
1515 /* keep all GDB-injected watchpoints in front */
1516 if (flags & BP_GDB)
1517 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1518 else
1519 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1521 tlb_flush_page(env, addr);
1523 if (watchpoint)
1524 *watchpoint = wp;
1525 return 0;
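/*
 * Worked example (illustrative): a 4-byte watchpoint at 0x1000 gives
 * len_mask == ~3.  The sanity check above rejects lengths that are not a
 * power of two (len & (len - 1)), addresses not aligned to len
 * (addr & ~len_mask), and lengths of 0 or larger than one target page.
 * cpu_watchpoint_remove() below matches on both vaddr and len_mask.
 */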
1528 /* Remove a specific watchpoint. */
1529 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1530 int flags)
1532 target_ulong len_mask = ~(len - 1);
1533 CPUWatchpoint *wp;
1535 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1536 if (addr == wp->vaddr && len_mask == wp->len_mask
1537 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1538 cpu_watchpoint_remove_by_ref(env, wp);
1539 return 0;
1542 return -ENOENT;
1545 /* Remove a specific watchpoint by reference. */
1546 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1548 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1550 tlb_flush_page(env, watchpoint->vaddr);
1552 g_free(watchpoint);
1555 /* Remove all matching watchpoints. */
1556 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1558 CPUWatchpoint *wp, *next;
1560 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1561 if (wp->flags & mask)
1562 cpu_watchpoint_remove_by_ref(env, wp);
1565 #endif
1567 /* Add a breakpoint. */
1568 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1569 CPUBreakpoint **breakpoint)
1571 #if defined(TARGET_HAS_ICE)
1572 CPUBreakpoint *bp;
1574 bp = g_malloc(sizeof(*bp));
1576 bp->pc = pc;
1577 bp->flags = flags;
1579 /* keep all GDB-injected breakpoints in front */
1580 if (flags & BP_GDB)
1581 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1582 else
1583 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1585 breakpoint_invalidate(env, pc);
1587 if (breakpoint)
1588 *breakpoint = bp;
1589 return 0;
1590 #else
1591 return -ENOSYS;
1592 #endif
1595 /* Remove a specific breakpoint. */
1596 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1598 #if defined(TARGET_HAS_ICE)
1599 CPUBreakpoint *bp;
1601 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1602 if (bp->pc == pc && bp->flags == flags) {
1603 cpu_breakpoint_remove_by_ref(env, bp);
1604 return 0;
1607 return -ENOENT;
1608 #else
1609 return -ENOSYS;
1610 #endif
1613 /* Remove a specific breakpoint by reference. */
1614 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1616 #if defined(TARGET_HAS_ICE)
1617 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1619 breakpoint_invalidate(env, breakpoint->pc);
1621 g_free(breakpoint);
1622 #endif
1625 /* Remove all matching breakpoints. */
1626 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1628 #if defined(TARGET_HAS_ICE)
1629 CPUBreakpoint *bp, *next;
1631 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1632 if (bp->flags & mask)
1633 cpu_breakpoint_remove_by_ref(env, bp);
1635 #endif
1638 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1639 CPU loop after each instruction */
1640 void cpu_single_step(CPUArchState *env, int enabled)
1642 #if defined(TARGET_HAS_ICE)
1643 if (env->singlestep_enabled != enabled) {
1644 env->singlestep_enabled = enabled;
1645 if (kvm_enabled())
1646 kvm_update_guest_debug(env, 0);
1647 else {
1648 /* must flush all the translated code to avoid inconsistencies */
1649 /* XXX: only flush what is necessary */
1650 tb_flush(env);
1653 #endif
1656 static void cpu_unlink_tb(CPUArchState *env)
1658 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1659 problem and hope the cpu will stop of its own accord. For userspace
1660 emulation this often isn't actually as bad as it sounds. Often
1661 signals are used primarily to interrupt blocking syscalls. */
1662 TranslationBlock *tb;
1663 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1665 spin_lock(&interrupt_lock);
1666 tb = env->current_tb;
1667 /* if the cpu is currently executing code, we must unlink it and
1668 all the potentially executing TB */
1669 if (tb) {
1670 env->current_tb = NULL;
1671 tb_reset_jump_recursive(tb);
1673 spin_unlock(&interrupt_lock);
1676 #ifndef CONFIG_USER_ONLY
1677 /* mask must never be zero, except for A20 change call */
1678 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1680 int old_mask;
1682 old_mask = env->interrupt_request;
1683 env->interrupt_request |= mask;
1686 * If called from iothread context, wake the target cpu in
1687 * case it's halted.
1689 if (!qemu_cpu_is_self(env)) {
1690 qemu_cpu_kick(env);
1691 return;
1694 if (use_icount) {
1695 env->icount_decr.u16.high = 0xffff;
1696 if (!can_do_io(env)
1697 && (mask & ~old_mask) != 0) {
1698 cpu_abort(env, "Raised interrupt while not in I/O function");
1700 } else {
1701 cpu_unlink_tb(env);
1705 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1707 #else /* CONFIG_USER_ONLY */
1709 void cpu_interrupt(CPUArchState *env, int mask)
1711 env->interrupt_request |= mask;
1712 cpu_unlink_tb(env);
1714 #endif /* CONFIG_USER_ONLY */
1716 void cpu_reset_interrupt(CPUArchState *env, int mask)
1718 env->interrupt_request &= ~mask;
1721 void cpu_exit(CPUArchState *env)
1723 env->exit_request = 1;
1724 cpu_unlink_tb(env);
1727 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1729 va_list ap;
1730 va_list ap2;
1732 va_start(ap, fmt);
1733 va_copy(ap2, ap);
1734 fprintf(stderr, "qemu: fatal: ");
1735 vfprintf(stderr, fmt, ap);
1736 fprintf(stderr, "\n");
1737 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1738 if (qemu_log_enabled()) {
1739 qemu_log("qemu: fatal: ");
1740 qemu_log_vprintf(fmt, ap2);
1741 qemu_log("\n");
1742 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1743 qemu_log_flush();
1744 qemu_log_close();
1746 va_end(ap2);
1747 va_end(ap);
1748 #if defined(CONFIG_USER_ONLY)
1750 struct sigaction act;
1751 sigfillset(&act.sa_mask);
1752 act.sa_handler = SIG_DFL;
1753 sigaction(SIGABRT, &act, NULL);
1755 #endif
1756 abort();
1759 CPUArchState *cpu_copy(CPUArchState *env)
1761 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1762 CPUArchState *next_cpu = new_env->next_cpu;
1763 int cpu_index = new_env->cpu_index;
1764 #if defined(TARGET_HAS_ICE)
1765 CPUBreakpoint *bp;
1766 CPUWatchpoint *wp;
1767 #endif
1769 memcpy(new_env, env, sizeof(CPUArchState));
1771 /* Preserve chaining and index. */
1772 new_env->next_cpu = next_cpu;
1773 new_env->cpu_index = cpu_index;
1775 /* Clone all break/watchpoints.
1776 Note: Once we support ptrace with hw-debug register access, make sure
1777 BP_CPU break/watchpoints are handled correctly on clone. */
1778 QTAILQ_INIT(&env->breakpoints);
1779 QTAILQ_INIT(&env->watchpoints);
1780 #if defined(TARGET_HAS_ICE)
1781 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1782 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1784 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1785 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1786 wp->flags, NULL);
1788 #endif
1790 return new_env;
1793 #if !defined(CONFIG_USER_ONLY)
1794 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1796 unsigned int i;
1798 /* Discard jump cache entries for any tb which might potentially
1799 overlap the flushed page. */
1800 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1801 memset (&env->tb_jmp_cache[i], 0,
1802 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1804 i = tb_jmp_cache_hash_page(addr);
1805 memset (&env->tb_jmp_cache[i], 0,
1806 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1809 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1810 uintptr_t length)
1812 uintptr_t start1;
1814 /* we modify the TLB cache so that the dirty bit will be set again
1815 when accessing the range */
1816 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1817 /* Check that we don't span multiple blocks - this breaks the
1818 address comparisons below. */
1819 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1820 != (end - 1) - start) {
1821 abort();
1823 cpu_tlb_reset_dirty_all(start1, length);
1827 /* Note: start and end must be within the same ram block. */
1828 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1829 int dirty_flags)
1831 uintptr_t length;
1833 start &= TARGET_PAGE_MASK;
1834 end = TARGET_PAGE_ALIGN(end);
1836 length = end - start;
1837 if (length == 0)
1838 return;
1839 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1841 if (tcg_enabled()) {
1842 tlb_reset_dirty_range_all(start, end, length);
1846 int cpu_physical_memory_set_dirty_tracking(int enable)
1848 int ret = 0;
1849 in_migration = enable;
1850 return ret;
1853 target_phys_addr_t memory_region_section_get_iotlb(CPUArchState *env,
1854 MemoryRegionSection *section,
1855 target_ulong vaddr,
1856 target_phys_addr_t paddr,
1857 int prot,
1858 target_ulong *address)
1860 target_phys_addr_t iotlb;
1861 CPUWatchpoint *wp;
1863 if (memory_region_is_ram(section->mr)) {
1864 /* Normal RAM. */
1865 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1866 + memory_region_section_addr(section, paddr);
1867 if (!section->readonly) {
1868 iotlb |= phys_section_notdirty;
1869 } else {
1870 iotlb |= phys_section_rom;
1872 } else {
1873 /* IO handlers are currently passed a physical address.
1874 It would be nice to pass an offset from the base address
1875 of that region. This would avoid having to special case RAM,
1876 and avoid full address decoding in every device.
1877 We can't use the high bits of pd for this because
1878 IO_MEM_ROMD uses these as a ram address. */
1879 iotlb = section - phys_sections;
1880 iotlb += memory_region_section_addr(section, paddr);
1883 /* Make accesses to pages with watchpoints go via the
1884 watchpoint trap routines. */
1885 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1886 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1887 /* Avoid trapping reads of pages with a write breakpoint. */
1888 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1889 iotlb = phys_section_watch + paddr;
1890 *address |= TLB_MMIO;
1891 break;
1896 return iotlb;
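/*
 * Explanatory note (added, not from the original file): the value returned
 * here ends up in the TLB's iotlb slot.  For RAM it is the page's ram_addr
 * OR-ed with the notdirty (or, for read-only sections, rom) section index
 * so that writes are still intercepted for dirty tracking; for MMIO it is
 * the section's index in phys_sections[] plus the offset within the
 * region; and for pages with watchpoints it is redirected to
 * phys_section_watch with TLB_MMIO set so that every access traps.
 */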
1899 #else
1901 * Walks guest process memory "regions" one by one
1902 * and calls callback function 'fn' for each region.
1905 struct walk_memory_regions_data
1907 walk_memory_regions_fn fn;
1908 void *priv;
1909 uintptr_t start;
1910 int prot;
1913 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1914 abi_ulong end, int new_prot)
1916 if (data->start != -1ul) {
1917 int rc = data->fn(data->priv, data->start, end, data->prot);
1918 if (rc != 0) {
1919 return rc;
1923 data->start = (new_prot ? end : -1ul);
1924 data->prot = new_prot;
1926 return 0;
1929 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1930 abi_ulong base, int level, void **lp)
1932 abi_ulong pa;
1933 int i, rc;
1935 if (*lp == NULL) {
1936 return walk_memory_regions_end(data, base, 0);
1939 if (level == 0) {
1940 PageDesc *pd = *lp;
1941 for (i = 0; i < L2_SIZE; ++i) {
1942 int prot = pd[i].flags;
1944 pa = base | (i << TARGET_PAGE_BITS);
1945 if (prot != data->prot) {
1946 rc = walk_memory_regions_end(data, pa, prot);
1947 if (rc != 0) {
1948 return rc;
1952 } else {
1953 void **pp = *lp;
1954 for (i = 0; i < L2_SIZE; ++i) {
1955 pa = base | ((abi_ulong)i <<
1956 (TARGET_PAGE_BITS + L2_BITS * level));
1957 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1958 if (rc != 0) {
1959 return rc;
1964 return 0;
1967 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1969 struct walk_memory_regions_data data;
1970 uintptr_t i;
1972 data.fn = fn;
1973 data.priv = priv;
1974 data.start = -1ul;
1975 data.prot = 0;
1977 for (i = 0; i < V_L1_SIZE; i++) {
1978 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1979 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1980 if (rc != 0) {
1981 return rc;
1985 return walk_memory_regions_end(&data, 0, 0);
1988 static int dump_region(void *priv, abi_ulong start,
1989 abi_ulong end, unsigned long prot)
1991 FILE *f = (FILE *)priv;
1993 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
1994 " "TARGET_ABI_FMT_lx" %c%c%c\n",
1995 start, end, end - start,
1996 ((prot & PAGE_READ) ? 'r' : '-'),
1997 ((prot & PAGE_WRITE) ? 'w' : '-'),
1998 ((prot & PAGE_EXEC) ? 'x' : '-'));
2000 return (0);
2003 /* dump memory mappings */
2004 void page_dump(FILE *f)
2006 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2007 "start", "end", "size", "prot");
2008 walk_memory_regions(f, dump_region);
2011 int page_get_flags(target_ulong address)
2013 PageDesc *p;
2015 p = page_find(address >> TARGET_PAGE_BITS);
2016 if (!p)
2017 return 0;
2018 return p->flags;
2021 /* Modify the flags of a page and invalidate the code if necessary.
2022 The flag PAGE_WRITE_ORG is positioned automatically depending
2023 on PAGE_WRITE. The mmap_lock should already be held. */
2024 void page_set_flags(target_ulong start, target_ulong end, int flags)
2026 target_ulong addr, len;
2028 /* This function should never be called with addresses outside the
2029 guest address space. If this assert fires, it probably indicates
2030 a missing call to h2g_valid. */
2031 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2032 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2033 #endif
2034 assert(start < end);
2036 start = start & TARGET_PAGE_MASK;
2037 end = TARGET_PAGE_ALIGN(end);
2039 if (flags & PAGE_WRITE) {
2040 flags |= PAGE_WRITE_ORG;
2043 for (addr = start, len = end - start;
2044 len != 0;
2045 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2046 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2048 /* If the write protection bit is set, then we invalidate
2049 the code inside. */
2050 if (!(p->flags & PAGE_WRITE) &&
2051 (flags & PAGE_WRITE) &&
2052 p->first_tb) {
2053 tb_invalidate_phys_page(addr, 0, NULL);
2055 p->flags = flags;
2059 int page_check_range(target_ulong start, target_ulong len, int flags)
2061 PageDesc *p;
2062 target_ulong end;
2063 target_ulong addr;
2065 /* This function should never be called with addresses outside the
2066 guest address space. If this assert fires, it probably indicates
2067 a missing call to h2g_valid. */
2068 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2069 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2070 #endif
2072 if (len == 0) {
2073 return 0;
2075 if (start + len - 1 < start) {
2076 /* We've wrapped around. */
2077 return -1;
2080 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2081 start = start & TARGET_PAGE_MASK;
2083 for (addr = start, len = end - start;
2084 len != 0;
2085 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2086 p = page_find(addr >> TARGET_PAGE_BITS);
2087 if (!p)
2088 return -1;
2089 if (!(p->flags & PAGE_VALID))
2090 return -1;
2092 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2093 return -1;
2094 if (flags & PAGE_WRITE) {
2095 if (!(p->flags & PAGE_WRITE_ORG))
2096 return -1;
2097 /* unprotect the page if it was put read-only because it
2098 contains translated code */
2099 if (!(p->flags & PAGE_WRITE)) {
2100 if (!page_unprotect(addr, 0, NULL))
2101 return -1;
2103 return 0;
2106 return 0;
2109 /* called from signal handler: invalidate the code and unprotect the
2110 page. Return TRUE if the fault was successfully handled. */
2111 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2113 unsigned int prot;
2114 PageDesc *p;
2115 target_ulong host_start, host_end, addr;
2117 /* Technically this isn't safe inside a signal handler. However we
2118 know this only ever happens in a synchronous SEGV handler, so in
2119 practice it seems to be ok. */
2120 mmap_lock();
2122 p = page_find(address >> TARGET_PAGE_BITS);
2123 if (!p) {
2124 mmap_unlock();
2125 return 0;
2128 /* if the page was really writable, then we change its
2129 protection back to writable */
2130 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2131 host_start = address & qemu_host_page_mask;
2132 host_end = host_start + qemu_host_page_size;
2134 prot = 0;
2135 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2136 p = page_find(addr >> TARGET_PAGE_BITS);
2137 p->flags |= PAGE_WRITE;
2138 prot |= p->flags;
2140 /* and since the content will be modified, we must invalidate
2141 the corresponding translated code. */
2142 tb_invalidate_phys_page(addr, pc, puc);
2143 #ifdef DEBUG_TB_CHECK
2144 tb_invalidate_check(addr);
2145 #endif
2147 mprotect((void *)g2h(host_start), qemu_host_page_size,
2148 prot & PAGE_BITS);
2150 mmap_unlock();
2151 return 1;
2153 mmap_unlock();
2154 return 0;
2156 #endif /* defined(CONFIG_USER_ONLY) */
2158 #if !defined(CONFIG_USER_ONLY)
2160 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2161 typedef struct subpage_t {
2162 MemoryRegion iomem;
2163 target_phys_addr_t base;
2164 uint16_t sub_section[TARGET_PAGE_SIZE];
2165 } subpage_t;
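/* A subpage_t covers one target page whose contents are served by several
 * MemoryRegionSections.  sub_section[] maps every byte offset within the
 * page to the index of the phys_sections[] entry responsible for it;
 * subpage_read()/subpage_write() forward accesses to that section's
 * MemoryRegion. */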
2167 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2168 uint16_t section);
2169 static subpage_t *subpage_init(target_phys_addr_t base);
2170 static void destroy_page_desc(uint16_t section_index)
2172 MemoryRegionSection *section = &phys_sections[section_index];
2173 MemoryRegion *mr = section->mr;
2175 if (mr->subpage) {
2176 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2177 memory_region_destroy(&subpage->iomem);
2178 g_free(subpage);
2182 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2184 unsigned i;
2185 PhysPageEntry *p;
2187 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2188 return;
2191 p = phys_map_nodes[lp->ptr];
2192 for (i = 0; i < L2_SIZE; ++i) {
2193 if (!p[i].is_leaf) {
2194 destroy_l2_mapping(&p[i], level - 1);
2195 } else {
2196 destroy_page_desc(p[i].ptr);
2199 lp->is_leaf = 0;
2200 lp->ptr = PHYS_MAP_NODE_NIL;
2203 static void destroy_all_mappings(AddressSpaceDispatch *d)
2205 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2206 phys_map_nodes_reset();
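/* phys_sections[] is a flat table of MemoryRegionSections describing the
 * current generation of the memory map.  phys_section_add() copies the
 * section and returns its index; that index, not a pointer, is what is
 * stored in the physical page table leaves and in subpage tables.  The
 * table grows by doubling and is discarded wholesale by
 * phys_sections_clear() when the map is rebuilt. */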
2209 static uint16_t phys_section_add(MemoryRegionSection *section)
2211 if (phys_sections_nb == phys_sections_nb_alloc) {
2212 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2213 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2214 phys_sections_nb_alloc);
2216 phys_sections[phys_sections_nb] = *section;
2217 return phys_sections_nb++;
2220 static void phys_sections_clear(void)
2222 phys_sections_nb = 0;
2225 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2227 subpage_t *subpage;
2228 target_phys_addr_t base = section->offset_within_address_space
2229 & TARGET_PAGE_MASK;
2230 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2231 MemoryRegionSection subsection = {
2232 .offset_within_address_space = base,
2233 .size = TARGET_PAGE_SIZE,
2235 target_phys_addr_t start, end;
2237 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2239 if (!(existing->mr->subpage)) {
2240 subpage = subpage_init(base);
2241 subsection.mr = &subpage->iomem;
2242 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2243 phys_section_add(&subsection));
2244 } else {
2245 subpage = container_of(existing->mr, subpage_t, iomem);
2247 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2248 end = start + section->size - 1;
2249 subpage_register(subpage, start, end, phys_section_add(section));
2253 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2255 target_phys_addr_t start_addr = section->offset_within_address_space;
2256 ram_addr_t size = section->size;
2257 target_phys_addr_t addr;
2258 uint16_t section_index = phys_section_add(section);
2260 assert(size);
2262 addr = start_addr;
2263 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2264 section_index);
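/* mem_add() is the MemoryListener callback that populates an address
 * space's dispatch tree from a MemoryRegionSection.  A section that is not
 * page-aligned is split into up to three pieces: a partial leading page and
 * a partial trailing page, registered through register_subpage(), and a
 * page-aligned middle registered in bulk by register_multipage() (or page
 * by page through subpages when offset_within_region is itself
 * misaligned). */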
2267 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2269 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2270 MemoryRegionSection now = *section, remain = *section;
2272 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2273 || (now.size < TARGET_PAGE_SIZE)) {
2274 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2275 - now.offset_within_address_space,
2276 now.size);
2277 register_subpage(d, &now);
2278 remain.size -= now.size;
2279 remain.offset_within_address_space += now.size;
2280 remain.offset_within_region += now.size;
2282 while (remain.size >= TARGET_PAGE_SIZE) {
2283 now = remain;
2284 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2285 now.size = TARGET_PAGE_SIZE;
2286 register_subpage(d, &now);
2287 } else {
2288 now.size &= TARGET_PAGE_MASK;
2289 register_multipage(d, &now);
2291 remain.size -= now.size;
2292 remain.offset_within_address_space += now.size;
2293 remain.offset_within_region += now.size;
2295 now = remain;
2296 if (now.size) {
2297 register_subpage(d, &now);
2301 void qemu_flush_coalesced_mmio_buffer(void)
2303 if (kvm_enabled())
2304 kvm_flush_coalesced_mmio_buffer();
2307 #if defined(__linux__) && !defined(TARGET_S390X)
2309 #include <sys/vfs.h>
2311 #define HUGETLBFS_MAGIC 0x958458f6
2313 static long gethugepagesize(const char *path)
2315 struct statfs fs;
2316 int ret;
2318 do {
2319 ret = statfs(path, &fs);
2320 } while (ret != 0 && errno == EINTR);
2322 if (ret != 0) {
2323 perror(path);
2324 return 0;
2327 if (fs.f_type != HUGETLBFS_MAGIC)
2328 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2330 return fs.f_bsize;
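/* Back a RAMBlock with a file on a hugetlbfs mount: verify the mount with
 * gethugepagesize(), create a temporary backing file with mkstemp() and
 * unlink it immediately, round the size up to a whole number of huge pages,
 * ftruncate() it and mmap() it (MAP_POPULATE | MAP_SHARED when mem_prealloc
 * is set).  Returns NULL on any failure so the caller can fall back to an
 * ordinary allocation. */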
2333 static void *file_ram_alloc(RAMBlock *block,
2334 ram_addr_t memory,
2335 const char *path)
2337 char *filename;
2338 void *area;
2339 int fd;
2340 #ifdef MAP_POPULATE
2341 int flags;
2342 #endif
2343 unsigned long hpagesize;
2345 hpagesize = gethugepagesize(path);
2346 if (!hpagesize) {
2347 return NULL;
2350 if (memory < hpagesize) {
2351 return NULL;
2354 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2355 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2356 return NULL;
2359 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2360 return NULL;
2363 fd = mkstemp(filename);
2364 if (fd < 0) {
2365 perror("unable to create backing store for hugepages");
2366 free(filename);
2367 return NULL;
2369 unlink(filename);
2370 free(filename);
2372 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2375 * ftruncate is not supported by hugetlbfs in older
2376 * hosts, so don't bother bailing out on errors.
2377 * If anything goes wrong with it under other filesystems,
2378 * mmap will fail.
2380 if (ftruncate(fd, memory))
2381 perror("ftruncate");
2383 #ifdef MAP_POPULATE
2384 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2385 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2386 * to sidestep this quirk.
2388 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2389 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2390 #else
2391 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2392 #endif
2393 if (area == MAP_FAILED) {
2394 perror("file_ram_alloc: can't mmap RAM pages");
2395 close(fd);
2396 return (NULL);
2398 block->fd = fd;
2399 return area;
2401 #endif
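/* Choose an offset in the ram_addr_t space for a new block of the given
 * size.  The search is best-fit: for every existing block, compute the gap
 * between its end and the closest block above it, and remember the smallest
 * gap that still fits, which keeps the RAM address space compact as blocks
 * come and go. */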
2403 static ram_addr_t find_ram_offset(ram_addr_t size)
2405 RAMBlock *block, *next_block;
2406 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2408 if (QLIST_EMPTY(&ram_list.blocks))
2409 return 0;
2411 QLIST_FOREACH(block, &ram_list.blocks, next) {
2412 ram_addr_t end, next = RAM_ADDR_MAX;
2414 end = block->offset + block->length;
2416 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2417 if (next_block->offset >= end) {
2418 next = MIN(next, next_block->offset);
2421 if (next - end >= size && next - end < mingap) {
2422 offset = end;
2423 mingap = next - end;
2427 if (offset == RAM_ADDR_MAX) {
2428 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2429 (uint64_t)size);
2430 abort();
2433 return offset;
2436 static ram_addr_t last_ram_offset(void)
2438 RAMBlock *block;
2439 ram_addr_t last = 0;
2441 QLIST_FOREACH(block, &ram_list.blocks, next)
2442 last = MAX(last, block->offset + block->length);
2444 return last;
2447 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2449 int ret;
2450 QemuOpts *machine_opts;
2452 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2453 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2454 if (machine_opts &&
2455 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2456 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2457 if (ret) {
2458 perror("qemu_madvise");
2459 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2460 "but dump_guest_core=off specified\n");
2465 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2467 RAMBlock *new_block, *block;
2469 new_block = NULL;
2470 QLIST_FOREACH(block, &ram_list.blocks, next) {
2471 if (block->offset == addr) {
2472 new_block = block;
2473 break;
2476 assert(new_block);
2477 assert(!new_block->idstr[0]);
2479 if (dev) {
2480 char *id = qdev_get_dev_path(dev);
2481 if (id) {
2482 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2483 g_free(id);
2486 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2488 QLIST_FOREACH(block, &ram_list.blocks, next) {
2489 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2490 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2491 new_block->idstr);
2492 abort();
2497 static int memory_try_enable_merging(void *addr, size_t len)
2499 QemuOpts *opts;
2501 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2502 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2503 /* disabled by the user */
2504 return 0;
2507 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
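/* Allocate guest RAM and register it as a RAMBlock.  The backing host
 * memory can be a caller-supplied pointer (RAM_PREALLOC_MASK), a hugetlbfs
 * file when -mem-path is given, a Xen or KVM specific allocation, or plain
 * qemu_vmalloc().  The global dirty bitmap is grown to cover the new block
 * and its pages start out with all dirty flags set. */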
2510 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2511 MemoryRegion *mr)
2513 RAMBlock *new_block;
2515 size = TARGET_PAGE_ALIGN(size);
2516 new_block = g_malloc0(sizeof(*new_block));
2518 new_block->mr = mr;
2519 new_block->offset = find_ram_offset(size);
2520 if (host) {
2521 new_block->host = host;
2522 new_block->flags |= RAM_PREALLOC_MASK;
2523 } else {
2524 if (mem_path) {
2525 #if defined (__linux__) && !defined(TARGET_S390X)
2526 new_block->host = file_ram_alloc(new_block, size, mem_path);
2527 if (!new_block->host) {
2528 new_block->host = qemu_vmalloc(size);
2529 memory_try_enable_merging(new_block->host, size);
2531 #else
2532 fprintf(stderr, "-mem-path option unsupported\n");
2533 exit(1);
2534 #endif
2535 } else {
2536 if (xen_enabled()) {
2537 xen_ram_alloc(new_block->offset, size, mr);
2538 } else if (kvm_enabled()) {
2539 /* some s390/kvm configurations have special constraints */
2540 new_block->host = kvm_vmalloc(size);
2541 } else {
2542 new_block->host = qemu_vmalloc(size);
2544 memory_try_enable_merging(new_block->host, size);
2547 new_block->length = size;
2549 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2551 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2552 last_ram_offset() >> TARGET_PAGE_BITS);
2553 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2554 0, size >> TARGET_PAGE_BITS);
2555 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2557 qemu_ram_setup_dump(new_block->host, size);
2559 if (kvm_enabled())
2560 kvm_setup_guest_memory(new_block->host, size);
2562 return new_block->offset;
2565 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2567 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2570 void qemu_ram_free_from_ptr(ram_addr_t addr)
2572 RAMBlock *block;
2574 QLIST_FOREACH(block, &ram_list.blocks, next) {
2575 if (addr == block->offset) {
2576 QLIST_REMOVE(block, next);
2577 g_free(block);
2578 return;
2583 void qemu_ram_free(ram_addr_t addr)
2585 RAMBlock *block;
2587 QLIST_FOREACH(block, &ram_list.blocks, next) {
2588 if (addr == block->offset) {
2589 QLIST_REMOVE(block, next);
2590 if (block->flags & RAM_PREALLOC_MASK) {
2592 } else if (mem_path) {
2593 #if defined (__linux__) && !defined(TARGET_S390X)
2594 if (block->fd) {
2595 munmap(block->host, block->length);
2596 close(block->fd);
2597 } else {
2598 qemu_vfree(block->host);
2600 #else
2601 abort();
2602 #endif
2603 } else {
2604 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2605 munmap(block->host, block->length);
2606 #else
2607 if (xen_enabled()) {
2608 xen_invalidate_map_cache_entry(block->host);
2609 } else {
2610 qemu_vfree(block->host);
2612 #endif
2614 g_free(block);
2615 return;
2621 #ifndef _WIN32
2622 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2624 RAMBlock *block;
2625 ram_addr_t offset;
2626 int flags;
2627 void *area, *vaddr;
2629 QLIST_FOREACH(block, &ram_list.blocks, next) {
2630 offset = addr - block->offset;
2631 if (offset < block->length) {
2632 vaddr = block->host + offset;
2633 if (block->flags & RAM_PREALLOC_MASK) {
2635 } else {
2636 flags = MAP_FIXED;
2637 munmap(vaddr, length);
2638 if (mem_path) {
2639 #if defined(__linux__) && !defined(TARGET_S390X)
2640 if (block->fd) {
2641 #ifdef MAP_POPULATE
2642 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2643 MAP_PRIVATE;
2644 #else
2645 flags |= MAP_PRIVATE;
2646 #endif
2647 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2648 flags, block->fd, offset);
2649 } else {
2650 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2651 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2652 flags, -1, 0);
2654 #else
2655 abort();
2656 #endif
2657 } else {
2658 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2659 flags |= MAP_SHARED | MAP_ANONYMOUS;
2660 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2661 flags, -1, 0);
2662 #else
2663 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2664 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2665 flags, -1, 0);
2666 #endif
2668 if (area != vaddr) {
2669 fprintf(stderr, "Could not remap addr: "
2670 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2671 length, addr);
2672 exit(1);
2674 memory_try_enable_merging(vaddr, length);
2675 qemu_ram_setup_dump(vaddr, length);
2677 return;
2681 #endif /* !_WIN32 */
2683 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2684 With the exception of the softmmu code in this file, this should
2685 only be used for local memory (e.g. video ram) that the device owns,
2686 and knows it isn't going to access beyond the end of the block.
2688 It should not be used for general purpose DMA.
2689 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2691 void *qemu_get_ram_ptr(ram_addr_t addr)
2693 RAMBlock *block;
2695 QLIST_FOREACH(block, &ram_list.blocks, next) {
2696 if (addr - block->offset < block->length) {
2697 /* Move this entry to the start of the list. */
2698 if (block != QLIST_FIRST(&ram_list.blocks)) {
2699 QLIST_REMOVE(block, next);
2700 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2702 if (xen_enabled()) {
2703 /* We need to check if the requested address is in RAM
2704 * because we don't want to map the entire memory in QEMU.
2705 * In that case just map until the end of the page.
2707 if (block->offset == 0) {
2708 return xen_map_cache(addr, 0, 0);
2709 } else if (block->host == NULL) {
2710 block->host =
2711 xen_map_cache(block->offset, block->length, 1);
2714 return block->host + (addr - block->offset);
2718 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2719 abort();
2721 return NULL;
2724 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2725 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2727 void *qemu_safe_ram_ptr(ram_addr_t addr)
2729 RAMBlock *block;
2731 QLIST_FOREACH(block, &ram_list.blocks, next) {
2732 if (addr - block->offset < block->length) {
2733 if (xen_enabled()) {
2734 /* We need to check if the requested address is in RAM
2735 * because we don't want to map the entire memory in QEMU.
2736 * In that case just map until the end of the page.
2738 if (block->offset == 0) {
2739 return xen_map_cache(addr, 0, 0);
2740 } else if (block->host == NULL) {
2741 block->host =
2742 xen_map_cache(block->offset, block->length, 1);
2745 return block->host + (addr - block->offset);
2749 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2750 abort();
2752 return NULL;
2755 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2756 * but takes a size argument */
2757 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2759 if (*size == 0) {
2760 return NULL;
2762 if (xen_enabled()) {
2763 return xen_map_cache(addr, *size, 1);
2764 } else {
2765 RAMBlock *block;
2767 QLIST_FOREACH(block, &ram_list.blocks, next) {
2768 if (addr - block->offset < block->length) {
2769 if (addr - block->offset + *size > block->length)
2770 *size = block->length - addr + block->offset;
2771 return block->host + (addr - block->offset);
2775 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2776 abort();
2780 void qemu_put_ram_ptr(void *addr)
2782 trace_qemu_put_ram_ptr(addr);
2785 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2787 RAMBlock *block;
2788 uint8_t *host = ptr;
2790 if (xen_enabled()) {
2791 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2792 return 0;
2795 QLIST_FOREACH(block, &ram_list.blocks, next) {
2796 /* This case happens when the block is not mapped. */
2797 if (block->host == NULL) {
2798 continue;
2800 if (host - block->host < block->length) {
2801 *ram_addr = block->offset + (host - block->host);
2802 return 0;
2806 return -1;
2809 /* Some of the softmmu routines need to translate from a host pointer
2810 (typically a TLB entry) back to a ram offset. */
2811 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2813 ram_addr_t ram_addr;
2815 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2816 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2817 abort();
2819 return ram_addr;
2822 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
2823 unsigned size)
2825 #ifdef DEBUG_UNASSIGNED
2826 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2827 #endif
2828 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2829 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2830 #endif
2831 return 0;
2834 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
2835 uint64_t val, unsigned size)
2837 #ifdef DEBUG_UNASSIGNED
2838 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2839 #endif
2840 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2841 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2842 #endif
2845 static const MemoryRegionOps unassigned_mem_ops = {
2846 .read = unassigned_mem_read,
2847 .write = unassigned_mem_write,
2848 .endianness = DEVICE_NATIVE_ENDIAN,
2851 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
2852 unsigned size)
2854 abort();
2857 static void error_mem_write(void *opaque, target_phys_addr_t addr,
2858 uint64_t value, unsigned size)
2860 abort();
2863 static const MemoryRegionOps error_mem_ops = {
2864 .read = error_mem_read,
2865 .write = error_mem_write,
2866 .endianness = DEVICE_NATIVE_ENDIAN,
2869 static const MemoryRegionOps rom_mem_ops = {
2870 .read = error_mem_read,
2871 .write = unassigned_mem_write,
2872 .endianness = DEVICE_NATIVE_ENDIAN,
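/* notdirty_mem_write() is the store handler reached, via the TLB, for RAM
 * pages that may still contain translated code (CODE_DIRTY_FLAG clear).  It
 * invalidates any TBs overlapping the store, performs the store on the
 * underlying RAM, updates the dirty flags, and once the page no longer
 * holds code re-enables the fast path with tlb_set_dirty(). */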
2875 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
2876 uint64_t val, unsigned size)
2878 int dirty_flags;
2879 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2880 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2881 #if !defined(CONFIG_USER_ONLY)
2882 tb_invalidate_phys_page_fast(ram_addr, size);
2883 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2884 #endif
2886 switch (size) {
2887 case 1:
2888 stb_p(qemu_get_ram_ptr(ram_addr), val);
2889 break;
2890 case 2:
2891 stw_p(qemu_get_ram_ptr(ram_addr), val);
2892 break;
2893 case 4:
2894 stl_p(qemu_get_ram_ptr(ram_addr), val);
2895 break;
2896 default:
2897 abort();
2899 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2900 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2901 /* we remove the notdirty callback only if the code has been
2902 flushed */
2903 if (dirty_flags == 0xff)
2904 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2907 static const MemoryRegionOps notdirty_mem_ops = {
2908 .read = error_mem_read,
2909 .write = notdirty_mem_write,
2910 .endianness = DEVICE_NATIVE_ENDIAN,
2913 /* Generate a debug exception if a watchpoint has been hit. */
2914 static void check_watchpoint(int offset, int len_mask, int flags)
2916 CPUArchState *env = cpu_single_env;
2917 target_ulong pc, cs_base;
2918 TranslationBlock *tb;
2919 target_ulong vaddr;
2920 CPUWatchpoint *wp;
2921 int cpu_flags;
2923 if (env->watchpoint_hit) {
2924 /* We re-entered the check after replacing the TB. Now raise
2925 * the debug interrupt so that it will trigger after the
2926 * current instruction. */
2927 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2928 return;
2930 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2931 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2932 if ((vaddr == (wp->vaddr & len_mask) ||
2933 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2934 wp->flags |= BP_WATCHPOINT_HIT;
2935 if (!env->watchpoint_hit) {
2936 env->watchpoint_hit = wp;
2937 tb = tb_find_pc(env->mem_io_pc);
2938 if (!tb) {
2939 cpu_abort(env, "check_watchpoint: could not find TB for "
2940 "pc=%p", (void *)env->mem_io_pc);
2942 cpu_restore_state(tb, env, env->mem_io_pc);
2943 tb_phys_invalidate(tb, -1);
2944 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2945 env->exception_index = EXCP_DEBUG;
2946 cpu_loop_exit(env);
2947 } else {
2948 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2949 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2950 cpu_resume_from_signal(env, NULL);
2953 } else {
2954 wp->flags &= ~BP_WATCHPOINT_HIT;
2959 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2960 so these check for a hit then pass through to the normal out-of-line
2961 phys routines. */
2962 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
2963 unsigned size)
2965 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2966 switch (size) {
2967 case 1: return ldub_phys(addr);
2968 case 2: return lduw_phys(addr);
2969 case 4: return ldl_phys(addr);
2970 default: abort();
2974 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
2975 uint64_t val, unsigned size)
2977 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2978 switch (size) {
2979 case 1:
2980 stb_phys(addr, val);
2981 break;
2982 case 2:
2983 stw_phys(addr, val);
2984 break;
2985 case 4:
2986 stl_phys(addr, val);
2987 break;
2988 default: abort();
2992 static const MemoryRegionOps watch_mem_ops = {
2993 .read = watch_mem_read,
2994 .write = watch_mem_write,
2995 .endianness = DEVICE_NATIVE_ENDIAN,
2998 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
2999 unsigned len)
3001 subpage_t *mmio = opaque;
3002 unsigned int idx = SUBPAGE_IDX(addr);
3003 MemoryRegionSection *section;
3004 #if defined(DEBUG_SUBPAGE)
3005 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3006 mmio, len, addr, idx);
3007 #endif
3009 section = &phys_sections[mmio->sub_section[idx]];
3010 addr += mmio->base;
3011 addr -= section->offset_within_address_space;
3012 addr += section->offset_within_region;
3013 return io_mem_read(section->mr, addr, len);
3016 static void subpage_write(void *opaque, target_phys_addr_t addr,
3017 uint64_t value, unsigned len)
3019 subpage_t *mmio = opaque;
3020 unsigned int idx = SUBPAGE_IDX(addr);
3021 MemoryRegionSection *section;
3022 #if defined(DEBUG_SUBPAGE)
3023 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3024 " idx %d value %"PRIx64"\n",
3025 __func__, mmio, len, addr, idx, value);
3026 #endif
3028 section = &phys_sections[mmio->sub_section[idx]];
3029 addr += mmio->base;
3030 addr -= section->offset_within_address_space;
3031 addr += section->offset_within_region;
3032 io_mem_write(section->mr, addr, value, len);
3035 static const MemoryRegionOps subpage_ops = {
3036 .read = subpage_read,
3037 .write = subpage_write,
3038 .endianness = DEVICE_NATIVE_ENDIAN,
3041 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3042 unsigned size)
3044 ram_addr_t raddr = addr;
3045 void *ptr = qemu_get_ram_ptr(raddr);
3046 switch (size) {
3047 case 1: return ldub_p(ptr);
3048 case 2: return lduw_p(ptr);
3049 case 4: return ldl_p(ptr);
3050 default: abort();
3054 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3055 uint64_t value, unsigned size)
3057 ram_addr_t raddr = addr;
3058 void *ptr = qemu_get_ram_ptr(raddr);
3059 switch (size) {
3060 case 1: return stb_p(ptr, value);
3061 case 2: return stw_p(ptr, value);
3062 case 4: return stl_p(ptr, value);
3063 default: abort();
3067 static const MemoryRegionOps subpage_ram_ops = {
3068 .read = subpage_ram_read,
3069 .write = subpage_ram_write,
3070 .endianness = DEVICE_NATIVE_ENDIAN,
3073 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3074 uint16_t section)
3076 int idx, eidx;
3078 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3079 return -1;
3080 idx = SUBPAGE_IDX(start);
3081 eidx = SUBPAGE_IDX(end);
3082 #if defined(DEBUG_SUBPAGE)
3083 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3084 mmio, start, end, idx, eidx, memory);
3085 #endif
3086 if (memory_region_is_ram(phys_sections[section].mr)) {
3087 MemoryRegionSection new_section = phys_sections[section];
3088 new_section.mr = &io_mem_subpage_ram;
3089 section = phys_section_add(&new_section);
3091 for (; idx <= eidx; idx++) {
3092 mmio->sub_section[idx] = section;
3095 return 0;
3098 static subpage_t *subpage_init(target_phys_addr_t base)
3100 subpage_t *mmio;
3102 mmio = g_malloc0(sizeof(subpage_t));
3104 mmio->base = base;
3105 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3106 "subpage", TARGET_PAGE_SIZE);
3107 mmio->iomem.subpage = true;
3108 #if defined(DEBUG_SUBPAGE)
3109 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3110 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3111 #endif
3112 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3114 return mmio;
3117 static uint16_t dummy_section(MemoryRegion *mr)
3119 MemoryRegionSection section = {
3120 .mr = mr,
3121 .offset_within_address_space = 0,
3122 .offset_within_region = 0,
3123 .size = UINT64_MAX,
3126 return phys_section_add(&section);
3129 MemoryRegion *iotlb_to_region(target_phys_addr_t index)
3131 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3134 static void io_mem_init(void)
3136 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3137 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3138 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3139 "unassigned", UINT64_MAX);
3140 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3141 "notdirty", UINT64_MAX);
3142 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3143 "subpage-ram", UINT64_MAX);
3144 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3145 "watch", UINT64_MAX);
3148 static void mem_begin(MemoryListener *listener)
3150 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3152 destroy_all_mappings(d);
3153 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
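/* core_begin() runs at the start of every memory topology update: it
 * discards the previous generation of phys_sections[] and recreates the
 * four fixed sections (unassigned, notdirty, rom, watch) whose indices are
 * cached in the phys_section_* globals. */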
3156 static void core_begin(MemoryListener *listener)
3158 phys_sections_clear();
3159 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3160 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3161 phys_section_rom = dummy_section(&io_mem_rom);
3162 phys_section_watch = dummy_section(&io_mem_watch);
3165 static void tcg_commit(MemoryListener *listener)
3167 CPUArchState *env;
3169 /* since each CPU stores ram addresses in its TLB cache, we must
3170 reset the modified entries */
3171 /* XXX: slow ! */
3172 for (env = first_cpu; env != NULL; env = env->next_cpu) {
3173 tlb_flush(env, 1);
3177 static void core_log_global_start(MemoryListener *listener)
3179 cpu_physical_memory_set_dirty_tracking(1);
3182 static void core_log_global_stop(MemoryListener *listener)
3184 cpu_physical_memory_set_dirty_tracking(0);
3187 static void io_region_add(MemoryListener *listener,
3188 MemoryRegionSection *section)
3190 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3192 mrio->mr = section->mr;
3193 mrio->offset = section->offset_within_region;
3194 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3195 section->offset_within_address_space, section->size);
3196 ioport_register(&mrio->iorange);
3199 static void io_region_del(MemoryListener *listener,
3200 MemoryRegionSection *section)
3202 isa_unassign_ioport(section->offset_within_address_space, section->size);
3205 static MemoryListener core_memory_listener = {
3206 .begin = core_begin,
3207 .log_global_start = core_log_global_start,
3208 .log_global_stop = core_log_global_stop,
3209 .priority = 1,
3212 static MemoryListener io_memory_listener = {
3213 .region_add = io_region_add,
3214 .region_del = io_region_del,
3215 .priority = 0,
3218 static MemoryListener tcg_memory_listener = {
3219 .commit = tcg_commit,
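/* Per-AddressSpace dispatch: each AddressSpace owns an AddressSpaceDispatch
 * holding a private physical page table (phys_map) plus a MemoryListener
 * (mem_begin/mem_add) that rebuilds the table whenever the topology of that
 * address space changes.  address_space_rw() and friends then resolve
 * addresses through as->dispatch rather than a single global table. */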
3222 void address_space_init_dispatch(AddressSpace *as)
3224 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3226 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3227 d->listener = (MemoryListener) {
3228 .begin = mem_begin,
3229 .region_add = mem_add,
3230 .region_nop = mem_add,
3231 .priority = 0,
3233 as->dispatch = d;
3234 memory_listener_register(&d->listener, as);
3237 static void memory_map_init(void)
3239 system_memory = g_malloc(sizeof(*system_memory));
3240 memory_region_init(system_memory, "system", INT64_MAX);
3241 address_space_init(&address_space_memory, system_memory);
3242 address_space_memory.name = "memory";
3244 system_io = g_malloc(sizeof(*system_io));
3245 memory_region_init(system_io, "io", 65536);
3246 address_space_init(&address_space_io, system_io);
3247 address_space_io.name = "I/O";
3249 memory_listener_register(&core_memory_listener, &address_space_memory);
3250 memory_listener_register(&io_memory_listener, &address_space_io);
3251 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3254 MemoryRegion *get_system_memory(void)
3256 return system_memory;
3259 MemoryRegion *get_system_io(void)
3261 return system_io;
3264 #endif /* !defined(CONFIG_USER_ONLY) */
3266 /* physical memory access (slow version, mainly for debug) */
3267 #if defined(CONFIG_USER_ONLY)
3268 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3269 uint8_t *buf, int len, int is_write)
3271 int l, flags;
3272 target_ulong page;
3273 void * p;
3275 while (len > 0) {
3276 page = addr & TARGET_PAGE_MASK;
3277 l = (page + TARGET_PAGE_SIZE) - addr;
3278 if (l > len)
3279 l = len;
3280 flags = page_get_flags(page);
3281 if (!(flags & PAGE_VALID))
3282 return -1;
3283 if (is_write) {
3284 if (!(flags & PAGE_WRITE))
3285 return -1;
3286 /* XXX: this code should not depend on lock_user */
3287 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3288 return -1;
3289 memcpy(p, buf, l);
3290 unlock_user(p, addr, l);
3291 } else {
3292 if (!(flags & PAGE_READ))
3293 return -1;
3294 /* XXX: this code should not depend on lock_user */
3295 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3296 return -1;
3297 memcpy(buf, p, l);
3298 unlock_user(p, addr, 0);
3300 len -= l;
3301 buf += l;
3302 addr += l;
3304 return 0;
3307 #else
3309 static void invalidate_and_set_dirty(target_phys_addr_t addr,
3310 target_phys_addr_t length)
3312 if (!cpu_physical_memory_is_dirty(addr)) {
3313 /* invalidate code */
3314 tb_invalidate_phys_page_range(addr, addr + length, 0);
3315 /* set dirty bit */
3316 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3318 xen_modified_memory(addr, length);
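/* address_space_rw() is the generic slow path for physical memory access.
 * It walks the request one target page at a time, resolves each page
 * through the address space's dispatch table, then either memcpy()s to or
 * from host RAM (invalidating translated code on writes) or issues aligned
 * 4/2/1-byte io_mem_read()/io_mem_write() accesses for MMIO regions. */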
3321 void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
3322 int len, bool is_write)
3324 AddressSpaceDispatch *d = as->dispatch;
3325 int l;
3326 uint8_t *ptr;
3327 uint32_t val;
3328 target_phys_addr_t page;
3329 MemoryRegionSection *section;
3331 while (len > 0) {
3332 page = addr & TARGET_PAGE_MASK;
3333 l = (page + TARGET_PAGE_SIZE) - addr;
3334 if (l > len)
3335 l = len;
3336 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3338 if (is_write) {
3339 if (!memory_region_is_ram(section->mr)) {
3340 target_phys_addr_t addr1;
3341 addr1 = memory_region_section_addr(section, addr);
3342 /* XXX: could force cpu_single_env to NULL to avoid
3343 potential bugs */
3344 if (l >= 4 && ((addr1 & 3) == 0)) {
3345 /* 32 bit write access */
3346 val = ldl_p(buf);
3347 io_mem_write(section->mr, addr1, val, 4);
3348 l = 4;
3349 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3350 /* 16 bit write access */
3351 val = lduw_p(buf);
3352 io_mem_write(section->mr, addr1, val, 2);
3353 l = 2;
3354 } else {
3355 /* 8 bit write access */
3356 val = ldub_p(buf);
3357 io_mem_write(section->mr, addr1, val, 1);
3358 l = 1;
3360 } else if (!section->readonly) {
3361 ram_addr_t addr1;
3362 addr1 = memory_region_get_ram_addr(section->mr)
3363 + memory_region_section_addr(section, addr);
3364 /* RAM case */
3365 ptr = qemu_get_ram_ptr(addr1);
3366 memcpy(ptr, buf, l);
3367 invalidate_and_set_dirty(addr1, l);
3368 qemu_put_ram_ptr(ptr);
3370 } else {
3371 if (!(memory_region_is_ram(section->mr) ||
3372 memory_region_is_romd(section->mr))) {
3373 target_phys_addr_t addr1;
3374 /* I/O case */
3375 addr1 = memory_region_section_addr(section, addr);
3376 if (l >= 4 && ((addr1 & 3) == 0)) {
3377 /* 32 bit read access */
3378 val = io_mem_read(section->mr, addr1, 4);
3379 stl_p(buf, val);
3380 l = 4;
3381 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3382 /* 16 bit read access */
3383 val = io_mem_read(section->mr, addr1, 2);
3384 stw_p(buf, val);
3385 l = 2;
3386 } else {
3387 /* 8 bit read access */
3388 val = io_mem_read(section->mr, addr1, 1);
3389 stb_p(buf, val);
3390 l = 1;
3392 } else {
3393 /* RAM case */
3394 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3395 + memory_region_section_addr(section,
3396 addr));
3397 memcpy(buf, ptr, l);
3398 qemu_put_ram_ptr(ptr);
3401 len -= l;
3402 buf += l;
3403 addr += l;
3407 void address_space_write(AddressSpace *as, target_phys_addr_t addr,
3408 const uint8_t *buf, int len)
3410 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3414 * address_space_read: read from an address space.
3416 * @as: #AddressSpace to be accessed
3417 * @addr: address within that address space
3418 * @buf: buffer with the data transferred
* @len: length of the transfer, in bytes
3420 void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len)
3422 address_space_rw(as, addr, buf, len, false);
3426 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3427 int len, int is_write)
3429 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3432 /* used for ROM loading : can write in RAM and ROM */
3433 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3434 const uint8_t *buf, int len)
3436 AddressSpaceDispatch *d = address_space_memory.dispatch;
3437 int l;
3438 uint8_t *ptr;
3439 target_phys_addr_t page;
3440 MemoryRegionSection *section;
3442 while (len > 0) {
3443 page = addr & TARGET_PAGE_MASK;
3444 l = (page + TARGET_PAGE_SIZE) - addr;
3445 if (l > len)
3446 l = len;
3447 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3449 if (!(memory_region_is_ram(section->mr) ||
3450 memory_region_is_romd(section->mr))) {
3451 /* do nothing */
3452 } else {
3453 unsigned long addr1;
3454 addr1 = memory_region_get_ram_addr(section->mr)
3455 + memory_region_section_addr(section, addr);
3456 /* ROM/RAM case */
3457 ptr = qemu_get_ram_ptr(addr1);
3458 memcpy(ptr, buf, l);
3459 invalidate_and_set_dirty(addr1, l);
3460 qemu_put_ram_ptr(ptr);
3462 len -= l;
3463 buf += l;
3464 addr += l;
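/* cpu_physical_memory_map() normally returns a direct pointer into guest
 * RAM; a request that lands on MMIO is staged through this single,
 * page-sized bounce buffer instead.  While the bounce buffer is in use,
 * further MMIO mappings fail; callers may register a MapClient callback to
 * learn when it is released by address_space_unmap(). */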
3468 typedef struct {
3469 void *buffer;
3470 target_phys_addr_t addr;
3471 target_phys_addr_t len;
3472 } BounceBuffer;
3474 static BounceBuffer bounce;
3476 typedef struct MapClient {
3477 void *opaque;
3478 void (*callback)(void *opaque);
3479 QLIST_ENTRY(MapClient) link;
3480 } MapClient;
3482 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3483 = QLIST_HEAD_INITIALIZER(map_client_list);
3485 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3487 MapClient *client = g_malloc(sizeof(*client));
3489 client->opaque = opaque;
3490 client->callback = callback;
3491 QLIST_INSERT_HEAD(&map_client_list, client, link);
3492 return client;
3495 void cpu_unregister_map_client(void *_client)
3497 MapClient *client = (MapClient *)_client;
3499 QLIST_REMOVE(client, link);
3500 g_free(client);
3503 static void cpu_notify_map_clients(void)
3505 MapClient *client;
3507 while (!QLIST_EMPTY(&map_client_list)) {
3508 client = QLIST_FIRST(&map_client_list);
3509 client->callback(client->opaque);
3510 cpu_unregister_map_client(client);
3514 /* Map a physical memory region into a host virtual address.
3515 * May map a subset of the requested range, given by and returned in *plen.
3516 * May return NULL if resources needed to perform the mapping are exhausted.
3517 * Use only for reads OR writes - not for read-modify-write operations.
3518 * Use cpu_register_map_client() to know when retrying the map operation is
3519 * likely to succeed.
3521 void *address_space_map(AddressSpace *as,
3522 target_phys_addr_t addr,
3523 target_phys_addr_t *plen,
3524 bool is_write)
3526 AddressSpaceDispatch *d = as->dispatch;
3527 target_phys_addr_t len = *plen;
3528 target_phys_addr_t todo = 0;
3529 int l;
3530 target_phys_addr_t page;
3531 MemoryRegionSection *section;
3532 ram_addr_t raddr = RAM_ADDR_MAX;
3533 ram_addr_t rlen;
3534 void *ret;
3536 while (len > 0) {
3537 page = addr & TARGET_PAGE_MASK;
3538 l = (page + TARGET_PAGE_SIZE) - addr;
3539 if (l > len)
3540 l = len;
3541 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3543 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3544 if (todo || bounce.buffer) {
3545 break;
3547 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3548 bounce.addr = addr;
3549 bounce.len = l;
3550 if (!is_write) {
3551 address_space_read(as, addr, bounce.buffer, l);
3554 *plen = l;
3555 return bounce.buffer;
3557 if (!todo) {
3558 raddr = memory_region_get_ram_addr(section->mr)
3559 + memory_region_section_addr(section, addr);
3562 len -= l;
3563 addr += l;
3564 todo += l;
3566 rlen = todo;
3567 ret = qemu_ram_ptr_length(raddr, &rlen);
3568 *plen = rlen;
3569 return ret;
3572 /* Unmaps a memory region previously mapped by address_space_map().
3573 * Will also mark the memory as dirty if is_write == 1. access_len gives
3574 * the amount of memory that was actually read or written by the caller.
3576 void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
3577 int is_write, target_phys_addr_t access_len)
3579 if (buffer != bounce.buffer) {
3580 if (is_write) {
3581 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3582 while (access_len) {
3583 unsigned l;
3584 l = TARGET_PAGE_SIZE;
3585 if (l > access_len)
3586 l = access_len;
3587 invalidate_and_set_dirty(addr1, l);
3588 addr1 += l;
3589 access_len -= l;
3592 if (xen_enabled()) {
3593 xen_invalidate_map_cache_entry(buffer);
3595 return;
3597 if (is_write) {
3598 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3600 qemu_vfree(bounce.buffer);
3601 bounce.buffer = NULL;
3602 cpu_notify_map_clients();
3605 void *cpu_physical_memory_map(target_phys_addr_t addr,
3606 target_phys_addr_t *plen,
3607 int is_write)
3609 return address_space_map(&address_space_memory, addr, plen, is_write);
3612 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
3613 int is_write, target_phys_addr_t access_len)
3615 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3618 /* warning: addr must be aligned */
3619 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
3620 enum device_endian endian)
3622 uint8_t *ptr;
3623 uint32_t val;
3624 MemoryRegionSection *section;
3626 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3628 if (!(memory_region_is_ram(section->mr) ||
3629 memory_region_is_romd(section->mr))) {
3630 /* I/O case */
3631 addr = memory_region_section_addr(section, addr);
3632 val = io_mem_read(section->mr, addr, 4);
3633 #if defined(TARGET_WORDS_BIGENDIAN)
3634 if (endian == DEVICE_LITTLE_ENDIAN) {
3635 val = bswap32(val);
3637 #else
3638 if (endian == DEVICE_BIG_ENDIAN) {
3639 val = bswap32(val);
3641 #endif
3642 } else {
3643 /* RAM case */
3644 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3645 & TARGET_PAGE_MASK)
3646 + memory_region_section_addr(section, addr));
3647 switch (endian) {
3648 case DEVICE_LITTLE_ENDIAN:
3649 val = ldl_le_p(ptr);
3650 break;
3651 case DEVICE_BIG_ENDIAN:
3652 val = ldl_be_p(ptr);
3653 break;
3654 default:
3655 val = ldl_p(ptr);
3656 break;
3659 return val;
3662 uint32_t ldl_phys(target_phys_addr_t addr)
3664 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3667 uint32_t ldl_le_phys(target_phys_addr_t addr)
3669 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3672 uint32_t ldl_be_phys(target_phys_addr_t addr)
3674 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3677 /* warning: addr must be aligned */
3678 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
3679 enum device_endian endian)
3681 uint8_t *ptr;
3682 uint64_t val;
3683 MemoryRegionSection *section;
3685 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3687 if (!(memory_region_is_ram(section->mr) ||
3688 memory_region_is_romd(section->mr))) {
3689 /* I/O case */
3690 addr = memory_region_section_addr(section, addr);
3692 /* XXX This is broken when device endian != cpu endian.
3693 Fix and add "endian" variable check */
3694 #ifdef TARGET_WORDS_BIGENDIAN
3695 val = io_mem_read(section->mr, addr, 4) << 32;
3696 val |= io_mem_read(section->mr, addr + 4, 4);
3697 #else
3698 val = io_mem_read(section->mr, addr, 4);
3699 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3700 #endif
3701 } else {
3702 /* RAM case */
3703 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3704 & TARGET_PAGE_MASK)
3705 + memory_region_section_addr(section, addr));
3706 switch (endian) {
3707 case DEVICE_LITTLE_ENDIAN:
3708 val = ldq_le_p(ptr);
3709 break;
3710 case DEVICE_BIG_ENDIAN:
3711 val = ldq_be_p(ptr);
3712 break;
3713 default:
3714 val = ldq_p(ptr);
3715 break;
3718 return val;
3721 uint64_t ldq_phys(target_phys_addr_t addr)
3723 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3726 uint64_t ldq_le_phys(target_phys_addr_t addr)
3728 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3731 uint64_t ldq_be_phys(target_phys_addr_t addr)
3733 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3736 /* XXX: optimize */
3737 uint32_t ldub_phys(target_phys_addr_t addr)
3739 uint8_t val;
3740 cpu_physical_memory_read(addr, &val, 1);
3741 return val;
3744 /* warning: addr must be aligned */
3745 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
3746 enum device_endian endian)
3748 uint8_t *ptr;
3749 uint64_t val;
3750 MemoryRegionSection *section;
3752 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3754 if (!(memory_region_is_ram(section->mr) ||
3755 memory_region_is_romd(section->mr))) {
3756 /* I/O case */
3757 addr = memory_region_section_addr(section, addr);
3758 val = io_mem_read(section->mr, addr, 2);
3759 #if defined(TARGET_WORDS_BIGENDIAN)
3760 if (endian == DEVICE_LITTLE_ENDIAN) {
3761 val = bswap16(val);
3763 #else
3764 if (endian == DEVICE_BIG_ENDIAN) {
3765 val = bswap16(val);
3767 #endif
3768 } else {
3769 /* RAM case */
3770 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3771 & TARGET_PAGE_MASK)
3772 + memory_region_section_addr(section, addr));
3773 switch (endian) {
3774 case DEVICE_LITTLE_ENDIAN:
3775 val = lduw_le_p(ptr);
3776 break;
3777 case DEVICE_BIG_ENDIAN:
3778 val = lduw_be_p(ptr);
3779 break;
3780 default:
3781 val = lduw_p(ptr);
3782 break;
3785 return val;
3788 uint32_t lduw_phys(target_phys_addr_t addr)
3790 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3793 uint32_t lduw_le_phys(target_phys_addr_t addr)
3795 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3798 uint32_t lduw_be_phys(target_phys_addr_t addr)
3800 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3803 /* warning: addr must be aligned. The ram page is not marked as dirty
3804 and the code inside is not invalidated. It is useful if the dirty
3805 bits are used to track modified PTEs */
3806 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
3808 uint8_t *ptr;
3809 MemoryRegionSection *section;
3811 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3813 if (!memory_region_is_ram(section->mr) || section->readonly) {
3814 addr = memory_region_section_addr(section, addr);
3815 if (memory_region_is_ram(section->mr)) {
3816 section = &phys_sections[phys_section_rom];
3818 io_mem_write(section->mr, addr, val, 4);
3819 } else {
3820 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3821 & TARGET_PAGE_MASK)
3822 + memory_region_section_addr(section, addr);
3823 ptr = qemu_get_ram_ptr(addr1);
3824 stl_p(ptr, val);
3826 if (unlikely(in_migration)) {
3827 if (!cpu_physical_memory_is_dirty(addr1)) {
3828 /* invalidate code */
3829 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3830 /* set dirty bit */
3831 cpu_physical_memory_set_dirty_flags(
3832 addr1, (0xff & ~CODE_DIRTY_FLAG));
3838 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
3840 uint8_t *ptr;
3841 MemoryRegionSection *section;
3843 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3845 if (!memory_region_is_ram(section->mr) || section->readonly) {
3846 addr = memory_region_section_addr(section, addr);
3847 if (memory_region_is_ram(section->mr)) {
3848 section = &phys_sections[phys_section_rom];
3850 #ifdef TARGET_WORDS_BIGENDIAN
3851 io_mem_write(section->mr, addr, val >> 32, 4);
3852 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3853 #else
3854 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3855 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3856 #endif
3857 } else {
3858 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3859 & TARGET_PAGE_MASK)
3860 + memory_region_section_addr(section, addr));
3861 stq_p(ptr, val);
3865 /* warning: addr must be aligned */
3866 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
3867 enum device_endian endian)
3869 uint8_t *ptr;
3870 MemoryRegionSection *section;
3872 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3874 if (!memory_region_is_ram(section->mr) || section->readonly) {
3875 addr = memory_region_section_addr(section, addr);
3876 if (memory_region_is_ram(section->mr)) {
3877 section = &phys_sections[phys_section_rom];
3879 #if defined(TARGET_WORDS_BIGENDIAN)
3880 if (endian == DEVICE_LITTLE_ENDIAN) {
3881 val = bswap32(val);
3883 #else
3884 if (endian == DEVICE_BIG_ENDIAN) {
3885 val = bswap32(val);
3887 #endif
3888 io_mem_write(section->mr, addr, val, 4);
3889 } else {
3890 unsigned long addr1;
3891 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3892 + memory_region_section_addr(section, addr);
3893 /* RAM case */
3894 ptr = qemu_get_ram_ptr(addr1);
3895 switch (endian) {
3896 case DEVICE_LITTLE_ENDIAN:
3897 stl_le_p(ptr, val);
3898 break;
3899 case DEVICE_BIG_ENDIAN:
3900 stl_be_p(ptr, val);
3901 break;
3902 default:
3903 stl_p(ptr, val);
3904 break;
3906 invalidate_and_set_dirty(addr1, 4);
3910 void stl_phys(target_phys_addr_t addr, uint32_t val)
3912 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3915 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
3917 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3920 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
3922 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3925 /* XXX: optimize */
3926 void stb_phys(target_phys_addr_t addr, uint32_t val)
3928 uint8_t v = val;
3929 cpu_physical_memory_write(addr, &v, 1);
3932 /* warning: addr must be aligned */
3933 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
3934 enum device_endian endian)
3936 uint8_t *ptr;
3937 MemoryRegionSection *section;
3939 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3941 if (!memory_region_is_ram(section->mr) || section->readonly) {
3942 addr = memory_region_section_addr(section, addr);
3943 if (memory_region_is_ram(section->mr)) {
3944 section = &phys_sections[phys_section_rom];
3946 #if defined(TARGET_WORDS_BIGENDIAN)
3947 if (endian == DEVICE_LITTLE_ENDIAN) {
3948 val = bswap16(val);
3950 #else
3951 if (endian == DEVICE_BIG_ENDIAN) {
3952 val = bswap16(val);
3954 #endif
3955 io_mem_write(section->mr, addr, val, 2);
3956 } else {
3957 unsigned long addr1;
3958 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3959 + memory_region_section_addr(section, addr);
3960 /* RAM case */
3961 ptr = qemu_get_ram_ptr(addr1);
3962 switch (endian) {
3963 case DEVICE_LITTLE_ENDIAN:
3964 stw_le_p(ptr, val);
3965 break;
3966 case DEVICE_BIG_ENDIAN:
3967 stw_be_p(ptr, val);
3968 break;
3969 default:
3970 stw_p(ptr, val);
3971 break;
3973 invalidate_and_set_dirty(addr1, 2);
3977 void stw_phys(target_phys_addr_t addr, uint32_t val)
3979 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3982 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
3984 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3987 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
3989 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3992 /* XXX: optimize */
3993 void stq_phys(target_phys_addr_t addr, uint64_t val)
3995 val = tswap64(val);
3996 cpu_physical_memory_write(addr, &val, 8);
3999 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4001 val = cpu_to_le64(val);
4002 cpu_physical_memory_write(addr, &val, 8);
4005 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4007 val = cpu_to_be64(val);
4008 cpu_physical_memory_write(addr, &val, 8);
4011 /* virtual memory access for debug (includes writing to ROM) */
4012 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4013 uint8_t *buf, int len, int is_write)
4015 int l;
4016 target_phys_addr_t phys_addr;
4017 target_ulong page;
4019 while (len > 0) {
4020 page = addr & TARGET_PAGE_MASK;
4021 phys_addr = cpu_get_phys_page_debug(env, page);
4022 /* if no physical page mapped, return an error */
4023 if (phys_addr == -1)
4024 return -1;
4025 l = (page + TARGET_PAGE_SIZE) - addr;
4026 if (l > len)
4027 l = len;
4028 phys_addr += (addr & ~TARGET_PAGE_MASK);
4029 if (is_write)
4030 cpu_physical_memory_write_rom(phys_addr, buf, l);
4031 else
4032 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4033 len -= l;
4034 buf += l;
4035 addr += l;
4037 return 0;
4039 #endif
4041 /* in deterministic execution mode, instructions doing device I/Os
4042 must be at the end of the TB */
4043 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4045 TranslationBlock *tb;
4046 uint32_t n, cflags;
4047 target_ulong pc, cs_base;
4048 uint64_t flags;
4050 tb = tb_find_pc(retaddr);
4051 if (!tb) {
4052 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4053 (void *)retaddr);
4055 n = env->icount_decr.u16.low + tb->icount;
4056 cpu_restore_state(tb, env, retaddr);
4057 /* Calculate how many instructions had been executed before the fault
4058 occurred. */
4059 n = n - env->icount_decr.u16.low;
4060 /* Generate a new TB ending on the I/O insn. */
4061 n++;
4062 /* On MIPS and SH, delay slot instructions can only be restarted if
4063 they were already the first instruction in the TB. If this is not
4064 the first instruction in a TB then re-execute the preceding
4065 branch. */
4066 #if defined(TARGET_MIPS)
4067 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4068 env->active_tc.PC -= 4;
4069 env->icount_decr.u16.low++;
4070 env->hflags &= ~MIPS_HFLAG_BMASK;
4072 #elif defined(TARGET_SH4)
4073 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4074 && n > 1) {
4075 env->pc -= 2;
4076 env->icount_decr.u16.low++;
4077 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4079 #endif
4080 /* This should never happen. */
4081 if (n > CF_COUNT_MASK)
4082 cpu_abort(env, "TB too big during recompile");
4084 cflags = n | CF_LAST_IO;
4085 pc = tb->pc;
4086 cs_base = tb->cs_base;
4087 flags = tb->flags;
4088 tb_phys_invalidate(tb, -1);
4089 /* FIXME: In theory this could raise an exception. In practice
4090 we have already translated the block once so it's probably ok. */
4091 tb_gen_code(env, pc, cs_base, flags, cflags);
4092 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4093 the first in the TB) then we end up generating a whole new TB and
4094 repeating the fault, which is horribly inefficient.
4095 Better would be to execute just this insn uncached, or generate a
4096 second new TB. */
4097 cpu_resume_from_signal(env, NULL);
4100 #if !defined(CONFIG_USER_ONLY)
4102 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4104 int i, target_code_size, max_target_code_size;
4105 int direct_jmp_count, direct_jmp2_count, cross_page;
4106 TranslationBlock *tb;
4108 target_code_size = 0;
4109 max_target_code_size = 0;
4110 cross_page = 0;
4111 direct_jmp_count = 0;
4112 direct_jmp2_count = 0;
4113 for (i = 0; i < nb_tbs; i++) {
4114 tb = &tbs[i];
4115 target_code_size += tb->size;
4116 if (tb->size > max_target_code_size)
4117 max_target_code_size = tb->size;
4118 if (tb->page_addr[1] != -1)
4119 cross_page++;
4120 if (tb->tb_next_offset[0] != 0xffff) {
4121 direct_jmp_count++;
4122 if (tb->tb_next_offset[1] != 0xffff) {
4123 direct_jmp2_count++;
4127 /* XXX: avoid using doubles ? */
4128 cpu_fprintf(f, "Translation buffer state:\n");
4129 cpu_fprintf(f, "gen code size %td/%ld\n",
4130 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4131 cpu_fprintf(f, "TB count %d/%d\n",
4132 nb_tbs, code_gen_max_blocks);
4133 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4134 nb_tbs ? target_code_size / nb_tbs : 0,
4135 max_target_code_size);
4136 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4137 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4138 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4139 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4140 cross_page,
4141 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4142 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4143 direct_jmp_count,
4144 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4145 direct_jmp2_count,
4146 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4147 cpu_fprintf(f, "\nStatistics:\n");
4148 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4149 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4150 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4151 tcg_dump_info(f, cpu_fprintf);
4155 * A helper function for the _utterly broken_ virtio device model to find out if
4156 * it's running on a big endian machine. Don't do this at home kids!
4158 bool virtio_is_big_endian(void);
4159 bool virtio_is_big_endian(void)
4161 #if defined(TARGET_WORDS_BIGENDIAN)
4162 return true;
4163 #else
4164 return false;
4165 #endif
4168 #endif
4170 #ifndef CONFIG_USER_ONLY
4171 bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
4173 MemoryRegionSection *section;
4175 section = phys_page_find(address_space_memory.dispatch,
4176 phys_addr >> TARGET_PAGE_BITS);
4178 return !(memory_region_is_ram(section->mr) ||
4179 memory_region_is_romd(section->mr));
4181 #endif