Issue warning when deprecated drive parameter boot=on|off is used
[qemu-kvm.git] / exec.c
blobb0ed5939e9bc017c3ce5d1eecdeb2034ff263f18
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #include "cputlb.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
95 #if !defined(CONFIG_USER_ONLY)
96 int phys_ram_fd;
97 static int in_migration;
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
110 #endif
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
114 cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
119 int use_icount = 0;
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self modifying code, we count the number
125 of lookups we do to a given page to use a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
129 unsigned long flags;
130 #endif
131 } PageDesc;
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
138 #else
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
140 #endif
141 #else
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
143 #endif
145 /* Size of the L2 (and L3, etc) page tables. */
146 #define L2_BITS 10
147 #define L2_SIZE (1 << L2_BITS)
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
158 #else
159 #define V_L1_BITS V_L1_BITS_REM
160 #endif
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
174 #if !defined(CONFIG_USER_ONLY)
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
189 static void io_mem_init(void);
190 static void memory_map_init(void);
192 static MemoryRegion io_mem_watch;
193 #endif
195 /* statistics */
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
199 #ifdef _WIN32
200 static inline void map_exec(void *addr, long size)
202 DWORD old_protect;
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
207 #else
/* POSIX variant: request read/write/execute protection for every host
   page touched by [addr, addr + size).  The range is widened to page
   boundaries first.  The result of mprotect() is not inspected. */
static inline void map_exec(void *addr, long size)
{
    const unsigned long host_page = getpagesize();
    const unsigned long align_mask = ~(host_page - 1);
    unsigned long first = (unsigned long)addr & align_mask;
    unsigned long last = (unsigned long)addr + size;

    /* round the end up to the next page boundary */
    last = (last + host_page - 1) & align_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
223 #endif
225 static void page_init(void)
227 /* NOTE: we can always suppose that qemu_host_page_size >=
228 TARGET_PAGE_SIZE */
229 #ifdef _WIN32
231 SYSTEM_INFO system_info;
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
236 #else
237 qemu_real_host_page_size = getpagesize();
238 #endif
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
249 int i, cnt;
251 freep = kinfo_getvmmap(getpid(), &cnt);
252 if (freep) {
253 mmap_lock();
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
265 } else {
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
267 endaddr = ~0ul;
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269 #endif
273 free(freep);
274 mmap_unlock();
276 #else
277 FILE *f;
279 last_brk = (unsigned long)sbrk(0);
281 f = fopen("/compat/linux/proc/self/maps", "r");
282 if (f) {
283 mmap_lock();
285 do {
286 unsigned long startaddr, endaddr;
287 int n;
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
296 } else {
297 endaddr = ~0ul;
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
301 } while (!feof(f));
303 fclose(f);
304 mmap_unlock();
306 #endif
308 #endif
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
313 PageDesc *pd;
314 void **lp;
315 int i;
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
320 do { \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
323 } while (0)
324 #else
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
327 #endif
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
332 /* Level 2..N-1. */
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
334 void **p = *lp;
336 if (p == NULL) {
337 if (!alloc) {
338 return NULL;
340 ALLOC(p, sizeof(void *) * L2_SIZE);
341 *lp = p;
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
347 pd = *lp;
348 if (pd == NULL) {
349 if (!alloc) {
350 return NULL;
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
353 *lp = pd;
356 #undef ALLOC
358 return pd + (index & (L2_SIZE - 1));
361 static inline PageDesc *page_find(tb_page_addr_t index)
363 return page_find_alloc(index, 0);
366 #if !defined(CONFIG_USER_ONLY)
368 static void phys_map_node_reserve(unsigned nodes)
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
380 static uint16_t phys_map_node_alloc(void)
382 unsigned i;
383 uint16_t ret;
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
392 return ret;
395 static void phys_map_nodes_reset(void)
397 phys_map_nodes_nb = 0;
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
403 int level)
405 PhysPageEntry *p;
406 int i;
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
412 if (level == 0) {
413 for (i = 0; i < L2_SIZE; i++) {
414 p[i].is_leaf = 1;
415 p[i].ptr = phys_section_unassigned;
418 } else {
419 p = phys_map_nodes[lp->ptr];
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
425 lp->is_leaf = true;
426 lp->ptr = leaf;
427 *index += step;
428 *nb -= step;
429 } else {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
432 ++lp;
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
438 uint16_t leaf)
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
448 PhysPageEntry lp = d->phys_map;
449 PhysPageEntry *p;
450 int i;
451 uint16_t s_index = phys_section_unassigned;
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
455 goto not_found;
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
461 s_index = lp.ptr;
462 not_found:
463 return &phys_sections[s_index];
466 bool memory_region_is_unassigned(MemoryRegion *mr)
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
475 #endif
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. It will change when a dedicated libc will be used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
483 #endif
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
490 # define USE_MMAP
491 #endif
493 /* Minimum size of the code gen buffer. This number is randomly chosen,
494 but not so small that we can't have a fair number of TB's live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
509 #else
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
511 #endif
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
519 static inline size_t size_code_gen_buffer(size_t tb_size)
521 /* Size the buffer. */
522 if (tb_size == 0) {
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
525 #else
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
531 #endif
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
539 code_gen_buffer_size = tb_size;
540 return tb_size;
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
547 static inline void *alloc_code_gen_buffer(void)
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
556 uintptr_t start = 0;
557 void *buf;
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
570 flags |= MAP_32BIT;
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
579 # endif
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
585 #else
586 static inline void *alloc_code_gen_buffer(void)
588 void *buf = g_malloc(code_gen_buffer_size);
589 if (buf) {
590 map_exec(buf, code_gen_buffer_size);
592 return buf;
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
596 static inline void code_gen_alloc(size_t tb_size)
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
602 exit(1);
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment
609 as executable. */
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means default
621 size. */
622 void tcg_exec_init(unsigned long tb_size)
624 cpu_gen_init();
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
628 page_init();
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
633 #endif
636 bool tcg_enabled(void)
638 return code_gen_buffer != NULL;
/* One-time global initialisation.  System-mode builds set up the
   memory map and then the I/O memory regions; user-mode builds have
   nothing to do here. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 static int cpu_common_post_load(void *opaque, int version_id)
653 CPUArchState *env = opaque;
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
658 tlb_flush(env, 1);
660 return 0;
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
665 .version_id = 1,
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
675 #endif
677 CPUArchState *qemu_get_cpu(int cpu)
679 CPUArchState *env = first_cpu;
681 while (env) {
682 if (env->cpu_index == cpu)
683 break;
684 env = env->next_cpu;
687 return env;
690 void cpu_exec_init(CPUArchState *env)
692 CPUArchState **penv;
693 int cpu_index;
695 #if defined(CONFIG_USER_ONLY)
696 cpu_list_lock();
697 #endif
698 env->next_cpu = NULL;
699 penv = &first_cpu;
700 cpu_index = 0;
701 while (*penv != NULL) {
702 penv = &(*penv)->next_cpu;
703 cpu_index++;
705 env->cpu_index = cpu_index;
706 env->numa_node = 0;
707 QTAILQ_INIT(&env->breakpoints);
708 QTAILQ_INIT(&env->watchpoints);
709 #ifndef CONFIG_USER_ONLY
710 env->thread_id = qemu_get_thread_id();
711 #endif
712 *penv = env;
713 #if defined(CONFIG_USER_ONLY)
714 cpu_list_unlock();
715 #endif
716 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
717 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
718 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
719 cpu_save, cpu_load, env);
720 #endif
723 /* Allocate a new translation block. Flush the translation buffer if
724 too many translation blocks or too much generated code. */
725 static TranslationBlock *tb_alloc(target_ulong pc)
727 TranslationBlock *tb;
729 if (nb_tbs >= code_gen_max_blocks ||
730 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
731 return NULL;
732 tb = &tbs[nb_tbs++];
733 tb->pc = pc;
734 tb->cflags = 0;
735 return tb;
738 void tb_free(TranslationBlock *tb)
740 /* In practice this is mostly used for single use temporary TB
741 Ignore the hard cases and just back up if this TB happens to
742 be the last one generated. */
743 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
744 code_gen_ptr = tb->tc_ptr;
745 nb_tbs--;
749 static inline void invalidate_page_bitmap(PageDesc *p)
751 if (p->code_bitmap) {
752 g_free(p->code_bitmap);
753 p->code_bitmap = NULL;
755 p->code_write_count = 0;
758 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
760 static void page_flush_tb_1 (int level, void **lp)
762 int i;
764 if (*lp == NULL) {
765 return;
767 if (level == 0) {
768 PageDesc *pd = *lp;
769 for (i = 0; i < L2_SIZE; ++i) {
770 pd[i].first_tb = NULL;
771 invalidate_page_bitmap(pd + i);
773 } else {
774 void **pp = *lp;
775 for (i = 0; i < L2_SIZE; ++i) {
776 page_flush_tb_1 (level - 1, pp + i);
781 static void page_flush_tb(void)
783 int i;
784 for (i = 0; i < V_L1_SIZE; i++) {
785 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
789 /* flush all the translation blocks */
790 /* XXX: tb_flush is currently not thread safe */
791 void tb_flush(CPUArchState *env1)
793 CPUArchState *env;
794 #if defined(DEBUG_FLUSH)
795 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
796 (unsigned long)(code_gen_ptr - code_gen_buffer),
797 nb_tbs, nb_tbs > 0 ?
798 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
799 #endif
800 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
801 cpu_abort(env1, "Internal error: code buffer overflow\n");
803 nb_tbs = 0;
805 for(env = first_cpu; env != NULL; env = env->next_cpu) {
806 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
809 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
810 page_flush_tb();
812 code_gen_ptr = code_gen_buffer;
813 /* XXX: flush processor icache at this point if cache flush is
814 expensive */
815 tb_flush_count++;
818 #ifdef DEBUG_TB_CHECK
820 static void tb_invalidate_check(target_ulong address)
822 TranslationBlock *tb;
823 int i;
824 address &= TARGET_PAGE_MASK;
825 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
826 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
827 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
828 address >= tb->pc + tb->size)) {
829 printf("ERROR invalidate: address=" TARGET_FMT_lx
830 " PC=%08lx size=%04x\n",
831 address, (long)tb->pc, tb->size);
837 /* verify that all the pages have correct rights for code */
838 static void tb_page_check(void)
840 TranslationBlock *tb;
841 int i, flags1, flags2;
843 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
844 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
845 flags1 = page_get_flags(tb->pc);
846 flags2 = page_get_flags(tb->pc + tb->size - 1);
847 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
848 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
849 (long)tb->pc, tb->size, flags1, flags2);
855 #endif
857 /* invalidate one TB */
858 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
859 int next_offset)
861 TranslationBlock *tb1;
862 for(;;) {
863 tb1 = *ptb;
864 if (tb1 == tb) {
865 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
866 break;
868 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
872 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
874 TranslationBlock *tb1;
875 unsigned int n1;
877 for(;;) {
878 tb1 = *ptb;
879 n1 = (uintptr_t)tb1 & 3;
880 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
881 if (tb1 == tb) {
882 *ptb = tb1->page_next[n1];
883 break;
885 ptb = &tb1->page_next[n1];
889 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
891 TranslationBlock *tb1, **ptb;
892 unsigned int n1;
894 ptb = &tb->jmp_next[n];
895 tb1 = *ptb;
896 if (tb1) {
897 /* find tb(n) in circular list */
898 for(;;) {
899 tb1 = *ptb;
900 n1 = (uintptr_t)tb1 & 3;
901 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
902 if (n1 == n && tb1 == tb)
903 break;
904 if (n1 == 2) {
905 ptb = &tb1->jmp_first;
906 } else {
907 ptb = &tb1->jmp_next[n1];
910 /* now we can suppress tb(n) from the list */
911 *ptb = tb->jmp_next[n];
913 tb->jmp_next[n] = NULL;
917 /* reset the jump entry 'n' of a TB so that it is not chained to
918 another TB */
919 static inline void tb_reset_jump(TranslationBlock *tb, int n)
921 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
924 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
926 CPUArchState *env;
927 PageDesc *p;
928 unsigned int h, n1;
929 tb_page_addr_t phys_pc;
930 TranslationBlock *tb1, *tb2;
932 /* remove the TB from the hash list */
933 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
934 h = tb_phys_hash_func(phys_pc);
935 tb_remove(&tb_phys_hash[h], tb,
936 offsetof(TranslationBlock, phys_hash_next));
938 /* remove the TB from the page list */
939 if (tb->page_addr[0] != page_addr) {
940 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
941 tb_page_remove(&p->first_tb, tb);
942 invalidate_page_bitmap(p);
944 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
945 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
946 tb_page_remove(&p->first_tb, tb);
947 invalidate_page_bitmap(p);
950 tb_invalidated_flag = 1;
952 /* remove the TB from the hash list */
953 h = tb_jmp_cache_hash_func(tb->pc);
954 for(env = first_cpu; env != NULL; env = env->next_cpu) {
955 if (env->tb_jmp_cache[h] == tb)
956 env->tb_jmp_cache[h] = NULL;
959 /* suppress this TB from the two jump lists */
960 tb_jmp_remove(tb, 0);
961 tb_jmp_remove(tb, 1);
963 /* suppress any remaining jumps to this TB */
964 tb1 = tb->jmp_first;
965 for(;;) {
966 n1 = (uintptr_t)tb1 & 3;
967 if (n1 == 2)
968 break;
969 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
970 tb2 = tb1->jmp_next[n1];
971 tb_reset_jump(tb1, n1);
972 tb1->jmp_next[n1] = NULL;
973 tb1 = tb2;
975 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
977 tb_phys_invalidate_count++;
/* Set 'len' consecutive bits of the bitmap 'tab', beginning at bit
   number 'start'.  Bit k lives in byte k / 8 at position k % 8, least
   significant bit first.  Nothing is written when 'len' is 0. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    const int head_mask = 0xff << (start & 7);
    int pos;

    if ((start & ~7) == (end & ~7)) {
        /* the whole run lies inside a single byte */
        if (start < end) {
            *byte |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* partial (or full) first byte */
    *byte++ |= head_mask;

    /* whole bytes in the middle */
    for (pos = (start + 8) & ~7; pos < (end & ~7); pos += 8) {
        *byte++ = 0xff;
    }

    /* remaining low bits of the last byte */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
1007 static void build_page_bitmap(PageDesc *p)
1009 int n, tb_start, tb_end;
1010 TranslationBlock *tb;
1012 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1014 tb = p->first_tb;
1015 while (tb != NULL) {
1016 n = (uintptr_t)tb & 3;
1017 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1018 /* NOTE: this is subtle as a TB may span two physical pages */
1019 if (n == 0) {
1020 /* NOTE: tb_end may be after the end of the page, but
1021 it is not a problem */
1022 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1023 tb_end = tb_start + tb->size;
1024 if (tb_end > TARGET_PAGE_SIZE)
1025 tb_end = TARGET_PAGE_SIZE;
1026 } else {
1027 tb_start = 0;
1028 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1030 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1031 tb = tb->page_next[n];
1035 TranslationBlock *tb_gen_code(CPUArchState *env,
1036 target_ulong pc, target_ulong cs_base,
1037 int flags, int cflags)
1039 TranslationBlock *tb;
1040 uint8_t *tc_ptr;
1041 tb_page_addr_t phys_pc, phys_page2;
1042 target_ulong virt_page2;
1043 int code_gen_size;
1045 phys_pc = get_page_addr_code(env, pc);
1046 tb = tb_alloc(pc);
1047 if (!tb) {
1048 /* flush must be done */
1049 tb_flush(env);
1050 /* cannot fail at this point */
1051 tb = tb_alloc(pc);
1052 /* Don't forget to invalidate previous TB info. */
1053 tb_invalidated_flag = 1;
1055 tc_ptr = code_gen_ptr;
1056 tb->tc_ptr = tc_ptr;
1057 tb->cs_base = cs_base;
1058 tb->flags = flags;
1059 tb->cflags = cflags;
1060 cpu_gen_code(env, tb, &code_gen_size);
1061 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1062 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1064 /* check next page if needed */
1065 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1066 phys_page2 = -1;
1067 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1068 phys_page2 = get_page_addr_code(env, virt_page2);
1070 tb_link_page(tb, phys_pc, phys_page2);
1071 return tb;
1075 * Invalidate all TBs which intersect with the target physical address range
1076 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1077 * 'is_cpu_write_access' should be true if called from a real cpu write
1078 * access: the virtual CPU will exit the current TB if code is modified inside
1079 * this TB.
1081 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1082 int is_cpu_write_access)
1084 while (start < end) {
1085 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1086 start &= TARGET_PAGE_MASK;
1087 start += TARGET_PAGE_SIZE;
1092 * Invalidate all TBs which intersect with the target physical address range
1093 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1094 * 'is_cpu_write_access' should be true if called from a real cpu write
1095 * access: the virtual CPU will exit the current TB if code is modified inside
1096 * this TB.
1098 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1099 int is_cpu_write_access)
1101 TranslationBlock *tb, *tb_next, *saved_tb;
1102 CPUArchState *env = cpu_single_env;
1103 tb_page_addr_t tb_start, tb_end;
1104 PageDesc *p;
1105 int n;
1106 #ifdef TARGET_HAS_PRECISE_SMC
1107 int current_tb_not_found = is_cpu_write_access;
1108 TranslationBlock *current_tb = NULL;
1109 int current_tb_modified = 0;
1110 target_ulong current_pc = 0;
1111 target_ulong current_cs_base = 0;
1112 int current_flags = 0;
1113 #endif /* TARGET_HAS_PRECISE_SMC */
1115 p = page_find(start >> TARGET_PAGE_BITS);
1116 if (!p)
1117 return;
1118 if (!p->code_bitmap &&
1119 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1120 is_cpu_write_access) {
1121 /* build code bitmap */
1122 build_page_bitmap(p);
1125 /* we remove all the TBs in the range [start, end[ */
1126 /* XXX: see if in some cases it could be faster to invalidate all the code */
1127 tb = p->first_tb;
1128 while (tb != NULL) {
1129 n = (uintptr_t)tb & 3;
1130 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1131 tb_next = tb->page_next[n];
1132 /* NOTE: this is subtle as a TB may span two physical pages */
1133 if (n == 0) {
1134 /* NOTE: tb_end may be after the end of the page, but
1135 it is not a problem */
1136 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1137 tb_end = tb_start + tb->size;
1138 } else {
1139 tb_start = tb->page_addr[1];
1140 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1142 if (!(tb_end <= start || tb_start >= end)) {
1143 #ifdef TARGET_HAS_PRECISE_SMC
1144 if (current_tb_not_found) {
1145 current_tb_not_found = 0;
1146 current_tb = NULL;
1147 if (env->mem_io_pc) {
1148 /* now we have a real cpu fault */
1149 current_tb = tb_find_pc(env->mem_io_pc);
1152 if (current_tb == tb &&
1153 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1154 /* If we are modifying the current TB, we must stop
1155 its execution. We could be more precise by checking
1156 that the modification is after the current PC, but it
1157 would require a specialized function to partially
1158 restore the CPU state */
1160 current_tb_modified = 1;
1161 cpu_restore_state(current_tb, env, env->mem_io_pc);
1162 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1163 &current_flags);
1165 #endif /* TARGET_HAS_PRECISE_SMC */
1166 /* we need to do that to handle the case where a signal
1167 occurs while doing tb_phys_invalidate() */
1168 saved_tb = NULL;
1169 if (env) {
1170 saved_tb = env->current_tb;
1171 env->current_tb = NULL;
1173 tb_phys_invalidate(tb, -1);
1174 if (env) {
1175 env->current_tb = saved_tb;
1176 if (env->interrupt_request && env->current_tb)
1177 cpu_interrupt(env, env->interrupt_request);
1180 tb = tb_next;
1182 #if !defined(CONFIG_USER_ONLY)
1183 /* if no code remaining, no need to continue to use slow writes */
1184 if (!p->first_tb) {
1185 invalidate_page_bitmap(p);
1186 if (is_cpu_write_access) {
1187 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1190 #endif
1191 #ifdef TARGET_HAS_PRECISE_SMC
1192 if (current_tb_modified) {
1193 /* we generate a block containing just the instruction
1194 modifying the memory. It will ensure that it cannot modify
1195 itself */
1196 env->current_tb = NULL;
1197 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1198 cpu_resume_from_signal(env, NULL);
1200 #endif
1203 /* len must be <= 8 and start must be a multiple of len */
1204 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1206 PageDesc *p;
1207 int offset, b;
1208 #if 0
1209 if (1) {
1210 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1211 cpu_single_env->mem_io_vaddr, len,
1212 cpu_single_env->eip,
1213 cpu_single_env->eip +
1214 (intptr_t)cpu_single_env->segs[R_CS].base);
1216 #endif
1217 p = page_find(start >> TARGET_PAGE_BITS);
1218 if (!p)
1219 return;
1220 if (p->code_bitmap) {
1221 offset = start & ~TARGET_PAGE_MASK;
1222 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1223 if (b & ((1 << len) - 1))
1224 goto do_invalidate;
1225 } else {
1226 do_invalidate:
1227 tb_invalidate_phys_page_range(start, start + len, 1);
1231 #if !defined(CONFIG_SOFTMMU)
1232 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1233 uintptr_t pc, void *puc)
1235 TranslationBlock *tb;
1236 PageDesc *p;
1237 int n;
1238 #ifdef TARGET_HAS_PRECISE_SMC
1239 TranslationBlock *current_tb = NULL;
1240 CPUArchState *env = cpu_single_env;
1241 int current_tb_modified = 0;
1242 target_ulong current_pc = 0;
1243 target_ulong current_cs_base = 0;
1244 int current_flags = 0;
1245 #endif
1247 addr &= TARGET_PAGE_MASK;
1248 p = page_find(addr >> TARGET_PAGE_BITS);
1249 if (!p)
1250 return;
1251 tb = p->first_tb;
1252 #ifdef TARGET_HAS_PRECISE_SMC
1253 if (tb && pc != 0) {
1254 current_tb = tb_find_pc(pc);
1256 #endif
1257 while (tb != NULL) {
1258 n = (uintptr_t)tb & 3;
1259 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (current_tb == tb &&
1262 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1263 /* If we are modifying the current TB, we must stop
1264 its execution. We could be more precise by checking
1265 that the modification is after the current PC, but it
1266 would require a specialized function to partially
1267 restore the CPU state */
1269 current_tb_modified = 1;
1270 cpu_restore_state(current_tb, env, pc);
1271 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1272 &current_flags);
1274 #endif /* TARGET_HAS_PRECISE_SMC */
1275 tb_phys_invalidate(tb, addr);
1276 tb = tb->page_next[n];
1278 p->first_tb = NULL;
1279 #ifdef TARGET_HAS_PRECISE_SMC
1280 if (current_tb_modified) {
1281 /* we generate a block containing just the instruction
1282 modifying the memory. It will ensure that it cannot modify
1283 itself */
1284 env->current_tb = NULL;
1285 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1286 cpu_resume_from_signal(env, puc);
1288 #endif
1290 #endif
1292 /* add the tb in the target page and protect it if necessary */
1293 static inline void tb_alloc_page(TranslationBlock *tb,
1294 unsigned int n, tb_page_addr_t page_addr)
1296 PageDesc *p;
1297 #ifndef CONFIG_USER_ONLY
1298 bool page_already_protected;
1299 #endif
1301 tb->page_addr[n] = page_addr;
1302 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1303 tb->page_next[n] = p->first_tb;
1304 #ifndef CONFIG_USER_ONLY
1305 page_already_protected = p->first_tb != NULL;
1306 #endif
1307 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1308 invalidate_page_bitmap(p);
1310 #if defined(TARGET_HAS_SMC) || 1
1312 #if defined(CONFIG_USER_ONLY)
1313 if (p->flags & PAGE_WRITE) {
1314 target_ulong addr;
1315 PageDesc *p2;
1316 int prot;
1318 /* force the host page as non writable (writes will have a
1319 page fault + mprotect overhead) */
1320 page_addr &= qemu_host_page_mask;
1321 prot = 0;
1322 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1323 addr += TARGET_PAGE_SIZE) {
1325 p2 = page_find (addr >> TARGET_PAGE_BITS);
1326 if (!p2)
1327 continue;
1328 prot |= p2->flags;
1329 p2->flags &= ~PAGE_WRITE;
1331 mprotect(g2h(page_addr), qemu_host_page_size,
1332 (prot & PAGE_BITS) & ~PAGE_WRITE);
1333 #ifdef DEBUG_TB_INVALIDATE
1334 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1335 page_addr);
1336 #endif
1338 #else
1339 /* if some code is already present, then the pages are already
1340 protected. So we handle the case where only the first TB is
1341 allocated in a physical page */
1342 if (!page_already_protected) {
1343 tlb_protect_code(page_addr);
1345 #endif
1347 #endif /* TARGET_HAS_SMC */
1350 /* add a new TB and link it to the physical page tables. phys_page2 is
1351 (-1) to indicate that only one page contains the TB. */
1352 void tb_link_page(TranslationBlock *tb,
1353 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1355 unsigned int h;
1356 TranslationBlock **ptb;
1358 /* Grab the mmap lock to stop another thread invalidating this TB
1359 before we are done. */
1360 mmap_lock();
1361 /* add in the physical hash table */
1362 h = tb_phys_hash_func(phys_pc);
1363 ptb = &tb_phys_hash[h];
1364 tb->phys_hash_next = *ptb;
1365 *ptb = tb;
1367 /* add in the page list */
1368 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1369 if (phys_page2 != -1)
1370 tb_alloc_page(tb, 1, phys_page2);
1371 else
1372 tb->page_addr[1] = -1;
1374 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1375 tb->jmp_next[0] = NULL;
1376 tb->jmp_next[1] = NULL;
1378 /* init original jump addresses */
1379 if (tb->tb_next_offset[0] != 0xffff)
1380 tb_reset_jump(tb, 0);
1381 if (tb->tb_next_offset[1] != 0xffff)
1382 tb_reset_jump(tb, 1);
1384 #ifdef DEBUG_TB_CHECK
1385 tb_page_check();
1386 #endif
1387 mmap_unlock();
1390 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1391 tb[1].tc_ptr. Return NULL if not found */
1392 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1394 int m_min, m_max, m;
1395 uintptr_t v;
1396 TranslationBlock *tb;
1398 if (nb_tbs <= 0)
1399 return NULL;
1400 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1401 tc_ptr >= (uintptr_t)code_gen_ptr) {
1402 return NULL;
1404 /* binary search (cf Knuth) */
1405 m_min = 0;
1406 m_max = nb_tbs - 1;
1407 while (m_min <= m_max) {
1408 m = (m_min + m_max) >> 1;
1409 tb = &tbs[m];
1410 v = (uintptr_t)tb->tc_ptr;
1411 if (v == tc_ptr)
1412 return tb;
1413 else if (tc_ptr < v) {
1414 m_max = m - 1;
1415 } else {
1416 m_min = m + 1;
1419 return &tbs[m_max];
1422 static void tb_reset_jump_recursive(TranslationBlock *tb);
1424 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1426 TranslationBlock *tb1, *tb_next, **ptb;
1427 unsigned int n1;
1429 tb1 = tb->jmp_next[n];
1430 if (tb1 != NULL) {
1431 /* find head of list */
1432 for(;;) {
1433 n1 = (uintptr_t)tb1 & 3;
1434 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1435 if (n1 == 2)
1436 break;
1437 tb1 = tb1->jmp_next[n1];
1439 /* we are now sure now that tb jumps to tb1 */
1440 tb_next = tb1;
1442 /* remove tb from the jmp_first list */
1443 ptb = &tb_next->jmp_first;
1444 for(;;) {
1445 tb1 = *ptb;
1446 n1 = (uintptr_t)tb1 & 3;
1447 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1448 if (n1 == n && tb1 == tb)
1449 break;
1450 ptb = &tb1->jmp_next[n1];
1452 *ptb = tb->jmp_next[n];
1453 tb->jmp_next[n] = NULL;
1455 /* suppress the jump to next tb in generated code */
1456 tb_reset_jump(tb, n);
1458 /* suppress jumps in the tb on which we could have jumped */
1459 tb_reset_jump_recursive(tb_next);
1463 static void tb_reset_jump_recursive(TranslationBlock *tb)
1465 tb_reset_jump_recursive2(tb, 0);
1466 tb_reset_jump_recursive2(tb, 1);
1469 #if defined(TARGET_HAS_ICE)
1470 #if defined(CONFIG_USER_ONLY)
1471 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1473 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1475 #else
1476 void tb_invalidate_phys_addr(hwaddr addr)
1478 ram_addr_t ram_addr;
1479 MemoryRegionSection *section;
1481 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1482 if (!(memory_region_is_ram(section->mr)
1483 || (section->mr->rom_device && section->mr->readable))) {
1484 return;
1486 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1487 + memory_region_section_addr(section, addr);
1488 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1491 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1493 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1494 (pc & ~TARGET_PAGE_MASK));
1496 #endif
1497 #endif /* TARGET_HAS_ICE */
1499 #if defined(CONFIG_USER_ONLY)
1500 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1505 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1506 int flags, CPUWatchpoint **watchpoint)
1508 return -ENOSYS;
1510 #else
1511 /* Add a watchpoint. */
1512 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1513 int flags, CPUWatchpoint **watchpoint)
1515 target_ulong len_mask = ~(len - 1);
1516 CPUWatchpoint *wp;
1518 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1519 if ((len & (len - 1)) || (addr & ~len_mask) ||
1520 len == 0 || len > TARGET_PAGE_SIZE) {
1521 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1522 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1523 return -EINVAL;
1525 wp = g_malloc(sizeof(*wp));
1527 wp->vaddr = addr;
1528 wp->len_mask = len_mask;
1529 wp->flags = flags;
1531 /* keep all GDB-injected watchpoints in front */
1532 if (flags & BP_GDB)
1533 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1534 else
1535 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1537 tlb_flush_page(env, addr);
1539 if (watchpoint)
1540 *watchpoint = wp;
1541 return 0;
1544 /* Remove a specific watchpoint. */
1545 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1546 int flags)
1548 target_ulong len_mask = ~(len - 1);
1549 CPUWatchpoint *wp;
1551 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1552 if (addr == wp->vaddr && len_mask == wp->len_mask
1553 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1554 cpu_watchpoint_remove_by_ref(env, wp);
1555 return 0;
1558 return -ENOENT;
1561 /* Remove a specific watchpoint by reference. */
1562 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1564 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1566 tlb_flush_page(env, watchpoint->vaddr);
1568 g_free(watchpoint);
1571 /* Remove all matching watchpoints. */
1572 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1574 CPUWatchpoint *wp, *next;
1576 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1577 if (wp->flags & mask)
1578 cpu_watchpoint_remove_by_ref(env, wp);
1581 #endif
1583 /* Add a breakpoint. */
1584 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1585 CPUBreakpoint **breakpoint)
1587 #if defined(TARGET_HAS_ICE)
1588 CPUBreakpoint *bp;
1590 bp = g_malloc(sizeof(*bp));
1592 bp->pc = pc;
1593 bp->flags = flags;
1595 /* keep all GDB-injected breakpoints in front */
1596 if (flags & BP_GDB)
1597 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1598 else
1599 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1601 breakpoint_invalidate(env, pc);
1603 if (breakpoint)
1604 *breakpoint = bp;
1605 return 0;
1606 #else
1607 return -ENOSYS;
1608 #endif
1611 /* Remove a specific breakpoint. */
1612 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1614 #if defined(TARGET_HAS_ICE)
1615 CPUBreakpoint *bp;
1617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1618 if (bp->pc == pc && bp->flags == flags) {
1619 cpu_breakpoint_remove_by_ref(env, bp);
1620 return 0;
1623 return -ENOENT;
1624 #else
1625 return -ENOSYS;
1626 #endif
1629 /* Remove a specific breakpoint by reference. */
1630 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1632 #if defined(TARGET_HAS_ICE)
1633 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1635 breakpoint_invalidate(env, breakpoint->pc);
1637 g_free(breakpoint);
1638 #endif
1641 /* Remove all matching breakpoints. */
1642 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1644 #if defined(TARGET_HAS_ICE)
1645 CPUBreakpoint *bp, *next;
1647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1648 if (bp->flags & mask)
1649 cpu_breakpoint_remove_by_ref(env, bp);
1651 #endif
1654 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1655 CPU loop after each instruction */
1656 void cpu_single_step(CPUArchState *env, int enabled)
1658 #if defined(TARGET_HAS_ICE)
1659 if (env->singlestep_enabled != enabled) {
1660 env->singlestep_enabled = enabled;
1661 if (kvm_enabled())
1662 kvm_update_guest_debug(env, 0);
1663 else {
1664 /* must flush all the translated code to avoid inconsistencies */
1665 /* XXX: only flush what is necessary */
1666 tb_flush(env);
1669 #endif
1672 static void cpu_unlink_tb(CPUArchState *env)
1674 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1675 problem and hope the cpu will stop of its own accord. For userspace
1676 emulation this often isn't actually as bad as it sounds. Often
1677 signals are used primarily to interrupt blocking syscalls. */
1678 TranslationBlock *tb;
1679 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1681 spin_lock(&interrupt_lock);
1682 tb = env->current_tb;
1683 /* if the cpu is currently executing code, we must unlink it and
1684 all the potentially executing TB */
1685 if (tb) {
1686 env->current_tb = NULL;
1687 tb_reset_jump_recursive(tb);
1689 spin_unlock(&interrupt_lock);
1692 #ifndef CONFIG_USER_ONLY
1693 /* mask must never be zero, except for A20 change call */
1694 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1696 int old_mask;
1698 old_mask = env->interrupt_request;
1699 env->interrupt_request |= mask;
1702 * If called from iothread context, wake the target cpu in
1703 * case its halted.
1705 if (!qemu_cpu_is_self(env)) {
1706 qemu_cpu_kick(env);
1707 return;
1710 if (use_icount) {
1711 env->icount_decr.u16.high = 0xffff;
1712 if (!can_do_io(env)
1713 && (mask & ~old_mask) != 0) {
1714 cpu_abort(env, "Raised interrupt while not in I/O function");
1716 } else {
1717 cpu_unlink_tb(env);
1721 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1723 #else /* CONFIG_USER_ONLY */
1725 void cpu_interrupt(CPUArchState *env, int mask)
1727 env->interrupt_request |= mask;
1728 cpu_unlink_tb(env);
1730 #endif /* CONFIG_USER_ONLY */
1732 void cpu_reset_interrupt(CPUArchState *env, int mask)
1734 env->interrupt_request &= ~mask;
1737 void cpu_exit(CPUArchState *env)
1739 env->exit_request = 1;
1740 cpu_unlink_tb(env);
1743 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1745 va_list ap;
1746 va_list ap2;
1748 va_start(ap, fmt);
1749 va_copy(ap2, ap);
1750 fprintf(stderr, "qemu: fatal: ");
1751 vfprintf(stderr, fmt, ap);
1752 fprintf(stderr, "\n");
1753 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1754 if (qemu_log_enabled()) {
1755 qemu_log("qemu: fatal: ");
1756 qemu_log_vprintf(fmt, ap2);
1757 qemu_log("\n");
1758 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1759 qemu_log_flush();
1760 qemu_log_close();
1762 va_end(ap2);
1763 va_end(ap);
1764 #if defined(CONFIG_USER_ONLY)
1766 struct sigaction act;
1767 sigfillset(&act.sa_mask);
1768 act.sa_handler = SIG_DFL;
1769 sigaction(SIGABRT, &act, NULL);
1771 #endif
1772 abort();
1775 CPUArchState *cpu_copy(CPUArchState *env)
1777 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1778 CPUArchState *next_cpu = new_env->next_cpu;
1779 int cpu_index = new_env->cpu_index;
1780 #if defined(TARGET_HAS_ICE)
1781 CPUBreakpoint *bp;
1782 CPUWatchpoint *wp;
1783 #endif
1785 memcpy(new_env, env, sizeof(CPUArchState));
1787 /* Preserve chaining and index. */
1788 new_env->next_cpu = next_cpu;
1789 new_env->cpu_index = cpu_index;
1791 /* Clone all break/watchpoints.
1792 Note: Once we support ptrace with hw-debug register access, make sure
1793 BP_CPU break/watchpoints are handled correctly on clone. */
1794 QTAILQ_INIT(&env->breakpoints);
1795 QTAILQ_INIT(&env->watchpoints);
1796 #if defined(TARGET_HAS_ICE)
1797 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1798 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1800 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1801 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1802 wp->flags, NULL);
1804 #endif
1806 return new_env;
1809 #if !defined(CONFIG_USER_ONLY)
1810 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1812 unsigned int i;
1814 /* Discard jump cache entries for any tb which might potentially
1815 overlap the flushed page. */
1816 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1817 memset (&env->tb_jmp_cache[i], 0,
1818 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1820 i = tb_jmp_cache_hash_page(addr);
1821 memset (&env->tb_jmp_cache[i], 0,
1822 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1825 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1826 uintptr_t length)
1828 uintptr_t start1;
1830 /* we modify the TLB cache so that the dirty bit will be set again
1831 when accessing the range */
1832 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1833 /* Check that we don't span multiple blocks - this breaks the
1834 address comparisons below. */
1835 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1836 != (end - 1) - start) {
1837 abort();
1839 cpu_tlb_reset_dirty_all(start1, length);
1843 /* Note: start and end must be within the same ram block. */
1844 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1845 int dirty_flags)
1847 uintptr_t length;
1849 start &= TARGET_PAGE_MASK;
1850 end = TARGET_PAGE_ALIGN(end);
1852 length = end - start;
1853 if (length == 0)
1854 return;
1855 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1857 if (tcg_enabled()) {
1858 tlb_reset_dirty_range_all(start, end, length);
1862 int cpu_physical_memory_set_dirty_tracking(int enable)
1864 int ret = 0;
1865 in_migration = enable;
1866 return ret;
1869 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1870 MemoryRegionSection *section,
1871 target_ulong vaddr,
1872 hwaddr paddr,
1873 int prot,
1874 target_ulong *address)
1876 hwaddr iotlb;
1877 CPUWatchpoint *wp;
1879 if (memory_region_is_ram(section->mr)) {
1880 /* Normal RAM. */
1881 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1882 + memory_region_section_addr(section, paddr);
1883 if (!section->readonly) {
1884 iotlb |= phys_section_notdirty;
1885 } else {
1886 iotlb |= phys_section_rom;
1888 } else {
1889 /* IO handlers are currently passed a physical address.
1890 It would be nice to pass an offset from the base address
1891 of that region. This would avoid having to special case RAM,
1892 and avoid full address decoding in every device.
1893 We can't use the high bits of pd for this because
1894 IO_MEM_ROMD uses these as a ram address. */
1895 iotlb = section - phys_sections;
1896 iotlb += memory_region_section_addr(section, paddr);
1899 /* Make accesses to pages with watchpoints go via the
1900 watchpoint trap routines. */
1901 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1902 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1903 /* Avoid trapping reads of pages with a write breakpoint. */
1904 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1905 iotlb = phys_section_watch + paddr;
1906 *address |= TLB_MMIO;
1907 break;
1912 return iotlb;
1915 #else
1917 * Walks guest process memory "regions" one by one
1918 * and calls callback function 'fn' for each region.
1921 struct walk_memory_regions_data
1923 walk_memory_regions_fn fn;
1924 void *priv;
1925 uintptr_t start;
1926 int prot;
1929 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1930 abi_ulong end, int new_prot)
1932 if (data->start != -1ul) {
1933 int rc = data->fn(data->priv, data->start, end, data->prot);
1934 if (rc != 0) {
1935 return rc;
1939 data->start = (new_prot ? end : -1ul);
1940 data->prot = new_prot;
1942 return 0;
1945 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1946 abi_ulong base, int level, void **lp)
1948 abi_ulong pa;
1949 int i, rc;
1951 if (*lp == NULL) {
1952 return walk_memory_regions_end(data, base, 0);
1955 if (level == 0) {
1956 PageDesc *pd = *lp;
1957 for (i = 0; i < L2_SIZE; ++i) {
1958 int prot = pd[i].flags;
1960 pa = base | (i << TARGET_PAGE_BITS);
1961 if (prot != data->prot) {
1962 rc = walk_memory_regions_end(data, pa, prot);
1963 if (rc != 0) {
1964 return rc;
1968 } else {
1969 void **pp = *lp;
1970 for (i = 0; i < L2_SIZE; ++i) {
1971 pa = base | ((abi_ulong)i <<
1972 (TARGET_PAGE_BITS + L2_BITS * level));
1973 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1974 if (rc != 0) {
1975 return rc;
1980 return 0;
1983 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1985 struct walk_memory_regions_data data;
1986 uintptr_t i;
1988 data.fn = fn;
1989 data.priv = priv;
1990 data.start = -1ul;
1991 data.prot = 0;
1993 for (i = 0; i < V_L1_SIZE; i++) {
1994 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1995 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
1996 if (rc != 0) {
1997 return rc;
2001 return walk_memory_regions_end(&data, 0, 0);
2004 static int dump_region(void *priv, abi_ulong start,
2005 abi_ulong end, unsigned long prot)
2007 FILE *f = (FILE *)priv;
2009 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2010 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2011 start, end, end - start,
2012 ((prot & PAGE_READ) ? 'r' : '-'),
2013 ((prot & PAGE_WRITE) ? 'w' : '-'),
2014 ((prot & PAGE_EXEC) ? 'x' : '-'));
2016 return (0);
2019 /* dump memory mappings */
2020 void page_dump(FILE *f)
2022 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2023 "start", "end", "size", "prot");
2024 walk_memory_regions(f, dump_region);
2027 int page_get_flags(target_ulong address)
2029 PageDesc *p;
2031 p = page_find(address >> TARGET_PAGE_BITS);
2032 if (!p)
2033 return 0;
2034 return p->flags;
2037 /* Modify the flags of a page and invalidate the code if necessary.
2038 The flag PAGE_WRITE_ORG is positioned automatically depending
2039 on PAGE_WRITE. The mmap_lock should already be held. */
2040 void page_set_flags(target_ulong start, target_ulong end, int flags)
2042 target_ulong addr, len;
2044 /* This function should never be called with addresses outside the
2045 guest address space. If this assert fires, it probably indicates
2046 a missing call to h2g_valid. */
2047 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2048 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2049 #endif
2050 assert(start < end);
2052 start = start & TARGET_PAGE_MASK;
2053 end = TARGET_PAGE_ALIGN(end);
2055 if (flags & PAGE_WRITE) {
2056 flags |= PAGE_WRITE_ORG;
2059 for (addr = start, len = end - start;
2060 len != 0;
2061 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2062 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2064 /* If the write protection bit is set, then we invalidate
2065 the code inside. */
2066 if (!(p->flags & PAGE_WRITE) &&
2067 (flags & PAGE_WRITE) &&
2068 p->first_tb) {
2069 tb_invalidate_phys_page(addr, 0, NULL);
2071 p->flags = flags;
2075 int page_check_range(target_ulong start, target_ulong len, int flags)
2077 PageDesc *p;
2078 target_ulong end;
2079 target_ulong addr;
2081 /* This function should never be called with addresses outside the
2082 guest address space. If this assert fires, it probably indicates
2083 a missing call to h2g_valid. */
2084 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2085 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2086 #endif
2088 if (len == 0) {
2089 return 0;
2091 if (start + len - 1 < start) {
2092 /* We've wrapped around. */
2093 return -1;
2096 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2097 start = start & TARGET_PAGE_MASK;
2099 for (addr = start, len = end - start;
2100 len != 0;
2101 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2102 p = page_find(addr >> TARGET_PAGE_BITS);
2103 if( !p )
2104 return -1;
2105 if( !(p->flags & PAGE_VALID) )
2106 return -1;
2108 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2109 return -1;
2110 if (flags & PAGE_WRITE) {
2111 if (!(p->flags & PAGE_WRITE_ORG))
2112 return -1;
2113 /* unprotect the page if it was put read-only because it
2114 contains translated code */
2115 if (!(p->flags & PAGE_WRITE)) {
2116 if (!page_unprotect(addr, 0, NULL))
2117 return -1;
2119 return 0;
2122 return 0;
2125 /* called from signal handler: invalidate the code and unprotect the
2126 page. Return TRUE if the fault was successfully handled. */
2127 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2129 unsigned int prot;
2130 PageDesc *p;
2131 target_ulong host_start, host_end, addr;
2133 /* Technically this isn't safe inside a signal handler. However we
2134 know this only ever happens in a synchronous SEGV handler, so in
2135 practice it seems to be ok. */
2136 mmap_lock();
2138 p = page_find(address >> TARGET_PAGE_BITS);
2139 if (!p) {
2140 mmap_unlock();
2141 return 0;
2144 /* if the page was really writable, then we change its
2145 protection back to writable */
2146 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2147 host_start = address & qemu_host_page_mask;
2148 host_end = host_start + qemu_host_page_size;
2150 prot = 0;
2151 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2152 p = page_find(addr >> TARGET_PAGE_BITS);
2153 p->flags |= PAGE_WRITE;
2154 prot |= p->flags;
2156 /* and since the content will be modified, we must invalidate
2157 the corresponding translated code. */
2158 tb_invalidate_phys_page(addr, pc, puc);
2159 #ifdef DEBUG_TB_CHECK
2160 tb_invalidate_check(addr);
2161 #endif
2163 mprotect((void *)g2h(host_start), qemu_host_page_size,
2164 prot & PAGE_BITS);
2166 mmap_unlock();
2167 return 1;
2169 mmap_unlock();
2170 return 0;
2172 #endif /* defined(CONFIG_USER_ONLY) */
2174 #if !defined(CONFIG_USER_ONLY)
2176 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2177 typedef struct subpage_t {
2178 MemoryRegion iomem;
2179 hwaddr base;
2180 uint16_t sub_section[TARGET_PAGE_SIZE];
2181 } subpage_t;
2183 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2184 uint16_t section);
2185 static subpage_t *subpage_init(hwaddr base);
2186 static void destroy_page_desc(uint16_t section_index)
2188 MemoryRegionSection *section = &phys_sections[section_index];
2189 MemoryRegion *mr = section->mr;
2191 if (mr->subpage) {
2192 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2193 memory_region_destroy(&subpage->iomem);
2194 g_free(subpage);
2198 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2200 unsigned i;
2201 PhysPageEntry *p;
2203 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2204 return;
2207 p = phys_map_nodes[lp->ptr];
2208 for (i = 0; i < L2_SIZE; ++i) {
2209 if (!p[i].is_leaf) {
2210 destroy_l2_mapping(&p[i], level - 1);
2211 } else {
2212 destroy_page_desc(p[i].ptr);
2215 lp->is_leaf = 0;
2216 lp->ptr = PHYS_MAP_NODE_NIL;
2219 static void destroy_all_mappings(AddressSpaceDispatch *d)
2221 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2222 phys_map_nodes_reset();
2225 static uint16_t phys_section_add(MemoryRegionSection *section)
2227 if (phys_sections_nb == phys_sections_nb_alloc) {
2228 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2229 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2230 phys_sections_nb_alloc);
2232 phys_sections[phys_sections_nb] = *section;
2233 return phys_sections_nb++;
2236 static void phys_sections_clear(void)
2238 phys_sections_nb = 0;
2241 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2243 subpage_t *subpage;
2244 hwaddr base = section->offset_within_address_space
2245 & TARGET_PAGE_MASK;
2246 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2247 MemoryRegionSection subsection = {
2248 .offset_within_address_space = base,
2249 .size = TARGET_PAGE_SIZE,
2251 hwaddr start, end;
2253 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2255 if (!(existing->mr->subpage)) {
2256 subpage = subpage_init(base);
2257 subsection.mr = &subpage->iomem;
2258 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2259 phys_section_add(&subsection));
2260 } else {
2261 subpage = container_of(existing->mr, subpage_t, iomem);
2263 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2264 end = start + section->size - 1;
2265 subpage_register(subpage, start, end, phys_section_add(section));
2269 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2271 hwaddr start_addr = section->offset_within_address_space;
2272 ram_addr_t size = section->size;
2273 hwaddr addr;
2274 uint16_t section_index = phys_section_add(section);
2276 assert(size);
2278 addr = start_addr;
2279 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2280 section_index);
2283 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2285 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2286 MemoryRegionSection now = *section, remain = *section;
2288 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2289 || (now.size < TARGET_PAGE_SIZE)) {
2290 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2291 - now.offset_within_address_space,
2292 now.size);
2293 register_subpage(d, &now);
2294 remain.size -= now.size;
2295 remain.offset_within_address_space += now.size;
2296 remain.offset_within_region += now.size;
2298 while (remain.size >= TARGET_PAGE_SIZE) {
2299 now = remain;
2300 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2301 now.size = TARGET_PAGE_SIZE;
2302 register_subpage(d, &now);
2303 } else {
2304 now.size &= TARGET_PAGE_MASK;
2305 register_multipage(d, &now);
2307 remain.size -= now.size;
2308 remain.offset_within_address_space += now.size;
2309 remain.offset_within_region += now.size;
2311 now = remain;
2312 if (now.size) {
2313 register_subpage(d, &now);
/* Flush KVM's coalesced MMIO buffer; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2323 #if defined(__linux__) && !defined(TARGET_S390X)
2325 #include <sys/vfs.h>
2327 #define HUGETLBFS_MAGIC 0x958458f6
2329 static long gethugepagesize(const char *path)
2331 struct statfs fs;
2332 int ret;
2334 do {
2335 ret = statfs(path, &fs);
2336 } while (ret != 0 && errno == EINTR);
2338 if (ret != 0) {
2339 perror(path);
2340 return 0;
2343 if (fs.f_type != HUGETLBFS_MAGIC)
2344 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2346 return fs.f_bsize;
2349 static void *file_ram_alloc(RAMBlock *block,
2350 ram_addr_t memory,
2351 const char *path)
2353 char *filename;
2354 void *area;
2355 int fd;
2356 #ifdef MAP_POPULATE
2357 int flags;
2358 #endif
2359 unsigned long hpagesize;
2361 hpagesize = gethugepagesize(path);
2362 if (!hpagesize) {
2363 return NULL;
2366 if (memory < hpagesize) {
2367 return NULL;
2370 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2371 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2372 return NULL;
2375 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2376 return NULL;
2379 fd = mkstemp(filename);
2380 if (fd < 0) {
2381 perror("unable to create backing store for hugepages");
2382 free(filename);
2383 return NULL;
2385 unlink(filename);
2386 free(filename);
2388 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2391 * ftruncate is not supported by hugetlbfs in older
2392 * hosts, so don't bother bailing out on errors.
2393 * If anything goes wrong with it under other filesystems,
2394 * mmap will fail.
2396 if (ftruncate(fd, memory))
2397 perror("ftruncate");
2399 #ifdef MAP_POPULATE
2400 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2401 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2402 * to sidestep this quirk.
2404 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2405 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2406 #else
2407 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2408 #endif
2409 if (area == MAP_FAILED) {
2410 perror("file_ram_alloc: can't mmap RAM pages");
2411 close(fd);
2412 return (NULL);
2414 block->fd = fd;
2415 return area;
2417 #endif
2419 static ram_addr_t find_ram_offset(ram_addr_t size)
2421 RAMBlock *block, *next_block;
2422 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2424 if (QLIST_EMPTY(&ram_list.blocks))
2425 return 0;
2427 QLIST_FOREACH(block, &ram_list.blocks, next) {
2428 ram_addr_t end, next = RAM_ADDR_MAX;
2430 end = block->offset + block->length;
2432 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2433 if (next_block->offset >= end) {
2434 next = MIN(next, next_block->offset);
2437 if (next - end >= size && next - end < mingap) {
2438 offset = end;
2439 mingap = next - end;
2443 if (offset == RAM_ADDR_MAX) {
2444 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2445 (uint64_t)size);
2446 abort();
2449 return offset;
2452 ram_addr_t last_ram_offset(void)
2454 RAMBlock *block;
2455 ram_addr_t last = 0;
2457 QLIST_FOREACH(block, &ram_list.blocks, next)
2458 last = MAX(last, block->offset + block->length);
2460 return last;
2463 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2465 int ret;
2466 QemuOpts *machine_opts;
2468 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2469 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2470 if (machine_opts &&
2471 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2472 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2473 if (ret) {
2474 perror("qemu_madvise");
2475 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2476 "but dump_guest_core=off specified\n");
2481 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2483 RAMBlock *new_block, *block;
2485 new_block = NULL;
2486 QLIST_FOREACH(block, &ram_list.blocks, next) {
2487 if (block->offset == addr) {
2488 new_block = block;
2489 break;
2492 assert(new_block);
2493 assert(!new_block->idstr[0]);
2495 if (dev) {
2496 char *id = qdev_get_dev_path(dev);
2497 if (id) {
2498 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2499 g_free(id);
2502 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2504 QLIST_FOREACH(block, &ram_list.blocks, next) {
2505 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2506 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2507 new_block->idstr);
2508 abort();
2513 static int memory_try_enable_merging(void *addr, size_t len)
2515 QemuOpts *opts;
2517 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2518 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2519 /* disabled by the user */
2520 return 0;
2523 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2526 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2527 MemoryRegion *mr)
2529 RAMBlock *new_block;
2531 size = TARGET_PAGE_ALIGN(size);
2532 new_block = g_malloc0(sizeof(*new_block));
2534 new_block->mr = mr;
2535 new_block->offset = find_ram_offset(size);
2536 if (host) {
2537 new_block->host = host;
2538 new_block->flags |= RAM_PREALLOC_MASK;
2539 } else {
2540 if (mem_path) {
2541 #if defined (__linux__) && !defined(TARGET_S390X)
2542 new_block->host = file_ram_alloc(new_block, size, mem_path);
2543 if (!new_block->host) {
2544 new_block->host = qemu_vmalloc(size);
2545 memory_try_enable_merging(new_block->host, size);
2547 #else
2548 fprintf(stderr, "-mem-path option unsupported\n");
2549 exit(1);
2550 #endif
2551 } else {
2552 if (xen_enabled()) {
2553 xen_ram_alloc(new_block->offset, size, mr);
2554 } else if (kvm_enabled()) {
2555 /* some s390/kvm configurations have special constraints */
2556 new_block->host = kvm_vmalloc(size);
2557 } else {
2558 new_block->host = qemu_vmalloc(size);
2560 memory_try_enable_merging(new_block->host, size);
2563 new_block->length = size;
2565 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2567 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2568 last_ram_offset() >> TARGET_PAGE_BITS);
2569 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2570 0, size >> TARGET_PAGE_BITS);
2571 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2573 qemu_ram_setup_dump(new_block->host, size);
2574 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2576 if (kvm_enabled())
2577 kvm_setup_guest_memory(new_block->host, size);
2579 return new_block->offset;
2582 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2584 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2587 void qemu_ram_free_from_ptr(ram_addr_t addr)
2589 RAMBlock *block;
2591 QLIST_FOREACH(block, &ram_list.blocks, next) {
2592 if (addr == block->offset) {
2593 QLIST_REMOVE(block, next);
2594 g_free(block);
2595 return;
2600 void qemu_ram_free(ram_addr_t addr)
2602 RAMBlock *block;
2604 QLIST_FOREACH(block, &ram_list.blocks, next) {
2605 if (addr == block->offset) {
2606 QLIST_REMOVE(block, next);
2607 if (block->flags & RAM_PREALLOC_MASK) {
2609 } else if (mem_path) {
2610 #if defined (__linux__) && !defined(TARGET_S390X)
2611 if (block->fd) {
2612 munmap(block->host, block->length);
2613 close(block->fd);
2614 } else {
2615 qemu_vfree(block->host);
2617 #else
2618 abort();
2619 #endif
2620 } else {
2621 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2622 munmap(block->host, block->length);
2623 #else
2624 if (xen_enabled()) {
2625 xen_invalidate_map_cache_entry(block->host);
2626 } else {
2627 qemu_vfree(block->host);
2629 #endif
2631 g_free(block);
2632 return;
2638 #ifndef _WIN32
2639 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2641 RAMBlock *block;
2642 ram_addr_t offset;
2643 int flags;
2644 void *area, *vaddr;
2646 QLIST_FOREACH(block, &ram_list.blocks, next) {
2647 offset = addr - block->offset;
2648 if (offset < block->length) {
2649 vaddr = block->host + offset;
2650 if (block->flags & RAM_PREALLOC_MASK) {
2652 } else {
2653 flags = MAP_FIXED;
2654 munmap(vaddr, length);
2655 if (mem_path) {
2656 #if defined(__linux__) && !defined(TARGET_S390X)
2657 if (block->fd) {
2658 #ifdef MAP_POPULATE
2659 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2660 MAP_PRIVATE;
2661 #else
2662 flags |= MAP_PRIVATE;
2663 #endif
2664 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2665 flags, block->fd, offset);
2666 } else {
2667 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2668 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669 flags, -1, 0);
2671 #else
2672 abort();
2673 #endif
2674 } else {
2675 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2676 flags |= MAP_SHARED | MAP_ANONYMOUS;
2677 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2678 flags, -1, 0);
2679 #else
2680 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2681 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2682 flags, -1, 0);
2683 #endif
2685 if (area != vaddr) {
2686 fprintf(stderr, "Could not remap addr: "
2687 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2688 length, addr);
2689 exit(1);
2691 memory_try_enable_merging(vaddr, length);
2692 qemu_ram_setup_dump(vaddr, length);
2694 return;
2698 #endif /* !_WIN32 */
2700 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2701 With the exception of the softmmu code in this file, this should
2702 only be used for local memory (e.g. video ram) that the device owns,
2703 and knows it isn't going to access beyond the end of the block.
2705 It should not be used for general purpose DMA.
2706 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2708 void *qemu_get_ram_ptr(ram_addr_t addr)
2710 RAMBlock *block;
2712 QLIST_FOREACH(block, &ram_list.blocks, next) {
2713 if (addr - block->offset < block->length) {
2714 /* Move this entry to to start of the list. */
2715 if (block != QLIST_FIRST(&ram_list.blocks)) {
2716 QLIST_REMOVE(block, next);
2717 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2719 if (xen_enabled()) {
2720 /* We need to check if the requested address is in the RAM
2721 * because we don't want to map the entire memory in QEMU.
2722 * In that case just map until the end of the page.
2724 if (block->offset == 0) {
2725 return xen_map_cache(addr, 0, 0);
2726 } else if (block->host == NULL) {
2727 block->host =
2728 xen_map_cache(block->offset, block->length, 1);
2731 return block->host + (addr - block->offset);
2735 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2736 abort();
2738 return NULL;
2741 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2742 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2744 void *qemu_safe_ram_ptr(ram_addr_t addr)
2746 RAMBlock *block;
2748 QLIST_FOREACH(block, &ram_list.blocks, next) {
2749 if (addr - block->offset < block->length) {
2750 if (xen_enabled()) {
2751 /* We need to check if the requested address is in the RAM
2752 * because we don't want to map the entire memory in QEMU.
2753 * In that case just map until the end of the page.
2755 if (block->offset == 0) {
2756 return xen_map_cache(addr, 0, 0);
2757 } else if (block->host == NULL) {
2758 block->host =
2759 xen_map_cache(block->offset, block->length, 1);
2762 return block->host + (addr - block->offset);
2766 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2767 abort();
2769 return NULL;
2772 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2773 * but takes a size argument */
2774 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2776 if (*size == 0) {
2777 return NULL;
2779 if (xen_enabled()) {
2780 return xen_map_cache(addr, *size, 1);
2781 } else {
2782 RAMBlock *block;
2784 QLIST_FOREACH(block, &ram_list.blocks, next) {
2785 if (addr - block->offset < block->length) {
2786 if (addr - block->offset + *size > block->length)
2787 *size = block->length - addr + block->offset;
2788 return block->host + (addr - block->offset);
2792 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2793 abort();
/* Counterpart of qemu_get_ram_ptr(); here it only emits the
   qemu_put_ram_ptr trace event for @addr. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
2802 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2804 RAMBlock *block;
2805 uint8_t *host = ptr;
2807 if (xen_enabled()) {
2808 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2809 return 0;
2812 QLIST_FOREACH(block, &ram_list.blocks, next) {
2813 /* This case append when the block is not mapped. */
2814 if (block->host == NULL) {
2815 continue;
2817 if (host - block->host < block->length) {
2818 *ram_addr = block->offset + (host - block->host);
2819 return 0;
2823 return -1;
2826 /* Some of the softmmu routines need to translate from a host pointer
2827 (typically a TLB entry) back to a ram offset. */
2828 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2830 ram_addr_t ram_addr;
2832 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2833 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2834 abort();
2836 return ram_addr;
2839 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2840 unsigned size)
2842 #ifdef DEBUG_UNASSIGNED
2843 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2844 #endif
2845 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2846 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2847 #endif
2848 return 0;
2851 static void unassigned_mem_write(void *opaque, hwaddr addr,
2852 uint64_t val, unsigned size)
2854 #ifdef DEBUG_UNASSIGNED
2855 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2856 #endif
2857 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2858 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2859 #endif
2862 static const MemoryRegionOps unassigned_mem_ops = {
2863 .read = unassigned_mem_read,
2864 .write = unassigned_mem_write,
2865 .endianness = DEVICE_NATIVE_ENDIAN,
2868 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2869 unsigned size)
2871 abort();
2874 static void error_mem_write(void *opaque, hwaddr addr,
2875 uint64_t value, unsigned size)
2877 abort();
2880 static const MemoryRegionOps error_mem_ops = {
2881 .read = error_mem_read,
2882 .write = error_mem_write,
2883 .endianness = DEVICE_NATIVE_ENDIAN,
2886 static const MemoryRegionOps rom_mem_ops = {
2887 .read = error_mem_read,
2888 .write = unassigned_mem_write,
2889 .endianness = DEVICE_NATIVE_ENDIAN,
2892 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2893 uint64_t val, unsigned size)
2895 int dirty_flags;
2896 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2897 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2898 #if !defined(CONFIG_USER_ONLY)
2899 tb_invalidate_phys_page_fast(ram_addr, size);
2900 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901 #endif
2903 switch (size) {
2904 case 1:
2905 stb_p(qemu_get_ram_ptr(ram_addr), val);
2906 break;
2907 case 2:
2908 stw_p(qemu_get_ram_ptr(ram_addr), val);
2909 break;
2910 case 4:
2911 stl_p(qemu_get_ram_ptr(ram_addr), val);
2912 break;
2913 default:
2914 abort();
2916 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2917 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2918 /* we remove the notdirty callback only if the code has been
2919 flushed */
2920 if (dirty_flags == 0xff)
2921 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2924 static const MemoryRegionOps notdirty_mem_ops = {
2925 .read = error_mem_read,
2926 .write = notdirty_mem_write,
2927 .endianness = DEVICE_NATIVE_ENDIAN,
2930 /* Generate a debug exception if a watchpoint has been hit. */
2931 static void check_watchpoint(int offset, int len_mask, int flags)
2933 CPUArchState *env = cpu_single_env;
2934 target_ulong pc, cs_base;
2935 TranslationBlock *tb;
2936 target_ulong vaddr;
2937 CPUWatchpoint *wp;
2938 int cpu_flags;
2940 if (env->watchpoint_hit) {
2941 /* We re-entered the check after replacing the TB. Now raise
2942 * the debug interrupt so that is will trigger after the
2943 * current instruction. */
2944 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2945 return;
2947 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2948 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2949 if ((vaddr == (wp->vaddr & len_mask) ||
2950 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2951 wp->flags |= BP_WATCHPOINT_HIT;
2952 if (!env->watchpoint_hit) {
2953 env->watchpoint_hit = wp;
2954 tb = tb_find_pc(env->mem_io_pc);
2955 if (!tb) {
2956 cpu_abort(env, "check_watchpoint: could not find TB for "
2957 "pc=%p", (void *)env->mem_io_pc);
2959 cpu_restore_state(tb, env, env->mem_io_pc);
2960 tb_phys_invalidate(tb, -1);
2961 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2962 env->exception_index = EXCP_DEBUG;
2963 cpu_loop_exit(env);
2964 } else {
2965 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2966 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2967 cpu_resume_from_signal(env, NULL);
2970 } else {
2971 wp->flags &= ~BP_WATCHPOINT_HIT;
2976 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2977 so these check for a hit then pass through to the normal out-of-line
2978 phys routines. */
2979 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2980 unsigned size)
2982 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2983 switch (size) {
2984 case 1: return ldub_phys(addr);
2985 case 2: return lduw_phys(addr);
2986 case 4: return ldl_phys(addr);
2987 default: abort();
2991 static void watch_mem_write(void *opaque, hwaddr addr,
2992 uint64_t val, unsigned size)
2994 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2995 switch (size) {
2996 case 1:
2997 stb_phys(addr, val);
2998 break;
2999 case 2:
3000 stw_phys(addr, val);
3001 break;
3002 case 4:
3003 stl_phys(addr, val);
3004 break;
3005 default: abort();
3009 static const MemoryRegionOps watch_mem_ops = {
3010 .read = watch_mem_read,
3011 .write = watch_mem_write,
3012 .endianness = DEVICE_NATIVE_ENDIAN,
3015 static uint64_t subpage_read(void *opaque, hwaddr addr,
3016 unsigned len)
3018 subpage_t *mmio = opaque;
3019 unsigned int idx = SUBPAGE_IDX(addr);
3020 MemoryRegionSection *section;
3021 #if defined(DEBUG_SUBPAGE)
3022 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3023 mmio, len, addr, idx);
3024 #endif
3026 section = &phys_sections[mmio->sub_section[idx]];
3027 addr += mmio->base;
3028 addr -= section->offset_within_address_space;
3029 addr += section->offset_within_region;
3030 return io_mem_read(section->mr, addr, len);
3033 static void subpage_write(void *opaque, hwaddr addr,
3034 uint64_t value, unsigned len)
3036 subpage_t *mmio = opaque;
3037 unsigned int idx = SUBPAGE_IDX(addr);
3038 MemoryRegionSection *section;
3039 #if defined(DEBUG_SUBPAGE)
3040 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3041 " idx %d value %"PRIx64"\n",
3042 __func__, mmio, len, addr, idx, value);
3043 #endif
3045 section = &phys_sections[mmio->sub_section[idx]];
3046 addr += mmio->base;
3047 addr -= section->offset_within_address_space;
3048 addr += section->offset_within_region;
3049 io_mem_write(section->mr, addr, value, len);
3052 static const MemoryRegionOps subpage_ops = {
3053 .read = subpage_read,
3054 .write = subpage_write,
3055 .endianness = DEVICE_NATIVE_ENDIAN,
3058 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3059 unsigned size)
3061 ram_addr_t raddr = addr;
3062 void *ptr = qemu_get_ram_ptr(raddr);
3063 switch (size) {
3064 case 1: return ldub_p(ptr);
3065 case 2: return lduw_p(ptr);
3066 case 4: return ldl_p(ptr);
3067 default: abort();
3071 static void subpage_ram_write(void *opaque, hwaddr addr,
3072 uint64_t value, unsigned size)
3074 ram_addr_t raddr = addr;
3075 void *ptr = qemu_get_ram_ptr(raddr);
3076 switch (size) {
3077 case 1: return stb_p(ptr, value);
3078 case 2: return stw_p(ptr, value);
3079 case 4: return stl_p(ptr, value);
3080 default: abort();
3084 static const MemoryRegionOps subpage_ram_ops = {
3085 .read = subpage_ram_read,
3086 .write = subpage_ram_write,
3087 .endianness = DEVICE_NATIVE_ENDIAN,
3090 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3091 uint16_t section)
3093 int idx, eidx;
3095 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3096 return -1;
3097 idx = SUBPAGE_IDX(start);
3098 eidx = SUBPAGE_IDX(end);
3099 #if defined(DEBUG_SUBPAGE)
3100 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3101 mmio, start, end, idx, eidx, memory);
3102 #endif
3103 if (memory_region_is_ram(phys_sections[section].mr)) {
3104 MemoryRegionSection new_section = phys_sections[section];
3105 new_section.mr = &io_mem_subpage_ram;
3106 section = phys_section_add(&new_section);
3108 for (; idx <= eidx; idx++) {
3109 mmio->sub_section[idx] = section;
3112 return 0;
3115 static subpage_t *subpage_init(hwaddr base)
3117 subpage_t *mmio;
3119 mmio = g_malloc0(sizeof(subpage_t));
3121 mmio->base = base;
3122 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3123 "subpage", TARGET_PAGE_SIZE);
3124 mmio->iomem.subpage = true;
3125 #if defined(DEBUG_SUBPAGE)
3126 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3127 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3128 #endif
3129 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3131 return mmio;
3134 static uint16_t dummy_section(MemoryRegion *mr)
3136 MemoryRegionSection section = {
3137 .mr = mr,
3138 .offset_within_address_space = 0,
3139 .offset_within_region = 0,
3140 .size = UINT64_MAX,
3143 return phys_section_add(&section);
3146 MemoryRegion *iotlb_to_region(hwaddr index)
3148 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3151 static void io_mem_init(void)
3153 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3154 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3155 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3156 "unassigned", UINT64_MAX);
3157 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3158 "notdirty", UINT64_MAX);
3159 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3160 "subpage-ram", UINT64_MAX);
3161 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3162 "watch", UINT64_MAX);
3165 static void mem_begin(MemoryListener *listener)
3167 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3169 destroy_all_mappings(d);
3170 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3173 static void core_begin(MemoryListener *listener)
3175 phys_sections_clear();
3176 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3177 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3178 phys_section_rom = dummy_section(&io_mem_rom);
3179 phys_section_watch = dummy_section(&io_mem_watch);
3182 static void tcg_commit(MemoryListener *listener)
3184 CPUArchState *env;
3186 /* since each CPU stores ram addresses in its TLB cache, we must
3187 reset the modified entries */
3188 /* XXX: slow ! */
3189 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3190 tlb_flush(env, 1);
3194 static void core_log_global_start(MemoryListener *listener)
3196 cpu_physical_memory_set_dirty_tracking(1);
3199 static void core_log_global_stop(MemoryListener *listener)
3201 cpu_physical_memory_set_dirty_tracking(0);
3204 static void io_region_add(MemoryListener *listener,
3205 MemoryRegionSection *section)
3207 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3209 mrio->mr = section->mr;
3210 mrio->offset = section->offset_within_region;
3211 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3212 section->offset_within_address_space, section->size);
3213 ioport_register(&mrio->iorange);
3216 static void io_region_del(MemoryListener *listener,
3217 MemoryRegionSection *section)
3219 isa_unassign_ioport(section->offset_within_address_space, section->size);
3222 static MemoryListener core_memory_listener = {
3223 .begin = core_begin,
3224 .log_global_start = core_log_global_start,
3225 .log_global_stop = core_log_global_stop,
3226 .priority = 1,
3229 static MemoryListener io_memory_listener = {
3230 .region_add = io_region_add,
3231 .region_del = io_region_del,
3232 .priority = 0,
3235 static MemoryListener tcg_memory_listener = {
3236 .commit = tcg_commit,
3239 void address_space_init_dispatch(AddressSpace *as)
3241 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3243 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3244 d->listener = (MemoryListener) {
3245 .begin = mem_begin,
3246 .region_add = mem_add,
3247 .region_nop = mem_add,
3248 .priority = 0,
3250 as->dispatch = d;
3251 memory_listener_register(&d->listener, as);
3254 void address_space_destroy_dispatch(AddressSpace *as)
3256 AddressSpaceDispatch *d = as->dispatch;
3258 memory_listener_unregister(&d->listener);
3259 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3260 g_free(d);
3261 as->dispatch = NULL;
3264 static void memory_map_init(void)
3266 system_memory = g_malloc(sizeof(*system_memory));
3267 memory_region_init(system_memory, "system", INT64_MAX);
3268 address_space_init(&address_space_memory, system_memory);
3269 address_space_memory.name = "memory";
3271 system_io = g_malloc(sizeof(*system_io));
3272 memory_region_init(system_io, "io", 65536);
3273 address_space_init(&address_space_io, system_io);
3274 address_space_io.name = "I/O";
3276 memory_listener_register(&core_memory_listener, &address_space_memory);
3277 memory_listener_register(&io_memory_listener, &address_space_io);
3278 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3281 MemoryRegion *get_system_memory(void)
3283 return system_memory;
3286 MemoryRegion *get_system_io(void)
3288 return system_io;
3291 #endif /* !defined(CONFIG_USER_ONLY) */
3293 /* physical memory access (slow version, mainly for debug) */
3294 #if defined(CONFIG_USER_ONLY)
3295 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3296 uint8_t *buf, int len, int is_write)
3298 int l, flags;
3299 target_ulong page;
3300 void * p;
3302 while (len > 0) {
3303 page = addr & TARGET_PAGE_MASK;
3304 l = (page + TARGET_PAGE_SIZE) - addr;
3305 if (l > len)
3306 l = len;
3307 flags = page_get_flags(page);
3308 if (!(flags & PAGE_VALID))
3309 return -1;
3310 if (is_write) {
3311 if (!(flags & PAGE_WRITE))
3312 return -1;
3313 /* XXX: this code should not depend on lock_user */
3314 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3315 return -1;
3316 memcpy(p, buf, l);
3317 unlock_user(p, addr, l);
3318 } else {
3319 if (!(flags & PAGE_READ))
3320 return -1;
3321 /* XXX: this code should not depend on lock_user */
3322 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3323 return -1;
3324 memcpy(buf, p, l);
3325 unlock_user(p, addr, 0);
3327 len -= l;
3328 buf += l;
3329 addr += l;
3331 return 0;
3334 #else
3336 static void invalidate_and_set_dirty(hwaddr addr,
3337 hwaddr length)
3339 if (!cpu_physical_memory_is_dirty(addr)) {
3340 /* invalidate code */
3341 tb_invalidate_phys_page_range(addr, addr + length, 0);
3342 /* set dirty bit */
3343 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3345 xen_modified_memory(addr, length);
3348 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3349 int len, bool is_write)
3351 AddressSpaceDispatch *d = as->dispatch;
3352 int l;
3353 uint8_t *ptr;
3354 uint32_t val;
3355 hwaddr page;
3356 MemoryRegionSection *section;
3358 while (len > 0) {
3359 page = addr & TARGET_PAGE_MASK;
3360 l = (page + TARGET_PAGE_SIZE) - addr;
3361 if (l > len)
3362 l = len;
3363 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3365 if (is_write) {
3366 if (!memory_region_is_ram(section->mr)) {
3367 hwaddr addr1;
3368 addr1 = memory_region_section_addr(section, addr);
3369 /* XXX: could force cpu_single_env to NULL to avoid
3370 potential bugs */
3371 if (l >= 4 && ((addr1 & 3) == 0)) {
3372 /* 32 bit write access */
3373 val = ldl_p(buf);
3374 io_mem_write(section->mr, addr1, val, 4);
3375 l = 4;
3376 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3377 /* 16 bit write access */
3378 val = lduw_p(buf);
3379 io_mem_write(section->mr, addr1, val, 2);
3380 l = 2;
3381 } else {
3382 /* 8 bit write access */
3383 val = ldub_p(buf);
3384 io_mem_write(section->mr, addr1, val, 1);
3385 l = 1;
3387 } else if (!section->readonly) {
3388 ram_addr_t addr1;
3389 addr1 = memory_region_get_ram_addr(section->mr)
3390 + memory_region_section_addr(section, addr);
3391 /* RAM case */
3392 ptr = qemu_get_ram_ptr(addr1);
3393 memcpy(ptr, buf, l);
3394 invalidate_and_set_dirty(addr1, l);
3395 qemu_put_ram_ptr(ptr);
3397 } else {
3398 if (!(memory_region_is_ram(section->mr) ||
3399 memory_region_is_romd(section->mr))) {
3400 hwaddr addr1;
3401 /* I/O case */
3402 addr1 = memory_region_section_addr(section, addr);
3403 if (l >= 4 && ((addr1 & 3) == 0)) {
3404 /* 32 bit read access */
3405 val = io_mem_read(section->mr, addr1, 4);
3406 stl_p(buf, val);
3407 l = 4;
3408 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3409 /* 16 bit read access */
3410 val = io_mem_read(section->mr, addr1, 2);
3411 stw_p(buf, val);
3412 l = 2;
3413 } else {
3414 /* 8 bit read access */
3415 val = io_mem_read(section->mr, addr1, 1);
3416 stb_p(buf, val);
3417 l = 1;
3419 } else {
3420 /* RAM case */
3421 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3422 + memory_region_section_addr(section,
3423 addr));
3424 memcpy(buf, ptr, l);
3425 qemu_put_ram_ptr(ptr);
3428 len -= l;
3429 buf += l;
3430 addr += l;
3434 void address_space_write(AddressSpace *as, hwaddr addr,
3435 const uint8_t *buf, int len)
3437 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3441 * address_space_read: read from an address space.
3443 * @as: #AddressSpace to be accessed
3444 * @addr: address within that address space
3445 * @buf: buffer with the data transferred
3447 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3449 address_space_rw(as, addr, buf, len, false);
3453 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3454 int len, int is_write)
3456 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
3459 /* used for ROM loading : can write in RAM and ROM */
3460 void cpu_physical_memory_write_rom(hwaddr addr,
3461 const uint8_t *buf, int len)
3463 AddressSpaceDispatch *d = address_space_memory.dispatch;
3464 int l;
3465 uint8_t *ptr;
3466 hwaddr page;
3467 MemoryRegionSection *section;
3469 while (len > 0) {
3470 page = addr & TARGET_PAGE_MASK;
3471 l = (page + TARGET_PAGE_SIZE) - addr;
3472 if (l > len)
3473 l = len;
3474 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3476 if (!(memory_region_is_ram(section->mr) ||
3477 memory_region_is_romd(section->mr))) {
3478 /* do nothing */
3479 } else {
3480 unsigned long addr1;
3481 addr1 = memory_region_get_ram_addr(section->mr)
3482 + memory_region_section_addr(section, addr);
3483 /* ROM/RAM case */
3484 ptr = qemu_get_ram_ptr(addr1);
3485 memcpy(ptr, buf, l);
3486 invalidate_and_set_dirty(addr1, l);
3487 qemu_put_ram_ptr(ptr);
3489 len -= l;
3490 buf += l;
3491 addr += l;
3495 typedef struct {
3496 void *buffer;
3497 hwaddr addr;
3498 hwaddr len;
3499 } BounceBuffer;
3501 static BounceBuffer bounce;
3503 typedef struct MapClient {
3504 void *opaque;
3505 void (*callback)(void *opaque);
3506 QLIST_ENTRY(MapClient) link;
3507 } MapClient;
3509 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3510 = QLIST_HEAD_INITIALIZER(map_client_list);
3512 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3514 MapClient *client = g_malloc(sizeof(*client));
3516 client->opaque = opaque;
3517 client->callback = callback;
3518 QLIST_INSERT_HEAD(&map_client_list, client, link);
3519 return client;
3522 void cpu_unregister_map_client(void *_client)
3524 MapClient *client = (MapClient *)_client;
3526 QLIST_REMOVE(client, link);
3527 g_free(client);
3530 static void cpu_notify_map_clients(void)
3532 MapClient *client;
3534 while (!QLIST_EMPTY(&map_client_list)) {
3535 client = QLIST_FIRST(&map_client_list);
3536 client->callback(client->opaque);
3537 cpu_unregister_map_client(client);
3541 /* Map a physical memory region into a host virtual address.
3542 * May map a subset of the requested range, given by and returned in *plen.
3543 * May return NULL if resources needed to perform the mapping are exhausted.
3544 * Use only for reads OR writes - not for read-modify-write operations.
3545 * Use cpu_register_map_client() to know when retrying the map operation is
3546 * likely to succeed.
3548 void *address_space_map(AddressSpace *as,
3549 hwaddr addr,
3550 hwaddr *plen,
3551 bool is_write)
3553 AddressSpaceDispatch *d = as->dispatch;
3554 hwaddr len = *plen;
3555 hwaddr todo = 0;
3556 int l;
3557 hwaddr page;
3558 MemoryRegionSection *section;
3559 ram_addr_t raddr = RAM_ADDR_MAX;
3560 ram_addr_t rlen;
3561 void *ret;
3563 while (len > 0) {
3564 page = addr & TARGET_PAGE_MASK;
3565 l = (page + TARGET_PAGE_SIZE) - addr;
3566 if (l > len)
3567 l = len;
3568 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3570 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3571 if (todo || bounce.buffer) {
3572 break;
3574 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3575 bounce.addr = addr;
3576 bounce.len = l;
3577 if (!is_write) {
3578 address_space_read(as, addr, bounce.buffer, l);
3581 *plen = l;
3582 return bounce.buffer;
3584 if (!todo) {
3585 raddr = memory_region_get_ram_addr(section->mr)
3586 + memory_region_section_addr(section, addr);
3589 len -= l;
3590 addr += l;
3591 todo += l;
3593 rlen = todo;
3594 ret = qemu_ram_ptr_length(raddr, &rlen);
3595 *plen = rlen;
3596 return ret;
3599 /* Unmaps a memory region previously mapped by address_space_map().
3600 * Will also mark the memory as dirty if is_write == 1. access_len gives
3601 * the amount of memory that was actually read or written by the caller.
3603 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3604 int is_write, hwaddr access_len)
3606 if (buffer != bounce.buffer) {
3607 if (is_write) {
3608 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3609 while (access_len) {
3610 unsigned l;
3611 l = TARGET_PAGE_SIZE;
3612 if (l > access_len)
3613 l = access_len;
3614 invalidate_and_set_dirty(addr1, l);
3615 addr1 += l;
3616 access_len -= l;
3619 if (xen_enabled()) {
3620 xen_invalidate_map_cache_entry(buffer);
3622 return;
3624 if (is_write) {
3625 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3627 qemu_vfree(bounce.buffer);
3628 bounce.buffer = NULL;
3629 cpu_notify_map_clients();
3632 void *cpu_physical_memory_map(hwaddr addr,
3633 hwaddr *plen,
3634 int is_write)
3636 return address_space_map(&address_space_memory, addr, plen, is_write);
3639 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3640 int is_write, hwaddr access_len)
3642 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
3645 /* warning: addr must be aligned */
3646 static inline uint32_t ldl_phys_internal(hwaddr addr,
3647 enum device_endian endian)
3649 uint8_t *ptr;
3650 uint32_t val;
3651 MemoryRegionSection *section;
3653 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3655 if (!(memory_region_is_ram(section->mr) ||
3656 memory_region_is_romd(section->mr))) {
3657 /* I/O case */
3658 addr = memory_region_section_addr(section, addr);
3659 val = io_mem_read(section->mr, addr, 4);
3660 #if defined(TARGET_WORDS_BIGENDIAN)
3661 if (endian == DEVICE_LITTLE_ENDIAN) {
3662 val = bswap32(val);
3664 #else
3665 if (endian == DEVICE_BIG_ENDIAN) {
3666 val = bswap32(val);
3668 #endif
3669 } else {
3670 /* RAM case */
3671 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3672 & TARGET_PAGE_MASK)
3673 + memory_region_section_addr(section, addr));
3674 switch (endian) {
3675 case DEVICE_LITTLE_ENDIAN:
3676 val = ldl_le_p(ptr);
3677 break;
3678 case DEVICE_BIG_ENDIAN:
3679 val = ldl_be_p(ptr);
3680 break;
3681 default:
3682 val = ldl_p(ptr);
3683 break;
3686 return val;
3689 uint32_t ldl_phys(hwaddr addr)
3691 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3694 uint32_t ldl_le_phys(hwaddr addr)
3696 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3699 uint32_t ldl_be_phys(hwaddr addr)
3701 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3704 /* warning: addr must be aligned */
3705 static inline uint64_t ldq_phys_internal(hwaddr addr,
3706 enum device_endian endian)
3708 uint8_t *ptr;
3709 uint64_t val;
3710 MemoryRegionSection *section;
3712 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3714 if (!(memory_region_is_ram(section->mr) ||
3715 memory_region_is_romd(section->mr))) {
3716 /* I/O case */
3717 addr = memory_region_section_addr(section, addr);
3719 /* XXX This is broken when device endian != cpu endian.
3720 Fix and add "endian" variable check */
3721 #ifdef TARGET_WORDS_BIGENDIAN
3722 val = io_mem_read(section->mr, addr, 4) << 32;
3723 val |= io_mem_read(section->mr, addr + 4, 4);
3724 #else
3725 val = io_mem_read(section->mr, addr, 4);
3726 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3727 #endif
3728 } else {
3729 /* RAM case */
3730 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3731 & TARGET_PAGE_MASK)
3732 + memory_region_section_addr(section, addr));
3733 switch (endian) {
3734 case DEVICE_LITTLE_ENDIAN:
3735 val = ldq_le_p(ptr);
3736 break;
3737 case DEVICE_BIG_ENDIAN:
3738 val = ldq_be_p(ptr);
3739 break;
3740 default:
3741 val = ldq_p(ptr);
3742 break;
3745 return val;
3748 uint64_t ldq_phys(hwaddr addr)
3750 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3753 uint64_t ldq_le_phys(hwaddr addr)
3755 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3758 uint64_t ldq_be_phys(hwaddr addr)
3760 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3763 /* XXX: optimize */
3764 uint32_t ldub_phys(hwaddr addr)
3766 uint8_t val;
3767 cpu_physical_memory_read(addr, &val, 1);
3768 return val;
3771 /* warning: addr must be aligned */
3772 static inline uint32_t lduw_phys_internal(hwaddr addr,
3773 enum device_endian endian)
3775 uint8_t *ptr;
3776 uint64_t val;
3777 MemoryRegionSection *section;
3779 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3781 if (!(memory_region_is_ram(section->mr) ||
3782 memory_region_is_romd(section->mr))) {
3783 /* I/O case */
3784 addr = memory_region_section_addr(section, addr);
3785 val = io_mem_read(section->mr, addr, 2);
3786 #if defined(TARGET_WORDS_BIGENDIAN)
3787 if (endian == DEVICE_LITTLE_ENDIAN) {
3788 val = bswap16(val);
3790 #else
3791 if (endian == DEVICE_BIG_ENDIAN) {
3792 val = bswap16(val);
3794 #endif
3795 } else {
3796 /* RAM case */
3797 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3798 & TARGET_PAGE_MASK)
3799 + memory_region_section_addr(section, addr));
3800 switch (endian) {
3801 case DEVICE_LITTLE_ENDIAN:
3802 val = lduw_le_p(ptr);
3803 break;
3804 case DEVICE_BIG_ENDIAN:
3805 val = lduw_be_p(ptr);
3806 break;
3807 default:
3808 val = lduw_p(ptr);
3809 break;
3812 return val;
3815 uint32_t lduw_phys(hwaddr addr)
3817 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3820 uint32_t lduw_le_phys(hwaddr addr)
3822 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3825 uint32_t lduw_be_phys(hwaddr addr)
3827 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3830 /* warning: addr must be aligned. The ram page is not masked as dirty
3831 and the code inside is not invalidated. It is useful if the dirty
3832 bits are used to track modified PTEs */
3833 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3835 uint8_t *ptr;
3836 MemoryRegionSection *section;
3838 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3840 if (!memory_region_is_ram(section->mr) || section->readonly) {
3841 addr = memory_region_section_addr(section, addr);
3842 if (memory_region_is_ram(section->mr)) {
3843 section = &phys_sections[phys_section_rom];
3845 io_mem_write(section->mr, addr, val, 4);
3846 } else {
3847 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3848 & TARGET_PAGE_MASK)
3849 + memory_region_section_addr(section, addr);
3850 ptr = qemu_get_ram_ptr(addr1);
3851 stl_p(ptr, val);
3853 if (unlikely(in_migration)) {
3854 if (!cpu_physical_memory_is_dirty(addr1)) {
3855 /* invalidate code */
3856 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3857 /* set dirty bit */
3858 cpu_physical_memory_set_dirty_flags(
3859 addr1, (0xff & ~CODE_DIRTY_FLAG));
3865 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3867 uint8_t *ptr;
3868 MemoryRegionSection *section;
3870 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3872 if (!memory_region_is_ram(section->mr) || section->readonly) {
3873 addr = memory_region_section_addr(section, addr);
3874 if (memory_region_is_ram(section->mr)) {
3875 section = &phys_sections[phys_section_rom];
3877 #ifdef TARGET_WORDS_BIGENDIAN
3878 io_mem_write(section->mr, addr, val >> 32, 4);
3879 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3880 #else
3881 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3882 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3883 #endif
3884 } else {
3885 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3886 & TARGET_PAGE_MASK)
3887 + memory_region_section_addr(section, addr));
3888 stq_p(ptr, val);
3892 /* warning: addr must be aligned */
3893 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3894 enum device_endian endian)
3896 uint8_t *ptr;
3897 MemoryRegionSection *section;
3899 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3901 if (!memory_region_is_ram(section->mr) || section->readonly) {
3902 addr = memory_region_section_addr(section, addr);
3903 if (memory_region_is_ram(section->mr)) {
3904 section = &phys_sections[phys_section_rom];
3906 #if defined(TARGET_WORDS_BIGENDIAN)
3907 if (endian == DEVICE_LITTLE_ENDIAN) {
3908 val = bswap32(val);
3910 #else
3911 if (endian == DEVICE_BIG_ENDIAN) {
3912 val = bswap32(val);
3914 #endif
3915 io_mem_write(section->mr, addr, val, 4);
3916 } else {
3917 unsigned long addr1;
3918 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3919 + memory_region_section_addr(section, addr);
3920 /* RAM case */
3921 ptr = qemu_get_ram_ptr(addr1);
3922 switch (endian) {
3923 case DEVICE_LITTLE_ENDIAN:
3924 stl_le_p(ptr, val);
3925 break;
3926 case DEVICE_BIG_ENDIAN:
3927 stl_be_p(ptr, val);
3928 break;
3929 default:
3930 stl_p(ptr, val);
3931 break;
3933 invalidate_and_set_dirty(addr1, 4);
3937 void stl_phys(hwaddr addr, uint32_t val)
3939 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3942 void stl_le_phys(hwaddr addr, uint32_t val)
3944 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3947 void stl_be_phys(hwaddr addr, uint32_t val)
3949 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3952 /* XXX: optimize */
3953 void stb_phys(hwaddr addr, uint32_t val)
3955 uint8_t v = val;
3956 cpu_physical_memory_write(addr, &v, 1);
3959 /* warning: addr must be aligned */
3960 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3961 enum device_endian endian)
3963 uint8_t *ptr;
3964 MemoryRegionSection *section;
3966 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3968 if (!memory_region_is_ram(section->mr) || section->readonly) {
3969 addr = memory_region_section_addr(section, addr);
3970 if (memory_region_is_ram(section->mr)) {
3971 section = &phys_sections[phys_section_rom];
3973 #if defined(TARGET_WORDS_BIGENDIAN)
3974 if (endian == DEVICE_LITTLE_ENDIAN) {
3975 val = bswap16(val);
3977 #else
3978 if (endian == DEVICE_BIG_ENDIAN) {
3979 val = bswap16(val);
3981 #endif
3982 io_mem_write(section->mr, addr, val, 2);
3983 } else {
3984 unsigned long addr1;
3985 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3986 + memory_region_section_addr(section, addr);
3987 /* RAM case */
3988 ptr = qemu_get_ram_ptr(addr1);
3989 switch (endian) {
3990 case DEVICE_LITTLE_ENDIAN:
3991 stw_le_p(ptr, val);
3992 break;
3993 case DEVICE_BIG_ENDIAN:
3994 stw_be_p(ptr, val);
3995 break;
3996 default:
3997 stw_p(ptr, val);
3998 break;
4000 invalidate_and_set_dirty(addr1, 2);
4004 void stw_phys(hwaddr addr, uint32_t val)
4006 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4009 void stw_le_phys(hwaddr addr, uint32_t val)
4011 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4014 void stw_be_phys(hwaddr addr, uint32_t val)
4016 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4019 /* XXX: optimize */
4020 void stq_phys(hwaddr addr, uint64_t val)
4022 val = tswap64(val);
4023 cpu_physical_memory_write(addr, &val, 8);
4026 void stq_le_phys(hwaddr addr, uint64_t val)
4028 val = cpu_to_le64(val);
4029 cpu_physical_memory_write(addr, &val, 8);
4032 void stq_be_phys(hwaddr addr, uint64_t val)
4034 val = cpu_to_be64(val);
4035 cpu_physical_memory_write(addr, &val, 8);
4038 /* virtual memory access for debug (includes writing to ROM) */
4039 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4040 uint8_t *buf, int len, int is_write)
4042 int l;
4043 hwaddr phys_addr;
4044 target_ulong page;
4046 while (len > 0) {
4047 page = addr & TARGET_PAGE_MASK;
4048 phys_addr = cpu_get_phys_page_debug(env, page);
4049 /* if no physical page mapped, return an error */
4050 if (phys_addr == -1)
4051 return -1;
4052 l = (page + TARGET_PAGE_SIZE) - addr;
4053 if (l > len)
4054 l = len;
4055 phys_addr += (addr & ~TARGET_PAGE_MASK);
4056 if (is_write)
4057 cpu_physical_memory_write_rom(phys_addr, buf, l);
4058 else
4059 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4060 len -= l;
4061 buf += l;
4062 addr += l;
4064 return 0;
4066 #endif
4068 /* in deterministic execution mode, instructions doing device I/Os
4069 must be at the end of the TB */
4070 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4072 TranslationBlock *tb;
4073 uint32_t n, cflags;
4074 target_ulong pc, cs_base;
4075 uint64_t flags;
4077 tb = tb_find_pc(retaddr);
4078 if (!tb) {
4079 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4080 (void *)retaddr);
4082 n = env->icount_decr.u16.low + tb->icount;
4083 cpu_restore_state(tb, env, retaddr);
4084 /* Calculate how many instructions had been executed before the fault
4085 occurred. */
4086 n = n - env->icount_decr.u16.low;
4087 /* Generate a new TB ending on the I/O insn. */
4088 n++;
4089 /* On MIPS and SH, delay slot instructions can only be restarted if
4090 they were already the first instruction in the TB. If this is not
4091 the first instruction in a TB then re-execute the preceding
4092 branch. */
4093 #if defined(TARGET_MIPS)
4094 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4095 env->active_tc.PC -= 4;
4096 env->icount_decr.u16.low++;
4097 env->hflags &= ~MIPS_HFLAG_BMASK;
4099 #elif defined(TARGET_SH4)
4100 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4101 && n > 1) {
4102 env->pc -= 2;
4103 env->icount_decr.u16.low++;
4104 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4106 #endif
4107 /* This should never happen. */
4108 if (n > CF_COUNT_MASK)
4109 cpu_abort(env, "TB too big during recompile");
4111 cflags = n | CF_LAST_IO;
4112 pc = tb->pc;
4113 cs_base = tb->cs_base;
4114 flags = tb->flags;
4115 tb_phys_invalidate(tb, -1);
4116 /* FIXME: In theory this could raise an exception. In practice
4117 we have already translated the block once so it's probably ok. */
4118 tb_gen_code(env, pc, cs_base, flags, cflags);
4119 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4120 the first in the TB) then we end up generating a whole new TB and
4121 repeating the fault, which is horribly inefficient.
4122 Better would be to execute just this insn uncached, or generate a
4123 second new TB. */
4124 cpu_resume_from_signal(env, NULL);
4127 #if !defined(CONFIG_USER_ONLY)
4129 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4131 int i, target_code_size, max_target_code_size;
4132 int direct_jmp_count, direct_jmp2_count, cross_page;
4133 TranslationBlock *tb;
4135 target_code_size = 0;
4136 max_target_code_size = 0;
4137 cross_page = 0;
4138 direct_jmp_count = 0;
4139 direct_jmp2_count = 0;
4140 for(i = 0; i < nb_tbs; i++) {
4141 tb = &tbs[i];
4142 target_code_size += tb->size;
4143 if (tb->size > max_target_code_size)
4144 max_target_code_size = tb->size;
4145 if (tb->page_addr[1] != -1)
4146 cross_page++;
4147 if (tb->tb_next_offset[0] != 0xffff) {
4148 direct_jmp_count++;
4149 if (tb->tb_next_offset[1] != 0xffff) {
4150 direct_jmp2_count++;
4154 /* XXX: avoid using doubles ? */
4155 cpu_fprintf(f, "Translation buffer state:\n");
4156 cpu_fprintf(f, "gen code size %td/%zd\n",
4157 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4158 cpu_fprintf(f, "TB count %d/%d\n",
4159 nb_tbs, code_gen_max_blocks);
4160 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4161 nb_tbs ? target_code_size / nb_tbs : 0,
4162 max_target_code_size);
4163 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4164 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4165 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4166 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4167 cross_page,
4168 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4169 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4170 direct_jmp_count,
4171 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4172 direct_jmp2_count,
4173 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4174 cpu_fprintf(f, "\nStatistics:\n");
4175 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4176 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4177 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4178 tcg_dump_info(f, cpu_fprintf);
/*
 * A helper function for the _utterly broken_ virtio device model to find
 * out if it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined at build time,
 * false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
    bool big_endian = false;

#if defined(TARGET_WORDS_BIGENDIAN)
    big_endian = true;
#endif
    return big_endian;
}
4195 #endif
4197 #ifndef CONFIG_USER_ONLY
4198 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4200 MemoryRegionSection *section;
4202 section = phys_page_find(address_space_memory.dispatch,
4203 phys_addr >> TARGET_PAGE_BITS);
4205 return !(memory_region_is_ram(section->mr) ||
4206 memory_region_is_romd(section->mr));
4208 #endif