pci-assign: Use PCI-2.3-based shared legacy interrupts
[qemu-kvm.git] / exec.c
blob1e5bbd6378a8ee2adaedb10ab7a1cad6252a24a3
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
/* Compile-time debug switches; uncomment to enable the matching traces. */
//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_TLB
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK
//#define DEBUG_TLB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif

/* Number of code-page writes after which a per-page code bitmap is built
   (compared against PageDesc.code_write_count). */
#define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
   have limited branch ranges (possibly also PPC) so place it in a
   section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32)
/* Maximum alignment for Win32 is 16. */
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

/* Buffer that receives the generated prologue (see tcg_prologue_init()). */
uint8_t code_gen_prologue[1024] code_gen_section;
/* Start and size in bytes of the translated-code buffer. */
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
/* Next free position inside code_gen_buffer. */
static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
122 static MemoryRegion io_mem_subpage_ram;
124 #endif
126 CPUState *first_cpu;
127 /* current CPU in the current thread. It is only valid inside
128 cpu_exec() */
129 DEFINE_TLS(CPUState *,cpu_single_env);
130 /* 0 = Do not count executed instructions.
131 1 = Precise instruction counting.
132 2 = Adaptive rate instruction counting. */
133 int use_icount = 0;
135 typedef struct PageDesc {
136 /* list of TBs intersecting this ram page */
137 TranslationBlock *first_tb;
138 /* in order to optimize self modifying code, we count the number
139 of lookups we do to a given page to use a bitmap */
140 unsigned int code_write_count;
141 uint8_t *code_bitmap;
142 #if defined(CONFIG_USER_ONLY)
143 unsigned long flags;
144 #endif
145 } PageDesc;
147 /* In system mode we want L1_MAP to be based on ram offsets,
148 while in user mode we want it to be based on virtual addresses. */
149 #if !defined(CONFIG_USER_ONLY)
150 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
151 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
154 #endif
155 #else
156 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
157 #endif
159 /* Size of the L2 (and L3, etc) page tables. */
160 #define L2_BITS 10
161 #define L2_SIZE (1 << L2_BITS)
163 #define P_L2_LEVELS \
164 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
166 /* The bits remaining after N lower levels of page tables. */
167 #define V_L1_BITS_REM \
168 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_size;
182 unsigned long qemu_host_page_mask;
184 /* This is a multi-level map on the virtual address space.
185 The bottom level has pointers to PageDesc. */
186 static void *l1_map[V_L1_SIZE];
188 #if !defined(CONFIG_USER_ONLY)
189 typedef struct PhysPageEntry PhysPageEntry;
191 static MemoryRegionSection *phys_sections;
192 static unsigned phys_sections_nb, phys_sections_nb_alloc;
193 static uint16_t phys_section_unassigned;
195 struct PhysPageEntry {
196 uint16_t is_leaf : 1;
197 /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
198 uint16_t ptr : 15;
201 /* Simple allocator for PhysPageEntry nodes */
202 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
203 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
205 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
207 /* This is a multi-level map on the physical address space.
208 The bottom level has pointers to MemoryRegionSections. */
209 static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
211 static void io_mem_init(void);
212 static void memory_map_init(void);
214 /* io memory support */
215 MemoryRegion *io_mem_region[IO_MEM_NB_ENTRIES];
216 static char io_mem_used[IO_MEM_NB_ENTRIES];
217 static MemoryRegion io_mem_watch;
218 #endif
/* log support */
#ifdef WIN32
static const char *logfilename = "qemu.log";
#else
static const char *logfilename = "/tmp/qemu.log";
#endif
FILE *logfile;
int loglevel;
static int log_append = 0;

/* statistics */
#if !defined(CONFIG_USER_ONLY)
static int tlb_flush_count;
#endif
static int tb_flush_count;
static int tb_phys_invalidate_count;
/*
 * Make the memory range [addr, addr + size) readable, writable and
 * executable.  The result of the protection call is not checked.
 */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;

    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
static void map_exec(void *addr, long size)
{
    /* uintptr_t is the integer type guaranteed to hold a pointer value. */
    uintptr_t start, end, page_size;

    page_size = getpagesize();

    /* Round the start down and the end up to host page boundaries. */
    start = (uintptr_t)addr;
    start &= ~(page_size - 1);

    end = (uintptr_t)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
263 static void page_init(void)
265 /* NOTE: we can always suppose that qemu_host_page_size >=
266 TARGET_PAGE_SIZE */
267 #ifdef _WIN32
269 SYSTEM_INFO system_info;
271 GetSystemInfo(&system_info);
272 qemu_real_host_page_size = system_info.dwPageSize;
274 #else
275 qemu_real_host_page_size = getpagesize();
276 #endif
277 if (qemu_host_page_size == 0)
278 qemu_host_page_size = qemu_real_host_page_size;
279 if (qemu_host_page_size < TARGET_PAGE_SIZE)
280 qemu_host_page_size = TARGET_PAGE_SIZE;
281 qemu_host_page_mask = ~(qemu_host_page_size - 1);
283 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
285 #ifdef HAVE_KINFO_GETVMMAP
286 struct kinfo_vmentry *freep;
287 int i, cnt;
289 freep = kinfo_getvmmap(getpid(), &cnt);
290 if (freep) {
291 mmap_lock();
292 for (i = 0; i < cnt; i++) {
293 unsigned long startaddr, endaddr;
295 startaddr = freep[i].kve_start;
296 endaddr = freep[i].kve_end;
297 if (h2g_valid(startaddr)) {
298 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
300 if (h2g_valid(endaddr)) {
301 endaddr = h2g(endaddr);
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 } else {
304 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
305 endaddr = ~0ul;
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
307 #endif
311 free(freep);
312 mmap_unlock();
314 #else
315 FILE *f;
317 last_brk = (unsigned long)sbrk(0);
319 f = fopen("/compat/linux/proc/self/maps", "r");
320 if (f) {
321 mmap_lock();
323 do {
324 unsigned long startaddr, endaddr;
325 int n;
327 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
329 if (n == 2 && h2g_valid(startaddr)) {
330 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
332 if (h2g_valid(endaddr)) {
333 endaddr = h2g(endaddr);
334 } else {
335 endaddr = ~0ul;
337 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
339 } while (!feof(f));
341 fclose(f);
342 mmap_unlock();
344 #endif
346 #endif
349 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
351 PageDesc *pd;
352 void **lp;
353 int i;
355 #if defined(CONFIG_USER_ONLY)
356 /* We can't use g_malloc because it may recurse into a locked mutex. */
357 # define ALLOC(P, SIZE) \
358 do { \
359 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
360 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
361 } while (0)
362 #else
363 # define ALLOC(P, SIZE) \
364 do { P = g_malloc0(SIZE); } while (0)
365 #endif
367 /* Level 1. Always allocated. */
368 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
370 /* Level 2..N-1. */
371 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
372 void **p = *lp;
374 if (p == NULL) {
375 if (!alloc) {
376 return NULL;
378 ALLOC(p, sizeof(void *) * L2_SIZE);
379 *lp = p;
382 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
385 pd = *lp;
386 if (pd == NULL) {
387 if (!alloc) {
388 return NULL;
390 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
391 *lp = pd;
394 #undef ALLOC
396 return pd + (index & (L2_SIZE - 1));
399 static inline PageDesc *page_find(tb_page_addr_t index)
401 return page_find_alloc(index, 0);
404 #if !defined(CONFIG_USER_ONLY)
406 static void phys_map_node_reserve(unsigned nodes)
408 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
409 typedef PhysPageEntry Node[L2_SIZE];
410 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
411 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
412 phys_map_nodes_nb + nodes);
413 phys_map_nodes = g_renew(Node, phys_map_nodes,
414 phys_map_nodes_nb_alloc);
418 static uint16_t phys_map_node_alloc(void)
420 unsigned i;
421 uint16_t ret;
423 ret = phys_map_nodes_nb++;
424 assert(ret != PHYS_MAP_NODE_NIL);
425 assert(ret != phys_map_nodes_nb_alloc);
426 for (i = 0; i < L2_SIZE; ++i) {
427 phys_map_nodes[ret][i].is_leaf = 0;
428 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
430 return ret;
433 static void phys_map_nodes_reset(void)
435 phys_map_nodes_nb = 0;
439 static void phys_page_set_level(PhysPageEntry *lp, target_phys_addr_t *index,
440 target_phys_addr_t *nb, uint16_t leaf,
441 int level)
443 PhysPageEntry *p;
444 int i;
445 target_phys_addr_t step = (target_phys_addr_t)1 << (level * L2_BITS);
447 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
448 lp->ptr = phys_map_node_alloc();
449 p = phys_map_nodes[lp->ptr];
450 if (level == 0) {
451 for (i = 0; i < L2_SIZE; i++) {
452 p[i].is_leaf = 1;
453 p[i].ptr = phys_section_unassigned;
456 } else {
457 p = phys_map_nodes[lp->ptr];
459 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
461 while (*nb && lp < &p[L2_SIZE]) {
462 if ((*index & (step - 1)) == 0 && *nb >= step) {
463 lp->is_leaf = true;
464 lp->ptr = leaf;
465 *index += step;
466 *nb -= step;
467 } else {
468 phys_page_set_level(lp, index, nb, leaf, level - 1);
470 ++lp;
474 static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
475 uint16_t leaf)
477 /* Wildly overreserve - it doesn't matter much. */
478 phys_map_node_reserve(3 * P_L2_LEVELS);
480 phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
483 static MemoryRegionSection phys_page_find(target_phys_addr_t index)
485 PhysPageEntry lp = phys_map;
486 PhysPageEntry *p;
487 int i;
488 MemoryRegionSection section;
489 target_phys_addr_t delta;
490 uint16_t s_index = phys_section_unassigned;
492 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
493 if (lp.ptr == PHYS_MAP_NODE_NIL) {
494 goto not_found;
496 p = phys_map_nodes[lp.ptr];
497 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
500 s_index = lp.ptr;
501 not_found:
502 section = phys_sections[s_index];
503 index <<= TARGET_PAGE_BITS;
504 assert(section.offset_within_address_space <= index
505 && index <= section.offset_within_address_space + section.size-1);
506 delta = index - section.offset_within_address_space;
507 section.offset_within_address_space += delta;
508 section.offset_within_region += delta;
509 section.size -= delta;
510 return section;
513 static void tlb_protect_code(ram_addr_t ram_addr);
514 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
515 target_ulong vaddr);
516 #define mmap_lock() do { } while(0)
517 #define mmap_unlock() do { } while(0)
518 #endif
#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc will be used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif
533 static void code_gen_alloc(unsigned long tb_size)
535 #ifdef USE_STATIC_CODE_GEN_BUFFER
536 code_gen_buffer = static_code_gen_buffer;
537 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
538 map_exec(code_gen_buffer, code_gen_buffer_size);
539 #else
540 code_gen_buffer_size = tb_size;
541 if (code_gen_buffer_size == 0) {
542 #if defined(CONFIG_USER_ONLY)
543 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
544 #else
545 /* XXX: needs adjustments */
546 code_gen_buffer_size = (unsigned long)(ram_size / 4);
547 #endif
549 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
550 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
551 /* The code gen buffer location may have constraints depending on
552 the host cpu and OS */
553 #if defined(__linux__)
555 int flags;
556 void *start = NULL;
558 flags = MAP_PRIVATE | MAP_ANONYMOUS;
559 #if defined(__x86_64__)
560 flags |= MAP_32BIT;
561 /* Cannot map more than that */
562 if (code_gen_buffer_size > (800 * 1024 * 1024))
563 code_gen_buffer_size = (800 * 1024 * 1024);
564 #elif defined(__sparc_v9__)
565 // Map the buffer below 2G, so we can use direct calls and branches
566 flags |= MAP_FIXED;
567 start = (void *) 0x60000000UL;
568 if (code_gen_buffer_size > (512 * 1024 * 1024))
569 code_gen_buffer_size = (512 * 1024 * 1024);
570 #elif defined(__arm__)
571 /* Keep the buffer no bigger than 16MB to branch between blocks */
572 if (code_gen_buffer_size > 16 * 1024 * 1024)
573 code_gen_buffer_size = 16 * 1024 * 1024;
574 #elif defined(__s390x__)
575 /* Map the buffer so that we can use direct calls and branches. */
576 /* We have a +- 4GB range on the branches; leave some slop. */
577 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
578 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
580 start = (void *)0x90000000UL;
581 #endif
582 code_gen_buffer = mmap(start, code_gen_buffer_size,
583 PROT_WRITE | PROT_READ | PROT_EXEC,
584 flags, -1, 0);
585 if (code_gen_buffer == MAP_FAILED) {
586 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
587 exit(1);
590 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
591 || defined(__DragonFly__) || defined(__OpenBSD__) \
592 || defined(__NetBSD__)
594 int flags;
595 void *addr = NULL;
596 flags = MAP_PRIVATE | MAP_ANONYMOUS;
597 #if defined(__x86_64__)
598 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
599 * 0x40000000 is free */
600 flags |= MAP_FIXED;
601 addr = (void *)0x40000000;
602 /* Cannot map more than that */
603 if (code_gen_buffer_size > (800 * 1024 * 1024))
604 code_gen_buffer_size = (800 * 1024 * 1024);
605 #elif defined(__sparc_v9__)
606 // Map the buffer below 2G, so we can use direct calls and branches
607 flags |= MAP_FIXED;
608 addr = (void *) 0x60000000UL;
609 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
610 code_gen_buffer_size = (512 * 1024 * 1024);
612 #endif
613 code_gen_buffer = mmap(addr, code_gen_buffer_size,
614 PROT_WRITE | PROT_READ | PROT_EXEC,
615 flags, -1, 0);
616 if (code_gen_buffer == MAP_FAILED) {
617 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
618 exit(1);
621 #else
622 code_gen_buffer = g_malloc(code_gen_buffer_size);
623 map_exec(code_gen_buffer, code_gen_buffer_size);
624 #endif
625 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
626 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
627 code_gen_buffer_max_size = code_gen_buffer_size -
628 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
629 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
630 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
633 /* Must be called before using the QEMU cpus. 'tb_size' is the size
634 (in bytes) allocated to the translation buffer. Zero means default
635 size. */
636 void tcg_exec_init(unsigned long tb_size)
638 cpu_gen_init();
639 code_gen_alloc(tb_size);
640 code_gen_ptr = code_gen_buffer;
641 page_init();
642 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
643 /* There's no guest base to take into account, so go ahead and
644 initialize the prologue now. */
645 tcg_prologue_init(&tcg_ctx);
646 #endif
649 bool tcg_enabled(void)
651 return code_gen_buffer != NULL;
/* In system-mode builds, initialise the memory map and I/O memory;
   does nothing in user-mode builds. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
662 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
664 static int cpu_common_post_load(void *opaque, int version_id)
666 CPUState *env = opaque;
668 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
669 version_id is increased. */
670 env->interrupt_request &= ~0x01;
671 tlb_flush(env, 1);
673 return 0;
676 static const VMStateDescription vmstate_cpu_common = {
677 .name = "cpu_common",
678 .version_id = 1,
679 .minimum_version_id = 1,
680 .minimum_version_id_old = 1,
681 .post_load = cpu_common_post_load,
682 .fields = (VMStateField []) {
683 VMSTATE_UINT32(halted, CPUState),
684 VMSTATE_UINT32(interrupt_request, CPUState),
685 VMSTATE_END_OF_LIST()
688 #endif
690 CPUState *qemu_get_cpu(int cpu)
692 CPUState *env = first_cpu;
694 while (env) {
695 if (env->cpu_index == cpu)
696 break;
697 env = env->next_cpu;
700 return env;
703 void cpu_exec_init(CPUState *env)
705 CPUState **penv;
706 int cpu_index;
708 #if defined(CONFIG_USER_ONLY)
709 cpu_list_lock();
710 #endif
711 env->next_cpu = NULL;
712 penv = &first_cpu;
713 cpu_index = 0;
714 while (*penv != NULL) {
715 penv = &(*penv)->next_cpu;
716 cpu_index++;
718 env->cpu_index = cpu_index;
719 env->numa_node = 0;
720 QTAILQ_INIT(&env->breakpoints);
721 QTAILQ_INIT(&env->watchpoints);
722 #ifndef CONFIG_USER_ONLY
723 env->thread_id = qemu_get_thread_id();
724 #endif
725 *penv = env;
726 #if defined(CONFIG_USER_ONLY)
727 cpu_list_unlock();
728 #endif
729 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
730 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
731 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
732 cpu_save, cpu_load, env);
733 #endif
736 /* Allocate a new translation block. Flush the translation buffer if
737 too many translation blocks or too much generated code. */
738 static TranslationBlock *tb_alloc(target_ulong pc)
740 TranslationBlock *tb;
742 if (nb_tbs >= code_gen_max_blocks ||
743 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
744 return NULL;
745 tb = &tbs[nb_tbs++];
746 tb->pc = pc;
747 tb->cflags = 0;
748 return tb;
751 void tb_free(TranslationBlock *tb)
753 /* In practice this is mostly used for single use temporary TB
754 Ignore the hard cases and just back up if this TB happens to
755 be the last one generated. */
756 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
757 code_gen_ptr = tb->tc_ptr;
758 nb_tbs--;
762 static inline void invalidate_page_bitmap(PageDesc *p)
764 if (p->code_bitmap) {
765 g_free(p->code_bitmap);
766 p->code_bitmap = NULL;
768 p->code_write_count = 0;
771 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
773 static void page_flush_tb_1 (int level, void **lp)
775 int i;
777 if (*lp == NULL) {
778 return;
780 if (level == 0) {
781 PageDesc *pd = *lp;
782 for (i = 0; i < L2_SIZE; ++i) {
783 pd[i].first_tb = NULL;
784 invalidate_page_bitmap(pd + i);
786 } else {
787 void **pp = *lp;
788 for (i = 0; i < L2_SIZE; ++i) {
789 page_flush_tb_1 (level - 1, pp + i);
794 static void page_flush_tb(void)
796 int i;
797 for (i = 0; i < V_L1_SIZE; i++) {
798 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
802 /* flush all the translation blocks */
803 /* XXX: tb_flush is currently not thread safe */
804 void tb_flush(CPUState *env1)
806 CPUState *env;
807 #if defined(DEBUG_FLUSH)
808 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
809 (unsigned long)(code_gen_ptr - code_gen_buffer),
810 nb_tbs, nb_tbs > 0 ?
811 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
812 #endif
813 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
814 cpu_abort(env1, "Internal error: code buffer overflow\n");
816 nb_tbs = 0;
818 for(env = first_cpu; env != NULL; env = env->next_cpu) {
819 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
822 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
823 page_flush_tb();
825 code_gen_ptr = code_gen_buffer;
826 /* XXX: flush processor icache at this point if cache flush is
827 expensive */
828 tb_flush_count++;
831 #ifdef DEBUG_TB_CHECK
833 static void tb_invalidate_check(target_ulong address)
835 TranslationBlock *tb;
836 int i;
837 address &= TARGET_PAGE_MASK;
838 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
839 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
840 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
841 address >= tb->pc + tb->size)) {
842 printf("ERROR invalidate: address=" TARGET_FMT_lx
843 " PC=%08lx size=%04x\n",
844 address, (long)tb->pc, tb->size);
850 /* verify that all the pages have correct rights for code */
851 static void tb_page_check(void)
853 TranslationBlock *tb;
854 int i, flags1, flags2;
856 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
857 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
858 flags1 = page_get_flags(tb->pc);
859 flags2 = page_get_flags(tb->pc + tb->size - 1);
860 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
861 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
862 (long)tb->pc, tb->size, flags1, flags2);
868 #endif
870 /* invalidate one TB */
871 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
872 int next_offset)
874 TranslationBlock *tb1;
875 for(;;) {
876 tb1 = *ptb;
877 if (tb1 == tb) {
878 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
879 break;
881 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
885 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
887 TranslationBlock *tb1;
888 unsigned int n1;
890 for(;;) {
891 tb1 = *ptb;
892 n1 = (long)tb1 & 3;
893 tb1 = (TranslationBlock *)((long)tb1 & ~3);
894 if (tb1 == tb) {
895 *ptb = tb1->page_next[n1];
896 break;
898 ptb = &tb1->page_next[n1];
902 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
904 TranslationBlock *tb1, **ptb;
905 unsigned int n1;
907 ptb = &tb->jmp_next[n];
908 tb1 = *ptb;
909 if (tb1) {
910 /* find tb(n) in circular list */
911 for(;;) {
912 tb1 = *ptb;
913 n1 = (long)tb1 & 3;
914 tb1 = (TranslationBlock *)((long)tb1 & ~3);
915 if (n1 == n && tb1 == tb)
916 break;
917 if (n1 == 2) {
918 ptb = &tb1->jmp_first;
919 } else {
920 ptb = &tb1->jmp_next[n1];
923 /* now we can suppress tb(n) from the list */
924 *ptb = tb->jmp_next[n];
926 tb->jmp_next[n] = NULL;
930 /* reset the jump entry 'n' of a TB so that it is not chained to
931 another TB */
932 static inline void tb_reset_jump(TranslationBlock *tb, int n)
934 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
937 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
939 CPUState *env;
940 PageDesc *p;
941 unsigned int h, n1;
942 tb_page_addr_t phys_pc;
943 TranslationBlock *tb1, *tb2;
945 /* remove the TB from the hash list */
946 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
947 h = tb_phys_hash_func(phys_pc);
948 tb_remove(&tb_phys_hash[h], tb,
949 offsetof(TranslationBlock, phys_hash_next));
951 /* remove the TB from the page list */
952 if (tb->page_addr[0] != page_addr) {
953 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
954 tb_page_remove(&p->first_tb, tb);
955 invalidate_page_bitmap(p);
957 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
958 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
959 tb_page_remove(&p->first_tb, tb);
960 invalidate_page_bitmap(p);
963 tb_invalidated_flag = 1;
965 /* remove the TB from the hash list */
966 h = tb_jmp_cache_hash_func(tb->pc);
967 for(env = first_cpu; env != NULL; env = env->next_cpu) {
968 if (env->tb_jmp_cache[h] == tb)
969 env->tb_jmp_cache[h] = NULL;
972 /* suppress this TB from the two jump lists */
973 tb_jmp_remove(tb, 0);
974 tb_jmp_remove(tb, 1);
976 /* suppress any remaining jumps to this TB */
977 tb1 = tb->jmp_first;
978 for(;;) {
979 n1 = (long)tb1 & 3;
980 if (n1 == 2)
981 break;
982 tb1 = (TranslationBlock *)((long)tb1 & ~3);
983 tb2 = tb1->jmp_next[n1];
984 tb_reset_jump(tb1, n1);
985 tb1->jmp_next[n1] = NULL;
986 tb1 = tb2;
988 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
990 tb_phys_invalidate_count++;
/* Set bits [start, start + len) of the bit array 'tab' (bit i lives in
   byte i / 8, at bit position i % 8).  Does nothing when len <= 0. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    end = start + len;
    tab += start >> 3;
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* The whole range lies within a single byte. */
        if (start < end) {
            mask &= ~(0xff << (end & 7));
            *tab |= mask;
        }
    } else {
        /* Leading partial byte, then whole bytes, then the trailing
           partial byte (if any). */
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
1020 static void build_page_bitmap(PageDesc *p)
1022 int n, tb_start, tb_end;
1023 TranslationBlock *tb;
1025 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1027 tb = p->first_tb;
1028 while (tb != NULL) {
1029 n = (long)tb & 3;
1030 tb = (TranslationBlock *)((long)tb & ~3);
1031 /* NOTE: this is subtle as a TB may span two physical pages */
1032 if (n == 0) {
1033 /* NOTE: tb_end may be after the end of the page, but
1034 it is not a problem */
1035 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1036 tb_end = tb_start + tb->size;
1037 if (tb_end > TARGET_PAGE_SIZE)
1038 tb_end = TARGET_PAGE_SIZE;
1039 } else {
1040 tb_start = 0;
1041 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1043 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1044 tb = tb->page_next[n];
1048 TranslationBlock *tb_gen_code(CPUState *env,
1049 target_ulong pc, target_ulong cs_base,
1050 int flags, int cflags)
1052 TranslationBlock *tb;
1053 uint8_t *tc_ptr;
1054 tb_page_addr_t phys_pc, phys_page2;
1055 target_ulong virt_page2;
1056 int code_gen_size;
1058 phys_pc = get_page_addr_code(env, pc);
1059 tb = tb_alloc(pc);
1060 if (!tb) {
1061 /* flush must be done */
1062 tb_flush(env);
1063 /* cannot fail at this point */
1064 tb = tb_alloc(pc);
1065 /* Don't forget to invalidate previous TB info. */
1066 tb_invalidated_flag = 1;
1068 tc_ptr = code_gen_ptr;
1069 tb->tc_ptr = tc_ptr;
1070 tb->cs_base = cs_base;
1071 tb->flags = flags;
1072 tb->cflags = cflags;
1073 cpu_gen_code(env, tb, &code_gen_size);
1074 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1076 /* check next page if needed */
1077 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1078 phys_page2 = -1;
1079 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1080 phys_page2 = get_page_addr_code(env, virt_page2);
1082 tb_link_page(tb, phys_pc, phys_page2);
1083 return tb;
1086 /* invalidate all TBs which intersect with the target physical page
1087 starting in range [start;end[. NOTE: start and end must refer to
1088 the same physical page. 'is_cpu_write_access' should be true if called
1089 from a real cpu write access: the virtual CPU will exit the current
1090 TB if code is modified inside this TB. */
1091 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1092 int is_cpu_write_access)
1094 TranslationBlock *tb, *tb_next, *saved_tb;
1095 CPUState *env = cpu_single_env;
1096 tb_page_addr_t tb_start, tb_end;
1097 PageDesc *p;
1098 int n;
1099 #ifdef TARGET_HAS_PRECISE_SMC
1100 int current_tb_not_found = is_cpu_write_access;
1101 TranslationBlock *current_tb = NULL;
1102 int current_tb_modified = 0;
1103 target_ulong current_pc = 0;
1104 target_ulong current_cs_base = 0;
1105 int current_flags = 0;
1106 #endif /* TARGET_HAS_PRECISE_SMC */
1108 p = page_find(start >> TARGET_PAGE_BITS);
1109 if (!p)
1110 return;
1111 if (!p->code_bitmap &&
1112 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1113 is_cpu_write_access) {
1114 /* build code bitmap */
1115 build_page_bitmap(p);
1118 /* we remove all the TBs in the range [start, end[ */
1119 /* XXX: see if in some cases it could be faster to invalidate all the code */
1120 tb = p->first_tb;
1121 while (tb != NULL) {
1122 n = (long)tb & 3;
1123 tb = (TranslationBlock *)((long)tb & ~3);
1124 tb_next = tb->page_next[n];
1125 /* NOTE: this is subtle as a TB may span two physical pages */
1126 if (n == 0) {
1127 /* NOTE: tb_end may be after the end of the page, but
1128 it is not a problem */
1129 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1130 tb_end = tb_start + tb->size;
1131 } else {
1132 tb_start = tb->page_addr[1];
1133 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1135 if (!(tb_end <= start || tb_start >= end)) {
1136 #ifdef TARGET_HAS_PRECISE_SMC
1137 if (current_tb_not_found) {
1138 current_tb_not_found = 0;
1139 current_tb = NULL;
1140 if (env->mem_io_pc) {
1141 /* now we have a real cpu fault */
1142 current_tb = tb_find_pc(env->mem_io_pc);
1145 if (current_tb == tb &&
1146 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1147 /* If we are modifying the current TB, we must stop
1148 its execution. We could be more precise by checking
1149 that the modification is after the current PC, but it
1150 would require a specialized function to partially
1151 restore the CPU state */
1153 current_tb_modified = 1;
1154 cpu_restore_state(current_tb, env, env->mem_io_pc);
1155 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1156 &current_flags);
1158 #endif /* TARGET_HAS_PRECISE_SMC */
1159 /* we need to do that to handle the case where a signal
1160 occurs while doing tb_phys_invalidate() */
1161 saved_tb = NULL;
1162 if (env) {
1163 saved_tb = env->current_tb;
1164 env->current_tb = NULL;
1166 tb_phys_invalidate(tb, -1);
1167 if (env) {
1168 env->current_tb = saved_tb;
1169 if (env->interrupt_request && env->current_tb)
1170 cpu_interrupt(env, env->interrupt_request);
1173 tb = tb_next;
1175 #if !defined(CONFIG_USER_ONLY)
1176 /* if no code remaining, no need to continue to use slow writes */
1177 if (!p->first_tb) {
1178 invalidate_page_bitmap(p);
1179 if (is_cpu_write_access) {
1180 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1183 #endif
1184 #ifdef TARGET_HAS_PRECISE_SMC
1185 if (current_tb_modified) {
1186 /* we generate a block containing just the instruction
1187 modifying the memory. It will ensure that it cannot modify
1188 itself */
1189 env->current_tb = NULL;
1190 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1191 cpu_resume_from_signal(env, NULL);
1193 #endif
1196 /* len must be <= 8 and start must be a multiple of len */
1197 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1199 PageDesc *p;
1200 int offset, b;
1201 #if 0
1202 if (1) {
1203 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1204 cpu_single_env->mem_io_vaddr, len,
1205 cpu_single_env->eip,
1206 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1208 #endif
1209 p = page_find(start >> TARGET_PAGE_BITS);
1210 if (!p)
1211 return;
1212 if (p->code_bitmap) {
1213 offset = start & ~TARGET_PAGE_MASK;
1214 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1215 if (b & ((1 << len) - 1))
1216 goto do_invalidate;
1217 } else {
1218 do_invalidate:
1219 tb_invalidate_phys_page_range(start, start + len, 1);
1223 #if !defined(CONFIG_SOFTMMU)
/*
 * Invalidate every translation block linked on the page containing 'addr'
 * and leave that page with an empty TB list.
 *
 * addr: address inside the page; it is rounded down to a page boundary.
 * pc:   only used when TARGET_HAS_PRECISE_SMC is defined. A non-zero value
 *       is handed to tb_find_pc() to identify the TB that was executing;
 *       0 skips that lookup.
 * puc:  forwarded unchanged to cpu_resume_from_signal().
 *
 * Under TARGET_HAS_PRECISE_SMC, if the executing TB is one of those being
 * invalidated, the function does not return normally: it regenerates a TB
 * and calls cpu_resume_from_signal().
 */
1224 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1225 unsigned long pc, void *puc)
1227 TranslationBlock *tb;
1228 PageDesc *p;
1229 int n;
1230 #ifdef TARGET_HAS_PRECISE_SMC
1231 TranslationBlock *current_tb = NULL;
1232 CPUState *env = cpu_single_env;
1233 int current_tb_modified = 0;
1234 target_ulong current_pc = 0;
1235 target_ulong current_cs_base = 0;
1236 int current_flags = 0;
1237 #endif
1239 addr &= TARGET_PAGE_MASK;
1240 p = page_find(addr >> TARGET_PAGE_BITS);
/* no page descriptor: nothing to invalidate */
1241 if (!p)
1242 return;
1243 tb = p->first_tb;
1244 #ifdef TARGET_HAS_PRECISE_SMC
1245 if (tb && pc != 0) {
1246 current_tb = tb_find_pc(pc);
1248 #endif
1249 while (tb != NULL) {
/* the low two bits of a list link hold the page slot index; strip them
   to recover the real TB pointer */
1250 n = (long)tb & 3;
1251 tb = (TranslationBlock *)((long)tb & ~3);
1252 #ifdef TARGET_HAS_PRECISE_SMC
1253 if (current_tb == tb &&
1254 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1255 /* If we are modifying the current TB, we must stop
1256 its execution. We could be more precise by checking
1257 that the modification is after the current PC, but it
1258 would require a specialized function to partially
1259 restore the CPU state */
1261 current_tb_modified = 1;
1262 cpu_restore_state(current_tb, env, pc);
1263 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1264 &current_flags);
1266 #endif /* TARGET_HAS_PRECISE_SMC */
/* NOTE(review): the next link is read from tb after tb_phys_invalidate()
   has run on it; this relies on that call leaving page_next[n] intact
   when given this page address - confirm against tb_phys_invalidate() */
1267 tb_phys_invalidate(tb, addr);
1268 tb = tb->page_next[n];
/* every TB on the page has been visited; drop the whole list at once */
1270 p->first_tb = NULL;
1271 #ifdef TARGET_HAS_PRECISE_SMC
1272 if (current_tb_modified) {
1273 /* we generate a block containing just the instruction
1274 modifying the memory. It will ensure that it cannot modify
1275 itself */
1276 env->current_tb = NULL;
1277 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1278 cpu_resume_from_signal(env, puc);
1280 #endif
1282 #endif
1284 /* add the tb in the target page and protect it if necessary */
/*
 * tb:        translation block being linked.
 * n:         which of the TB's page slots (index into page_addr[] and
 *            page_next[]) this page occupies.
 * page_addr: address of the page; stored in tb->page_addr[n].
 *
 * The TB is pushed at the head of the page's TB list and the page's code
 * bitmap is invalidated. In user-only builds a writable page is then made
 * non-writable with mprotect(); otherwise tlb_protect_code() is called
 * when this is the first TB on the page.
 */
1285 static inline void tb_alloc_page(TranslationBlock *tb,
1286 unsigned int n, tb_page_addr_t page_addr)
1288 PageDesc *p;
1289 #ifndef CONFIG_USER_ONLY
1290 bool page_already_protected;
1291 #endif
1293 tb->page_addr[n] = page_addr;
1294 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1295 tb->page_next[n] = p->first_tb;
1296 #ifndef CONFIG_USER_ONLY
/* sampled before the list head is overwritten just below */
1297 page_already_protected = p->first_tb != NULL;
1298 #endif
/* the slot index n is stored in the low bits of the list pointer */
1299 p->first_tb = (TranslationBlock *)((long)tb | n);
1300 invalidate_page_bitmap(p);
/* the "|| 1" makes the following section unconditional */
1302 #if defined(TARGET_HAS_SMC) || 1
1304 #if defined(CONFIG_USER_ONLY)
1305 if (p->flags & PAGE_WRITE) {
1306 target_ulong addr;
1307 PageDesc *p2;
1308 int prot;
1310 /* force the host page as non writable (writes will have a
1311 page fault + mprotect overhead) */
1312 page_addr &= qemu_host_page_mask;
1313 prot = 0;
/* visit every target page inside the host page: accumulate their flags
   into prot and clear PAGE_WRITE on each */
1314 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1315 addr += TARGET_PAGE_SIZE) {
1317 p2 = page_find (addr >> TARGET_PAGE_BITS);
1318 if (!p2)
1319 continue;
1320 prot |= p2->flags;
1321 p2->flags &= ~PAGE_WRITE;
/* NOTE(review): the mprotect() return value is not checked */
1323 mprotect(g2h(page_addr), qemu_host_page_size,
1324 (prot & PAGE_BITS) & ~PAGE_WRITE);
1325 #ifdef DEBUG_TB_INVALIDATE
1326 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1327 page_addr);
1328 #endif
1330 #else
1331 /* if some code is already present, then the pages are already
1332 protected. So we handle the case where only the first TB is
1333 allocated in a physical page */
1334 if (!page_already_protected) {
1335 tlb_protect_code(page_addr);
1337 #endif
1339 #endif /* TARGET_HAS_SMC */
1342 /* add a new TB and link it to the physical page tables. phys_page2 is
1343 (-1) to indicate that only one page contains the TB. */
1344 void tb_link_page(TranslationBlock *tb,
1345 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1347 unsigned int h;
1348 TranslationBlock **ptb;
1350 /* Grab the mmap lock to stop another thread invalidating this TB
1351 before we are done. */
1352 mmap_lock();
1353 /* add in the physical hash table */
1354 h = tb_phys_hash_func(phys_pc);
1355 ptb = &tb_phys_hash[h];
1356 tb->phys_hash_next = *ptb;
1357 *ptb = tb;
1359 /* add in the page list */
1360 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1361 if (phys_page2 != -1)
1362 tb_alloc_page(tb, 1, phys_page2);
1363 else
1364 tb->page_addr[1] = -1;
1366 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1367 tb->jmp_next[0] = NULL;
1368 tb->jmp_next[1] = NULL;
1370 /* init original jump addresses */
1371 if (tb->tb_next_offset[0] != 0xffff)
1372 tb_reset_jump(tb, 0);
1373 if (tb->tb_next_offset[1] != 0xffff)
1374 tb_reset_jump(tb, 1);
1376 #ifdef DEBUG_TB_CHECK
1377 tb_page_check();
1378 #endif
1379 mmap_unlock();
1382 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1383 tb[1].tc_ptr. Return NULL if not found */
1384 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1386 int m_min, m_max, m;
1387 unsigned long v;
1388 TranslationBlock *tb;
1390 if (nb_tbs <= 0)
1391 return NULL;
1392 if (tc_ptr < (unsigned long)code_gen_buffer ||
1393 tc_ptr >= (unsigned long)code_gen_ptr)
1394 return NULL;
1395 /* binary search (cf Knuth) */
1396 m_min = 0;
1397 m_max = nb_tbs - 1;
1398 while (m_min <= m_max) {
1399 m = (m_min + m_max) >> 1;
1400 tb = &tbs[m];
1401 v = (unsigned long)tb->tc_ptr;
1402 if (v == tc_ptr)
1403 return tb;
1404 else if (tc_ptr < v) {
1405 m_max = m - 1;
1406 } else {
1407 m_min = m + 1;
1410 return &tbs[m_max];
1413 static void tb_reset_jump_recursive(TranslationBlock *tb);
/*
 * Undo direct jump number n of 'tb', if it is currently chained.
 *
 * The destination TB is found by following tb->jmp_next[n] until a link
 * tagged 2 is reached, 'tb' is unlinked from that destination's jmp_first
 * list, the jump in the generated code is reset with tb_reset_jump(), and
 * the same treatment is then applied to the destination TB.
 * List links carry a tag in their low two bits, extracted with "& 3".
 */
1415 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1417 TranslationBlock *tb1, *tb_next, **ptb;
1418 unsigned int n1;
1420 tb1 = tb->jmp_next[n];
/* NULL means jump n is not chained: nothing to do */
1421 if (tb1 != NULL) {
1422 /* find head of list */
1423 for(;;) {
1424 n1 = (long)tb1 & 3;
1425 tb1 = (TranslationBlock *)((long)tb1 & ~3);
/* a tag of 2 marks the link back to the TB that owns the list */
1426 if (n1 == 2)
1427 break;
1428 tb1 = tb1->jmp_next[n1];
1430 /* we are now sure now that tb jumps to tb1 */
1431 tb_next = tb1;
1433 /* remove tb from the jmp_first list */
1434 ptb = &tb_next->jmp_first;
1435 for(;;) {
1436 tb1 = *ptb;
1437 n1 = (long)tb1 & 3;
1438 tb1 = (TranslationBlock *)((long)tb1 & ~3);
/* stop on the link that represents jump n of tb itself */
1439 if (n1 == n && tb1 == tb)
1440 break;
1441 ptb = &tb1->jmp_next[n1];
/* splice tb out of the list and mark its jump n as unchained */
1443 *ptb = tb->jmp_next[n];
1444 tb->jmp_next[n] = NULL;
1446 /* suppress the jump to next tb in generated code */
1447 tb_reset_jump(tb, n);
1449 /* suppress jumps in the tb on which we could have jumped */
1450 tb_reset_jump_recursive(tb_next);
1454 static void tb_reset_jump_recursive(TranslationBlock *tb)
1456 tb_reset_jump_recursive2(tb, 0);
1457 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate the translated code covering the single byte at guest
 * virtual address 'pc'. The address is resolved with
 * cpu_get_phys_page_debug(); nothing is done unless the backing region
 * is RAM or a readable ROM device.
 */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    MemoryRegionSection section;
    target_phys_addr_t phys_page;
    ram_addr_t code_addr;

    phys_page = cpu_get_phys_page_debug(env, pc);
    section = phys_page_find(phys_page >> TARGET_PAGE_BITS);
    if (!memory_region_is_ram(section.mr)
        && !(section.mr->rom_device && section.mr->readable)) {
        return;
    }

    /* page base within RAM, then the offset of pc inside its page */
    code_addr = (memory_region_get_ram_addr(section.mr)
                 + section.offset_within_region) & TARGET_PAGE_MASK;
    code_addr |= (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(code_addr, code_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1487 #if defined(CONFIG_USER_ONLY)
1488 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1493 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1494 int flags, CPUWatchpoint **watchpoint)
1496 return -ENOSYS;
1498 #else
1499 /* Add a watchpoint. */
1500 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1501 int flags, CPUWatchpoint **watchpoint)
1503 target_ulong len_mask = ~(len - 1);
1504 CPUWatchpoint *wp;
1506 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1507 if ((len & (len - 1)) || (addr & ~len_mask) ||
1508 len == 0 || len > TARGET_PAGE_SIZE) {
1509 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1510 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1511 return -EINVAL;
1513 wp = g_malloc(sizeof(*wp));
1515 wp->vaddr = addr;
1516 wp->len_mask = len_mask;
1517 wp->flags = flags;
1519 /* keep all GDB-injected watchpoints in front */
1520 if (flags & BP_GDB)
1521 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1522 else
1523 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1525 tlb_flush_page(env, addr);
1527 if (watchpoint)
1528 *watchpoint = wp;
1529 return 0;
1532 /* Remove a specific watchpoint. */
1533 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1534 int flags)
1536 target_ulong len_mask = ~(len - 1);
1537 CPUWatchpoint *wp;
1539 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1540 if (addr == wp->vaddr && len_mask == wp->len_mask
1541 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1542 cpu_watchpoint_remove_by_ref(env, wp);
1543 return 0;
1546 return -ENOENT;
1549 /* Remove a specific watchpoint by reference. */
1550 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1552 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1554 tlb_flush_page(env, watchpoint->vaddr);
1556 g_free(watchpoint);
1559 /* Remove all matching watchpoints. */
1560 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1562 CPUWatchpoint *wp, *next;
1564 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1565 if (wp->flags & mask)
1566 cpu_watchpoint_remove_by_ref(env, wp);
1569 #endif
1571 /* Add a breakpoint. */
1572 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1573 CPUBreakpoint **breakpoint)
1575 #if defined(TARGET_HAS_ICE)
1576 CPUBreakpoint *bp;
1578 bp = g_malloc(sizeof(*bp));
1580 bp->pc = pc;
1581 bp->flags = flags;
1583 /* keep all GDB-injected breakpoints in front */
1584 if (flags & BP_GDB)
1585 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1586 else
1587 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1589 breakpoint_invalidate(env, pc);
1591 if (breakpoint)
1592 *breakpoint = bp;
1593 return 0;
1594 #else
1595 return -ENOSYS;
1596 #endif
1599 /* Remove a specific breakpoint. */
1600 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1602 #if defined(TARGET_HAS_ICE)
1603 CPUBreakpoint *bp;
1605 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1606 if (bp->pc == pc && bp->flags == flags) {
1607 cpu_breakpoint_remove_by_ref(env, bp);
1608 return 0;
1611 return -ENOENT;
1612 #else
1613 return -ENOSYS;
1614 #endif
1617 /* Remove a specific breakpoint by reference. */
1618 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1620 #if defined(TARGET_HAS_ICE)
1621 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1623 breakpoint_invalidate(env, breakpoint->pc);
1625 g_free(breakpoint);
1626 #endif
1629 /* Remove all matching breakpoints. */
1630 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1632 #if defined(TARGET_HAS_ICE)
1633 CPUBreakpoint *bp, *next;
1635 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1636 if (bp->flags & mask)
1637 cpu_breakpoint_remove_by_ref(env, bp);
1639 #endif
1642 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1643 CPU loop after each instruction */
1644 void cpu_single_step(CPUState *env, int enabled)
1646 #if defined(TARGET_HAS_ICE)
1647 if (env->singlestep_enabled != enabled) {
1648 env->singlestep_enabled = enabled;
1649 if (kvm_enabled())
1650 kvm_update_guest_debug(env, 0);
1651 else {
1652 /* must flush all the translated code to avoid inconsistencies */
1653 /* XXX: only flush what is necessary */
1654 tb_flush(env);
1657 #endif
1660 /* enable or disable low levels log */
1661 void cpu_set_log(int log_flags)
1663 loglevel = log_flags;
1664 if (loglevel && !logfile) {
1665 logfile = fopen(logfilename, log_append ? "a" : "w");
1666 if (!logfile) {
1667 perror(logfilename);
1668 _exit(1);
1670 #if !defined(CONFIG_SOFTMMU)
1671 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1673 static char logfile_buf[4096];
1674 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1676 #elif defined(_WIN32)
1677 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1678 setvbuf(logfile, NULL, _IONBF, 0);
1679 #else
1680 setvbuf(logfile, NULL, _IOLBF, 0);
1681 #endif
1682 log_append = 1;
1684 if (!loglevel && logfile) {
1685 fclose(logfile);
1686 logfile = NULL;
1690 void cpu_set_log_filename(const char *filename)
1692 logfilename = strdup(filename);
1693 if (logfile) {
1694 fclose(logfile);
1695 logfile = NULL;
1697 cpu_set_log(loglevel);
1700 static void cpu_unlink_tb(CPUState *env)
1702 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1703 problem and hope the cpu will stop of its own accord. For userspace
1704 emulation this often isn't actually as bad as it sounds. Often
1705 signals are used primarily to interrupt blocking syscalls. */
1706 TranslationBlock *tb;
1707 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1709 spin_lock(&interrupt_lock);
1710 tb = env->current_tb;
1711 /* if the cpu is currently executing code, we must unlink it and
1712 all the potentially executing TB */
1713 if (tb) {
1714 env->current_tb = NULL;
1715 tb_reset_jump_recursive(tb);
1717 spin_unlock(&interrupt_lock);
1720 #ifndef CONFIG_USER_ONLY
1721 /* mask must never be zero, except for A20 change call */
1722 static void tcg_handle_interrupt(CPUState *env, int mask)
1724 int old_mask;
1726 old_mask = env->interrupt_request;
1727 env->interrupt_request |= mask;
1730 * If called from iothread context, wake the target cpu in
1731 * case its halted.
1733 if (!qemu_cpu_is_self(env)) {
1734 qemu_cpu_kick(env);
1735 return;
1738 if (use_icount) {
1739 env->icount_decr.u16.high = 0xffff;
1740 if (!can_do_io(env)
1741 && (mask & ~old_mask) != 0) {
1742 cpu_abort(env, "Raised interrupt while not in I/O function");
1744 } else {
1745 cpu_unlink_tb(env);
1749 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1751 #else /* CONFIG_USER_ONLY */
1753 void cpu_interrupt(CPUState *env, int mask)
1755 env->interrupt_request |= mask;
1756 cpu_unlink_tb(env);
1758 #endif /* CONFIG_USER_ONLY */
1760 void cpu_reset_interrupt(CPUState *env, int mask)
1762 env->interrupt_request &= ~mask;
1765 void cpu_exit(CPUState *env)
1767 env->exit_request = 1;
1768 cpu_unlink_tb(env);
/*
 * Table of selectable log categories. Each entry holds a mask bit, the
 * name matched by cpu_str_to_log_mask(), and a descriptive string.
 * The table ends with an all-zero entry; the lookup loops in
 * cpu_str_to_log_mask() stop on its zero mask.
 */
1771 const CPULogItem cpu_log_items[] = {
1772 { CPU_LOG_TB_OUT_ASM, "out_asm",
1773 "show generated host assembly code for each compiled TB" },
1774 { CPU_LOG_TB_IN_ASM, "in_asm",
1775 "show target assembly code for each compiled TB" },
1776 { CPU_LOG_TB_OP, "op",
1777 "show micro ops for each compiled TB" },
1778 { CPU_LOG_TB_OP_OPT, "op_opt",
1779 "show micro ops "
1780 #ifdef TARGET_I386
1781 "before eflags optimization and "
1782 #endif
1783 "after liveness analysis" },
1784 { CPU_LOG_INT, "int",
1785 "show interrupts/exceptions in short format" },
1786 { CPU_LOG_EXEC, "exec",
1787 "show trace before each executed TB (lots of logs)" },
1788 { CPU_LOG_TB_CPU, "cpu",
1789 "show CPU state before block translation" },
1790 #ifdef TARGET_I386
1791 { CPU_LOG_PCALL, "pcall",
1792 "show protected mode far calls/returns/exceptions" },
1793 { CPU_LOG_RESET, "cpu_reset",
1794 "show CPU state before CPU resets" },
1795 #endif
1796 #ifdef DEBUG_IOPORT
1797 { CPU_LOG_IOPORT, "ioport",
1798 "show all i/o ports accesses" },
1799 #endif
/* terminator entry */
1800 { 0, NULL, NULL },
/*
 * Return 1 when the first n bytes of s1 equal the whole NUL-terminated
 * string s2 (so s2 must be exactly n characters long), 0 otherwise.
 * s1 does not need to be NUL-terminated at s1[n].
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1810 /* takes a comma separated list of log masks. Return 0 if error. */
1811 int cpu_str_to_log_mask(const char *str)
1813 const CPULogItem *item;
1814 int mask;
1815 const char *p, *p1;
1817 p = str;
1818 mask = 0;
1819 for(;;) {
1820 p1 = strchr(p, ',');
1821 if (!p1)
1822 p1 = p + strlen(p);
1823 if(cmp1(p,p1-p,"all")) {
1824 for(item = cpu_log_items; item->mask != 0; item++) {
1825 mask |= item->mask;
1827 } else {
1828 for(item = cpu_log_items; item->mask != 0; item++) {
1829 if (cmp1(p, p1 - p, item->name))
1830 goto found;
1832 return 0;
1834 found:
1835 mask |= item->mask;
1836 if (*p1 != ',')
1837 break;
1838 p = p1 + 1;
1840 return mask;
1843 void cpu_abort(CPUState *env, const char *fmt, ...)
1845 va_list ap;
1846 va_list ap2;
1848 va_start(ap, fmt);
1849 va_copy(ap2, ap);
1850 fprintf(stderr, "qemu: fatal: ");
1851 vfprintf(stderr, fmt, ap);
1852 fprintf(stderr, "\n");
1853 #ifdef TARGET_I386
1854 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1855 #else
1856 cpu_dump_state(env, stderr, fprintf, 0);
1857 #endif
1858 if (qemu_log_enabled()) {
1859 qemu_log("qemu: fatal: ");
1860 qemu_log_vprintf(fmt, ap2);
1861 qemu_log("\n");
1862 #ifdef TARGET_I386
1863 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1864 #else
1865 log_cpu_state(env, 0);
1866 #endif
1867 qemu_log_flush();
1868 qemu_log_close();
1870 va_end(ap2);
1871 va_end(ap);
1872 #if defined(CONFIG_USER_ONLY)
1874 struct sigaction act;
1875 sigfillset(&act.sa_mask);
1876 act.sa_handler = SIG_DFL;
1877 sigaction(SIGABRT, &act, NULL);
1879 #endif
1880 abort();
1883 CPUState *cpu_copy(CPUState *env)
1885 CPUState *new_env = cpu_init(env->cpu_model_str);
1886 CPUState *next_cpu = new_env->next_cpu;
1887 int cpu_index = new_env->cpu_index;
1888 #if defined(TARGET_HAS_ICE)
1889 CPUBreakpoint *bp;
1890 CPUWatchpoint *wp;
1891 #endif
1893 memcpy(new_env, env, sizeof(CPUState));
1895 /* Preserve chaining and index. */
1896 new_env->next_cpu = next_cpu;
1897 new_env->cpu_index = cpu_index;
1899 /* Clone all break/watchpoints.
1900 Note: Once we support ptrace with hw-debug register access, make sure
1901 BP_CPU break/watchpoints are handled correctly on clone. */
1902 QTAILQ_INIT(&env->breakpoints);
1903 QTAILQ_INIT(&env->watchpoints);
1904 #if defined(TARGET_HAS_ICE)
1905 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1906 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1908 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1909 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1910 wp->flags, NULL);
1912 #endif
1914 return new_env;
1917 #if !defined(CONFIG_USER_ONLY)
1919 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1921 unsigned int i;
1923 /* Discard jump cache entries for any tb which might potentially
1924 overlap the flushed page. */
1925 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1926 memset (&env->tb_jmp_cache[i], 0,
1927 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1929 i = tb_jmp_cache_hash_page(addr);
1930 memset (&env->tb_jmp_cache[i], 0,
1931 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1934 static CPUTLBEntry s_cputlb_empty_entry = {
1935 .addr_read = -1,
1936 .addr_write = -1,
1937 .addr_code = -1,
1938 .addend = -1,
1941 /* NOTE:
1942 * If flush_global is true (the usual case), flush all tlb entries.
1943 * If flush_global is false, flush (at least) all tlb entries not
1944 * marked global.
1946 * Since QEMU doesn't currently implement a global/not-global flag
1947 * for tlb entries, at the moment tlb_flush() will also flush all
1948 * tlb entries in the flush_global == false case. This is OK because
1949 * CPU architectures generally permit an implementation to drop
1950 * entries from the TLB at any time, so flushing more entries than
1951 * required is only an efficiency issue, not a correctness issue.
1953 void tlb_flush(CPUState *env, int flush_global)
1955 int i;
1957 #if defined(DEBUG_TLB)
1958 printf("tlb_flush:\n");
1959 #endif
1960 /* must reset current TB so that interrupts cannot modify the
1961 links while we are modifying them */
1962 env->current_tb = NULL;
1964 for(i = 0; i < CPU_TLB_SIZE; i++) {
1965 int mmu_idx;
1966 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1967 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1971 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1973 env->tlb_flush_addr = -1;
1974 env->tlb_flush_mask = 0;
1975 tlb_flush_count++;
1978 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1980 if (addr == (tlb_entry->addr_read &
1981 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1982 addr == (tlb_entry->addr_write &
1983 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1984 addr == (tlb_entry->addr_code &
1985 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1986 *tlb_entry = s_cputlb_empty_entry;
1990 void tlb_flush_page(CPUState *env, target_ulong addr)
1992 int i;
1993 int mmu_idx;
1995 #if defined(DEBUG_TLB)
1996 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1997 #endif
1998 /* Check if we need to flush due to large pages. */
1999 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2000 #if defined(DEBUG_TLB)
2001 printf("tlb_flush_page: forced full flush ("
2002 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2003 env->tlb_flush_addr, env->tlb_flush_mask);
2004 #endif
2005 tlb_flush(env, 1);
2006 return;
2008 /* must reset current TB so that interrupts cannot modify the
2009 links while we are modifying them */
2010 env->current_tb = NULL;
2012 addr &= TARGET_PAGE_MASK;
2013 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2014 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2015 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2017 tlb_flush_jmp_cache(env, addr);
2020 /* update the TLBs so that writes to code in the virtual page 'addr'
2021 can be detected */
2022 static void tlb_protect_code(ram_addr_t ram_addr)
2024 cpu_physical_memory_reset_dirty(ram_addr,
2025 ram_addr + TARGET_PAGE_SIZE,
2026 CODE_DIRTY_FLAG);
2029 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2030 tested for self modifying code */
2031 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2032 target_ulong vaddr)
2034 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2037 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2038 unsigned long start, unsigned long length)
2040 unsigned long addr;
2041 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2042 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2043 if ((addr - start) < length) {
2044 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2049 /* Note: start and end must be within the same ram block. */
2050 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2051 int dirty_flags)
2053 CPUState *env;
2054 unsigned long length, start1;
2055 int i;
2057 start &= TARGET_PAGE_MASK;
2058 end = TARGET_PAGE_ALIGN(end);
2060 length = end - start;
2061 if (length == 0)
2062 return;
2063 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2065 /* we modify the TLB cache so that the dirty bit will be set again
2066 when accessing the range */
2067 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2068 /* Check that we don't span multiple blocks - this breaks the
2069 address comparisons below. */
2070 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2071 != (end - 1) - start) {
2072 abort();
2075 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2076 int mmu_idx;
2077 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2078 for(i = 0; i < CPU_TLB_SIZE; i++)
2079 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2080 start1, length);
2085 int cpu_physical_memory_set_dirty_tracking(int enable)
2087 int ret = 0;
2088 in_migration = enable;
2089 return ret;
2092 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2094 ram_addr_t ram_addr;
2095 void *p;
2097 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == io_mem_ram.ram_addr) {
2098 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2099 + tlb_entry->addend);
2100 ram_addr = qemu_ram_addr_from_host_nofail(p);
2101 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2102 tlb_entry->addr_write |= TLB_NOTDIRTY;
2107 /* update the TLB according to the current state of the dirty bits */
2108 void cpu_tlb_update_dirty(CPUState *env)
2110 int i;
2111 int mmu_idx;
2112 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2113 for(i = 0; i < CPU_TLB_SIZE; i++)
2114 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2118 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2120 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2121 tlb_entry->addr_write = vaddr;
2124 /* update the TLB corresponding to virtual page vaddr
2125 so that it is no longer dirty */
2126 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2128 int i;
2129 int mmu_idx;
2131 vaddr &= TARGET_PAGE_MASK;
2132 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2133 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2134 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2137 /* Our TLB does not support large pages, so remember the area covered by
2138 large pages and trigger a full TLB flush if these are invalidated. */
2139 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2140 target_ulong size)
2142 target_ulong mask = ~(size - 1);
2144 if (env->tlb_flush_addr == (target_ulong)-1) {
2145 env->tlb_flush_addr = vaddr & mask;
2146 env->tlb_flush_mask = mask;
2147 return;
2149 /* Extend the existing region to include the new page.
2150 This is a compromise between unnecessary flushes and the cost
2151 of maintaining a full variable size TLB. */
2152 mask &= env->tlb_flush_mask;
2153 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2154 mask <<= 1;
2156 env->tlb_flush_addr &= mask;
2157 env->tlb_flush_mask = mask;
2160 static bool is_ram_rom(MemoryRegionSection *s)
2162 return memory_region_is_ram(s->mr);
2165 static bool is_romd(MemoryRegionSection *s)
2167 MemoryRegion *mr = s->mr;
2169 return mr->rom_device && mr->readable;
2172 static bool is_ram_rom_romd(MemoryRegionSection *s)
2174 return is_ram_rom(s) || is_romd(s);
2177 /* Add a new TLB entry. At most one entry for a given virtual address
2178 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2179 supplied size is only used by tlb_flush_page. */
2180 void tlb_set_page(CPUState *env, target_ulong vaddr,
2181 target_phys_addr_t paddr, int prot,
2182 int mmu_idx, target_ulong size)
2184 MemoryRegionSection section;
2185 unsigned int index;
2186 target_ulong address;
2187 target_ulong code_address;
2188 unsigned long addend;
2189 CPUTLBEntry *te;
2190 CPUWatchpoint *wp;
2191 target_phys_addr_t iotlb;
2193 assert(size >= TARGET_PAGE_SIZE);
2194 if (size != TARGET_PAGE_SIZE) {
2195 tlb_add_large_page(env, vaddr, size);
2197 section = phys_page_find(paddr >> TARGET_PAGE_BITS);
2198 #if defined(DEBUG_TLB)
2199 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2200 " prot=%x idx=%d pd=0x%08lx\n",
2201 vaddr, paddr, prot, mmu_idx, pd);
2202 #endif
2204 address = vaddr;
2205 if (!is_ram_rom_romd(&section)) {
2206 /* IO memory case (romd handled later) */
2207 address |= TLB_MMIO;
2209 if (is_ram_rom_romd(&section)) {
2210 addend = (unsigned long)(memory_region_get_ram_ptr(section.mr)
2211 + section.offset_within_region);
2212 } else {
2213 addend = 0;
2215 if (is_ram_rom(&section)) {
2216 /* Normal RAM. */
2217 iotlb = (memory_region_get_ram_addr(section.mr)
2218 + section.offset_within_region) & TARGET_PAGE_MASK;
2219 if (!section.readonly)
2220 iotlb |= io_mem_notdirty.ram_addr;
2221 else
2222 iotlb |= io_mem_rom.ram_addr;
2223 } else {
2224 /* IO handlers are currently passed a physical address.
2225 It would be nice to pass an offset from the base address
2226 of that region. This would avoid having to special case RAM,
2227 and avoid full address decoding in every device.
2228 We can't use the high bits of pd for this because
2229 IO_MEM_ROMD uses these as a ram address. */
2230 iotlb = memory_region_get_ram_addr(section.mr) & ~TARGET_PAGE_MASK;
2231 iotlb += section.offset_within_region;
2234 code_address = address;
2235 /* Make accesses to pages with watchpoints go via the
2236 watchpoint trap routines. */
2237 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2238 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2239 /* Avoid trapping reads of pages with a write breakpoint. */
2240 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2241 iotlb = io_mem_watch.ram_addr + paddr;
2242 address |= TLB_MMIO;
2243 break;
2248 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2249 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2250 te = &env->tlb_table[mmu_idx][index];
2251 te->addend = addend - vaddr;
2252 if (prot & PAGE_READ) {
2253 te->addr_read = address;
2254 } else {
2255 te->addr_read = -1;
2258 if (prot & PAGE_EXEC) {
2259 te->addr_code = code_address;
2260 } else {
2261 te->addr_code = -1;
2263 if (prot & PAGE_WRITE) {
2264 if ((memory_region_is_ram(section.mr) && section.readonly)
2265 || is_romd(&section)) {
2266 /* Write access calls the I/O callback. */
2267 te->addr_write = address | TLB_MMIO;
2268 } else if (memory_region_is_ram(section.mr)
2269 && !cpu_physical_memory_is_dirty(
2270 section.mr->ram_addr
2271 + section.offset_within_region)) {
2272 te->addr_write = address | TLB_NOTDIRTY;
2273 } else {
2274 te->addr_write = address;
2276 } else {
2277 te->addr_write = -1;
2281 #else
2283 void tlb_flush(CPUState *env, int flush_global)
2287 void tlb_flush_page(CPUState *env, target_ulong addr)
2292 * Walks guest process memory "regions" one by one
2293 * and calls callback function 'fn' for each region.
2296 struct walk_memory_regions_data
2298 walk_memory_regions_fn fn;
2299 void *priv;
2300 unsigned long start;
2301 int prot;
2304 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2305 abi_ulong end, int new_prot)
2307 if (data->start != -1ul) {
2308 int rc = data->fn(data->priv, data->start, end, data->prot);
2309 if (rc != 0) {
2310 return rc;
2314 data->start = (new_prot ? end : -1ul);
2315 data->prot = new_prot;
2317 return 0;
2320 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2321 abi_ulong base, int level, void **lp)
2323 abi_ulong pa;
2324 int i, rc;
2326 if (*lp == NULL) {
2327 return walk_memory_regions_end(data, base, 0);
2330 if (level == 0) {
2331 PageDesc *pd = *lp;
2332 for (i = 0; i < L2_SIZE; ++i) {
2333 int prot = pd[i].flags;
2335 pa = base | (i << TARGET_PAGE_BITS);
2336 if (prot != data->prot) {
2337 rc = walk_memory_regions_end(data, pa, prot);
2338 if (rc != 0) {
2339 return rc;
2343 } else {
2344 void **pp = *lp;
2345 for (i = 0; i < L2_SIZE; ++i) {
2346 pa = base | ((abi_ulong)i <<
2347 (TARGET_PAGE_BITS + L2_BITS * level));
2348 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2349 if (rc != 0) {
2350 return rc;
2355 return 0;
2358 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2360 struct walk_memory_regions_data data;
2361 unsigned long i;
2363 data.fn = fn;
2364 data.priv = priv;
2365 data.start = -1ul;
2366 data.prot = 0;
2368 for (i = 0; i < V_L1_SIZE; i++) {
2369 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2370 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2371 if (rc != 0) {
2372 return rc;
2376 return walk_memory_regions_end(&data, 0, 0);
2379 static int dump_region(void *priv, abi_ulong start,
2380 abi_ulong end, unsigned long prot)
2382 FILE *f = (FILE *)priv;
2384 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2385 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2386 start, end, end - start,
2387 ((prot & PAGE_READ) ? 'r' : '-'),
2388 ((prot & PAGE_WRITE) ? 'w' : '-'),
2389 ((prot & PAGE_EXEC) ? 'x' : '-'));
2391 return (0);
2394 /* dump memory mappings */
2395 void page_dump(FILE *f)
2397 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2398 "start", "end", "size", "prot");
2399 walk_memory_regions(f, dump_region);
2402 int page_get_flags(target_ulong address)
2404 PageDesc *p;
2406 p = page_find(address >> TARGET_PAGE_BITS);
2407 if (!p)
2408 return 0;
2409 return p->flags;
2412 /* Modify the flags of a page and invalidate the code if necessary.
2413 The flag PAGE_WRITE_ORG is positioned automatically depending
2414 on PAGE_WRITE. The mmap_lock should already be held. */
2415 void page_set_flags(target_ulong start, target_ulong end, int flags)
2417 target_ulong addr, len;
2419 /* This function should never be called with addresses outside the
2420 guest address space. If this assert fires, it probably indicates
2421 a missing call to h2g_valid. */
2422 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2423 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2424 #endif
2425 assert(start < end);
2427 start = start & TARGET_PAGE_MASK;
2428 end = TARGET_PAGE_ALIGN(end);
2430 if (flags & PAGE_WRITE) {
2431 flags |= PAGE_WRITE_ORG;
2434 for (addr = start, len = end - start;
2435 len != 0;
2436 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2437 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2439 /* If the write protection bit is set, then we invalidate
2440 the code inside. */
2441 if (!(p->flags & PAGE_WRITE) &&
2442 (flags & PAGE_WRITE) &&
2443 p->first_tb) {
2444 tb_invalidate_phys_page(addr, 0, NULL);
2446 p->flags = flags;
2450 int page_check_range(target_ulong start, target_ulong len, int flags)
2452 PageDesc *p;
2453 target_ulong end;
2454 target_ulong addr;
2456 /* This function should never be called with addresses outside the
2457 guest address space. If this assert fires, it probably indicates
2458 a missing call to h2g_valid. */
2459 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2460 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2461 #endif
2463 if (len == 0) {
2464 return 0;
2466 if (start + len - 1 < start) {
2467 /* We've wrapped around. */
2468 return -1;
2471 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2472 start = start & TARGET_PAGE_MASK;
2474 for (addr = start, len = end - start;
2475 len != 0;
2476 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2477 p = page_find(addr >> TARGET_PAGE_BITS);
2478 if( !p )
2479 return -1;
2480 if( !(p->flags & PAGE_VALID) )
2481 return -1;
2483 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2484 return -1;
2485 if (flags & PAGE_WRITE) {
2486 if (!(p->flags & PAGE_WRITE_ORG))
2487 return -1;
2488 /* unprotect the page if it was put read-only because it
2489 contains translated code */
2490 if (!(p->flags & PAGE_WRITE)) {
2491 if (!page_unprotect(addr, 0, NULL))
2492 return -1;
2494 return 0;
2497 return 0;
2500 /* called from signal handler: invalidate the code and unprotect the
2501 page. Return TRUE if the fault was successfully handled. */
2502 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2504 unsigned int prot;
2505 PageDesc *p;
2506 target_ulong host_start, host_end, addr;
2508 /* Technically this isn't safe inside a signal handler. However we
2509 know this only ever happens in a synchronous SEGV handler, so in
2510 practice it seems to be ok. */
2511 mmap_lock();
2513 p = page_find(address >> TARGET_PAGE_BITS);
2514 if (!p) {
2515 mmap_unlock();
2516 return 0;
2519 /* if the page was really writable, then we change its
2520 protection back to writable */
2521 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2522 host_start = address & qemu_host_page_mask;
2523 host_end = host_start + qemu_host_page_size;
2525 prot = 0;
2526 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2527 p = page_find(addr >> TARGET_PAGE_BITS);
2528 p->flags |= PAGE_WRITE;
2529 prot |= p->flags;
2531 /* and since the content will be modified, we must invalidate
2532 the corresponding translated code. */
2533 tb_invalidate_phys_page(addr, pc, puc);
2534 #ifdef DEBUG_TB_CHECK
2535 tb_invalidate_check(addr);
2536 #endif
2538 mprotect((void *)g2h(host_start), qemu_host_page_size,
2539 prot & PAGE_BITS);
2541 mmap_unlock();
2542 return 1;
2544 mmap_unlock();
2545 return 0;
2548 static inline void tlb_set_dirty(CPUState *env,
2549 unsigned long addr, target_ulong vaddr)
2552 #endif /* defined(CONFIG_USER_ONLY) */
2554 #if !defined(CONFIG_USER_ONLY)
2556 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2557 typedef struct subpage_t {
2558 MemoryRegion iomem;
2559 target_phys_addr_t base;
2560 uint16_t sub_section[TARGET_PAGE_SIZE];
2561 } subpage_t;
2563 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2564 uint16_t section);
2565 static subpage_t *subpage_init(target_phys_addr_t base);
2566 static void destroy_page_desc(uint16_t section_index)
2568 MemoryRegionSection *section = &phys_sections[section_index];
2569 MemoryRegion *mr = section->mr;
2571 if (mr->subpage) {
2572 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2573 memory_region_destroy(&subpage->iomem);
2574 g_free(subpage);
2578 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2580 unsigned i;
2581 PhysPageEntry *p;
2583 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2584 return;
2587 p = phys_map_nodes[lp->ptr];
2588 for (i = 0; i < L2_SIZE; ++i) {
2589 if (!p[i].is_leaf) {
2590 destroy_l2_mapping(&p[i], level - 1);
2591 } else {
2592 destroy_page_desc(p[i].ptr);
2595 lp->is_leaf = 0;
2596 lp->ptr = PHYS_MAP_NODE_NIL;
2599 static void destroy_all_mappings(void)
2601 destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
2602 phys_map_nodes_reset();
2605 static uint16_t phys_section_add(MemoryRegionSection *section)
2607 if (phys_sections_nb == phys_sections_nb_alloc) {
2608 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2609 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2610 phys_sections_nb_alloc);
2612 phys_sections[phys_sections_nb] = *section;
2613 return phys_sections_nb++;
2616 static void phys_sections_clear(void)
2618 phys_sections_nb = 0;
2621 /* register physical memory.
2622 For RAM, 'size' must be a multiple of the target page size.
2623 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2624 io memory page. The address used when calling the IO function is
2625 the offset from the start of the region, plus region_offset. Both
2626 start_addr and region_offset are rounded down to a page boundary
2627 before calculating this offset. This should not be a problem unless
2628 the low bits of start_addr and region_offset differ. */
2629 static void register_subpage(MemoryRegionSection *section)
2631 subpage_t *subpage;
2632 target_phys_addr_t base = section->offset_within_address_space
2633 & TARGET_PAGE_MASK;
2634 MemoryRegionSection existing = phys_page_find(base >> TARGET_PAGE_BITS);
2635 MemoryRegionSection subsection = {
2636 .offset_within_address_space = base,
2637 .size = TARGET_PAGE_SIZE,
2639 target_phys_addr_t start, end;
2641 assert(existing.mr->subpage || existing.mr == &io_mem_unassigned);
2643 if (!(existing.mr->subpage)) {
2644 subpage = subpage_init(base);
2645 subsection.mr = &subpage->iomem;
2646 phys_page_set(base >> TARGET_PAGE_BITS, 1,
2647 phys_section_add(&subsection));
2648 } else {
2649 subpage = container_of(existing.mr, subpage_t, iomem);
2651 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2652 end = start + section->size;
2653 subpage_register(subpage, start, end, phys_section_add(section));
2657 static void register_multipage(MemoryRegionSection *section)
2659 target_phys_addr_t start_addr = section->offset_within_address_space;
2660 ram_addr_t size = section->size;
2661 target_phys_addr_t addr;
2662 uint16_t section_index = phys_section_add(section);
2664 assert(size);
2666 addr = start_addr;
2667 phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2668 section_index);
2671 void cpu_register_physical_memory_log(MemoryRegionSection *section,
2672 bool readonly)
2674 MemoryRegionSection now = *section, remain = *section;
2676 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2677 || (now.size < TARGET_PAGE_SIZE)) {
2678 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2679 - now.offset_within_address_space,
2680 now.size);
2681 register_subpage(&now);
2682 remain.size -= now.size;
2683 remain.offset_within_address_space += now.size;
2684 remain.offset_within_region += now.size;
2686 now = remain;
2687 now.size &= TARGET_PAGE_MASK;
2688 if (now.size) {
2689 register_multipage(&now);
2690 remain.size -= now.size;
2691 remain.offset_within_address_space += now.size;
2692 remain.offset_within_region += now.size;
2694 now = remain;
2695 if (now.size) {
2696 register_subpage(&now);
2701 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2703 if (kvm_enabled())
2704 kvm_coalesce_mmio_region(addr, size);
2707 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2709 if (kvm_enabled())
2710 kvm_uncoalesce_mmio_region(addr, size);
/* Forward the request to KVM when KVM is enabled; otherwise do nothing. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2719 #if defined(__linux__) && !defined(TARGET_S390X)
2721 #include <sys/vfs.h>
2723 #define HUGETLBFS_MAGIC 0x958458f6
2725 static long gethugepagesize(const char *path)
2727 struct statfs fs;
2728 int ret;
2730 do {
2731 ret = statfs(path, &fs);
2732 } while (ret != 0 && errno == EINTR);
2734 if (ret != 0) {
2735 perror(path);
2736 return 0;
2739 if (fs.f_type != HUGETLBFS_MAGIC)
2740 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2742 return fs.f_bsize;
2745 static void *file_ram_alloc(RAMBlock *block,
2746 ram_addr_t memory,
2747 const char *path)
2749 char *filename;
2750 void *area;
2751 int fd;
2752 #ifdef MAP_POPULATE
2753 int flags;
2754 #endif
2755 unsigned long hpagesize;
2757 hpagesize = gethugepagesize(path);
2758 if (!hpagesize) {
2759 return NULL;
2762 if (memory < hpagesize) {
2763 return NULL;
2766 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2767 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2768 return NULL;
2771 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2772 return NULL;
2775 fd = mkstemp(filename);
2776 if (fd < 0) {
2777 perror("unable to create backing store for hugepages");
2778 free(filename);
2779 return NULL;
2781 unlink(filename);
2782 free(filename);
2784 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2787 * ftruncate is not supported by hugetlbfs in older
2788 * hosts, so don't bother bailing out on errors.
2789 * If anything goes wrong with it under other filesystems,
2790 * mmap will fail.
2792 if (ftruncate(fd, memory))
2793 perror("ftruncate");
2795 #ifdef MAP_POPULATE
2796 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2797 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2798 * to sidestep this quirk.
2800 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2801 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2802 #else
2803 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2804 #endif
2805 if (area == MAP_FAILED) {
2806 perror("file_ram_alloc: can't mmap RAM pages");
2807 close(fd);
2808 return (NULL);
2810 block->fd = fd;
2811 return area;
2813 #endif
2815 static ram_addr_t find_ram_offset(ram_addr_t size)
2817 RAMBlock *block, *next_block;
2818 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2820 if (QLIST_EMPTY(&ram_list.blocks))
2821 return 0;
2823 QLIST_FOREACH(block, &ram_list.blocks, next) {
2824 ram_addr_t end, next = RAM_ADDR_MAX;
2826 end = block->offset + block->length;
2828 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2829 if (next_block->offset >= end) {
2830 next = MIN(next, next_block->offset);
2833 if (next - end >= size && next - end < mingap) {
2834 offset = end;
2835 mingap = next - end;
2839 if (offset == RAM_ADDR_MAX) {
2840 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2841 (uint64_t)size);
2842 abort();
2845 return offset;
2848 static ram_addr_t last_ram_offset(void)
2850 RAMBlock *block;
2851 ram_addr_t last = 0;
2853 QLIST_FOREACH(block, &ram_list.blocks, next)
2854 last = MAX(last, block->offset + block->length);
2856 return last;
2859 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2861 RAMBlock *new_block, *block;
2863 new_block = NULL;
2864 QLIST_FOREACH(block, &ram_list.blocks, next) {
2865 if (block->offset == addr) {
2866 new_block = block;
2867 break;
2870 assert(new_block);
2871 assert(!new_block->idstr[0]);
2873 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2874 char *id = dev->parent_bus->info->get_dev_path(dev);
2875 if (id) {
2876 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2877 g_free(id);
2880 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2882 QLIST_FOREACH(block, &ram_list.blocks, next) {
2883 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2884 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2885 new_block->idstr);
2886 abort();
2891 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2892 MemoryRegion *mr)
2894 RAMBlock *new_block;
2896 size = TARGET_PAGE_ALIGN(size);
2897 new_block = g_malloc0(sizeof(*new_block));
2899 new_block->mr = mr;
2900 new_block->offset = find_ram_offset(size);
2901 if (host) {
2902 new_block->host = host;
2903 new_block->flags |= RAM_PREALLOC_MASK;
2904 } else {
2905 if (mem_path) {
2906 #if defined (__linux__) && !defined(TARGET_S390X)
2907 new_block->host = file_ram_alloc(new_block, size, mem_path);
2908 if (!new_block->host) {
2909 new_block->host = qemu_vmalloc(size);
2910 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2912 #else
2913 fprintf(stderr, "-mem-path option unsupported\n");
2914 exit(1);
2915 #endif
2916 } else {
2917 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2918 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2919 an system defined value, which is at least 256GB. Larger systems
2920 have larger values. We put the guest between the end of data
2921 segment (system break) and this value. We use 32GB as a base to
2922 have enough room for the system break to grow. */
2923 new_block->host = mmap((void*)0x800000000, size,
2924 PROT_EXEC|PROT_READ|PROT_WRITE,
2925 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2926 if (new_block->host == MAP_FAILED) {
2927 fprintf(stderr, "Allocating RAM failed\n");
2928 abort();
2930 #else
2931 if (xen_enabled()) {
2932 xen_ram_alloc(new_block->offset, size, mr);
2933 } else {
2934 new_block->host = qemu_vmalloc(size);
2936 #endif
2937 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2940 new_block->length = size;
2942 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2944 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2945 last_ram_offset() >> TARGET_PAGE_BITS);
2946 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2947 0xff, size >> TARGET_PAGE_BITS);
2949 if (kvm_enabled())
2950 kvm_setup_guest_memory(new_block->host, size);
2952 return new_block->offset;
2955 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2957 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2960 void qemu_ram_free_from_ptr(ram_addr_t addr)
2962 RAMBlock *block;
2964 QLIST_FOREACH(block, &ram_list.blocks, next) {
2965 if (addr == block->offset) {
2966 QLIST_REMOVE(block, next);
2967 g_free(block);
2968 return;
2973 void qemu_ram_free(ram_addr_t addr)
2975 RAMBlock *block;
2977 QLIST_FOREACH(block, &ram_list.blocks, next) {
2978 if (addr == block->offset) {
2979 QLIST_REMOVE(block, next);
2980 if (block->flags & RAM_PREALLOC_MASK) {
2982 } else if (mem_path) {
2983 #if defined (__linux__) && !defined(TARGET_S390X)
2984 if (block->fd) {
2985 munmap(block->host, block->length);
2986 close(block->fd);
2987 } else {
2988 qemu_vfree(block->host);
2990 #else
2991 abort();
2992 #endif
2993 } else {
2994 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2995 munmap(block->host, block->length);
2996 #else
2997 if (xen_enabled()) {
2998 xen_invalidate_map_cache_entry(block->host);
2999 } else {
3000 qemu_vfree(block->host);
3002 #endif
3004 g_free(block);
3005 return;
3011 #ifndef _WIN32
3012 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3014 RAMBlock *block;
3015 ram_addr_t offset;
3016 int flags;
3017 void *area, *vaddr;
3019 QLIST_FOREACH(block, &ram_list.blocks, next) {
3020 offset = addr - block->offset;
3021 if (offset < block->length) {
3022 vaddr = block->host + offset;
3023 if (block->flags & RAM_PREALLOC_MASK) {
3025 } else {
3026 flags = MAP_FIXED;
3027 munmap(vaddr, length);
3028 if (mem_path) {
3029 #if defined(__linux__) && !defined(TARGET_S390X)
3030 if (block->fd) {
3031 #ifdef MAP_POPULATE
3032 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3033 MAP_PRIVATE;
3034 #else
3035 flags |= MAP_PRIVATE;
3036 #endif
3037 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3038 flags, block->fd, offset);
3039 } else {
3040 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3041 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3042 flags, -1, 0);
3044 #else
3045 abort();
3046 #endif
3047 } else {
3048 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3049 flags |= MAP_SHARED | MAP_ANONYMOUS;
3050 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3051 flags, -1, 0);
3052 #else
3053 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3054 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3055 flags, -1, 0);
3056 #endif
3058 if (area != vaddr) {
3059 fprintf(stderr, "Could not remap addr: "
3060 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3061 length, addr);
3062 exit(1);
3064 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3066 return;
3070 #endif /* !_WIN32 */
3072 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3073 With the exception of the softmmu code in this file, this should
3074 only be used for local memory (e.g. video ram) that the device owns,
3075 and knows it isn't going to access beyond the end of the block.
3077 It should not be used for general purpose DMA.
3078 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3080 void *qemu_get_ram_ptr(ram_addr_t addr)
3082 RAMBlock *block;
3084 QLIST_FOREACH(block, &ram_list.blocks, next) {
3085 if (addr - block->offset < block->length) {
3086 /* Move this entry to to start of the list. */
3087 if (block != QLIST_FIRST(&ram_list.blocks)) {
3088 QLIST_REMOVE(block, next);
3089 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3091 if (xen_enabled()) {
3092 /* We need to check if the requested address is in the RAM
3093 * because we don't want to map the entire memory in QEMU.
3094 * In that case just map until the end of the page.
3096 if (block->offset == 0) {
3097 return xen_map_cache(addr, 0, 0);
3098 } else if (block->host == NULL) {
3099 block->host =
3100 xen_map_cache(block->offset, block->length, 1);
3103 return block->host + (addr - block->offset);
3107 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3108 abort();
3110 return NULL;
3113 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3114 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3116 void *qemu_safe_ram_ptr(ram_addr_t addr)
3118 RAMBlock *block;
3120 QLIST_FOREACH(block, &ram_list.blocks, next) {
3121 if (addr - block->offset < block->length) {
3122 if (xen_enabled()) {
3123 /* We need to check if the requested address is in the RAM
3124 * because we don't want to map the entire memory in QEMU.
3125 * In that case just map until the end of the page.
3127 if (block->offset == 0) {
3128 return xen_map_cache(addr, 0, 0);
3129 } else if (block->host == NULL) {
3130 block->host =
3131 xen_map_cache(block->offset, block->length, 1);
3134 return block->host + (addr - block->offset);
3138 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3139 abort();
3141 return NULL;
3144 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3145 * but takes a size argument */
3146 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3148 if (*size == 0) {
3149 return NULL;
3151 if (xen_enabled()) {
3152 return xen_map_cache(addr, *size, 1);
3153 } else {
3154 RAMBlock *block;
3156 QLIST_FOREACH(block, &ram_list.blocks, next) {
3157 if (addr - block->offset < block->length) {
3158 if (addr - block->offset + *size > block->length)
3159 *size = block->length - addr + block->offset;
3160 return block->host + (addr - block->offset);
3164 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3165 abort();
/* Only emits the qemu_put_ram_ptr trace event for 'addr'. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3174 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3176 RAMBlock *block;
3177 uint8_t *host = ptr;
3179 if (xen_enabled()) {
3180 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3181 return 0;
3184 QLIST_FOREACH(block, &ram_list.blocks, next) {
3185 /* This case append when the block is not mapped. */
3186 if (block->host == NULL) {
3187 continue;
3189 if (host - block->host < block->length) {
3190 *ram_addr = block->offset + (host - block->host);
3191 return 0;
3195 return -1;
3198 /* Some of the softmmu routines need to translate from a host pointer
3199 (typically a TLB entry) back to a ram offset. */
3200 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3202 ram_addr_t ram_addr;
3204 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3205 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3206 abort();
3208 return ram_addr;
3211 static uint64_t unassigned_mem_read(void *opaque, target_phys_addr_t addr,
3212 unsigned size)
3214 #ifdef DEBUG_UNASSIGNED
3215 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3216 #endif
3217 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3218 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
3219 #endif
3220 return 0;
3223 static void unassigned_mem_write(void *opaque, target_phys_addr_t addr,
3224 uint64_t val, unsigned size)
3226 #ifdef DEBUG_UNASSIGNED
3227 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
3228 #endif
3229 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3230 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
3231 #endif
3234 static const MemoryRegionOps unassigned_mem_ops = {
3235 .read = unassigned_mem_read,
3236 .write = unassigned_mem_write,
3237 .endianness = DEVICE_NATIVE_ENDIAN,
3240 static uint64_t error_mem_read(void *opaque, target_phys_addr_t addr,
3241 unsigned size)
3243 abort();
3246 static void error_mem_write(void *opaque, target_phys_addr_t addr,
3247 uint64_t value, unsigned size)
3249 abort();
3252 static const MemoryRegionOps error_mem_ops = {
3253 .read = error_mem_read,
3254 .write = error_mem_write,
3255 .endianness = DEVICE_NATIVE_ENDIAN,
3258 static const MemoryRegionOps rom_mem_ops = {
3259 .read = error_mem_read,
3260 .write = unassigned_mem_write,
3261 .endianness = DEVICE_NATIVE_ENDIAN,
3264 static void notdirty_mem_write(void *opaque, target_phys_addr_t ram_addr,
3265 uint64_t val, unsigned size)
3267 int dirty_flags;
3268 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3269 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3270 #if !defined(CONFIG_USER_ONLY)
3271 tb_invalidate_phys_page_fast(ram_addr, size);
3272 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3273 #endif
3275 switch (size) {
3276 case 1:
3277 stb_p(qemu_get_ram_ptr(ram_addr), val);
3278 break;
3279 case 2:
3280 stw_p(qemu_get_ram_ptr(ram_addr), val);
3281 break;
3282 case 4:
3283 stl_p(qemu_get_ram_ptr(ram_addr), val);
3284 break;
3285 default:
3286 abort();
3288 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3289 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3290 /* we remove the notdirty callback only if the code has been
3291 flushed */
3292 if (dirty_flags == 0xff)
3293 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3296 static const MemoryRegionOps notdirty_mem_ops = {
3297 .read = error_mem_read,
3298 .write = notdirty_mem_write,
3299 .endianness = DEVICE_NATIVE_ENDIAN,
3302 /* Generate a debug exception if a watchpoint has been hit. */
3303 static void check_watchpoint(int offset, int len_mask, int flags)
3305 CPUState *env = cpu_single_env;
3306 target_ulong pc, cs_base;
3307 TranslationBlock *tb;
3308 target_ulong vaddr;
3309 CPUWatchpoint *wp;
3310 int cpu_flags;
3312 if (env->watchpoint_hit) {
3313 /* We re-entered the check after replacing the TB. Now raise
3314 * the debug interrupt so that is will trigger after the
3315 * current instruction. */
3316 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3317 return;
3319 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3320 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3321 if ((vaddr == (wp->vaddr & len_mask) ||
3322 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3323 wp->flags |= BP_WATCHPOINT_HIT;
3324 if (!env->watchpoint_hit) {
3325 env->watchpoint_hit = wp;
3326 tb = tb_find_pc(env->mem_io_pc);
3327 if (!tb) {
3328 cpu_abort(env, "check_watchpoint: could not find TB for "
3329 "pc=%p", (void *)env->mem_io_pc);
3331 cpu_restore_state(tb, env, env->mem_io_pc);
3332 tb_phys_invalidate(tb, -1);
3333 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3334 env->exception_index = EXCP_DEBUG;
3335 cpu_loop_exit(env);
3336 } else {
3337 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3338 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3339 cpu_resume_from_signal(env, NULL);
3342 } else {
3343 wp->flags &= ~BP_WATCHPOINT_HIT;
3348 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3349 so these check for a hit then pass through to the normal out-of-line
3350 phys routines. */
3351 static uint64_t watch_mem_read(void *opaque, target_phys_addr_t addr,
3352 unsigned size)
3354 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3355 switch (size) {
3356 case 1: return ldub_phys(addr);
3357 case 2: return lduw_phys(addr);
3358 case 4: return ldl_phys(addr);
3359 default: abort();
3363 static void watch_mem_write(void *opaque, target_phys_addr_t addr,
3364 uint64_t val, unsigned size)
3366 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3367 switch (size) {
3368 case 1:
3369 stb_phys(addr, val);
3370 break;
3371 case 2:
3372 stw_phys(addr, val);
3373 break;
3374 case 4:
3375 stl_phys(addr, val);
3376 break;
3377 default: abort();
3381 static const MemoryRegionOps watch_mem_ops = {
3382 .read = watch_mem_read,
3383 .write = watch_mem_write,
3384 .endianness = DEVICE_NATIVE_ENDIAN,
3387 static uint64_t subpage_read(void *opaque, target_phys_addr_t addr,
3388 unsigned len)
3390 subpage_t *mmio = opaque;
3391 unsigned int idx = SUBPAGE_IDX(addr);
3392 MemoryRegionSection *section;
3393 #if defined(DEBUG_SUBPAGE)
3394 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3395 mmio, len, addr, idx);
3396 #endif
3398 section = &phys_sections[mmio->sub_section[idx]];
3399 addr += mmio->base;
3400 addr -= section->offset_within_address_space;
3401 addr += section->offset_within_region;
3402 return io_mem_read(section->mr->ram_addr, addr, len);
3405 static void subpage_write(void *opaque, target_phys_addr_t addr,
3406 uint64_t value, unsigned len)
3408 subpage_t *mmio = opaque;
3409 unsigned int idx = SUBPAGE_IDX(addr);
3410 MemoryRegionSection *section;
3411 #if defined(DEBUG_SUBPAGE)
3412 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3413 " idx %d value %"PRIx64"\n",
3414 __func__, mmio, len, addr, idx, value);
3415 #endif
3417 section = &phys_sections[mmio->sub_section[idx]];
3418 addr += mmio->base;
3419 addr -= section->offset_within_address_space;
3420 addr += section->offset_within_region;
3421 io_mem_write(section->mr->ram_addr, addr, value, len);
3424 static const MemoryRegionOps subpage_ops = {
3425 .read = subpage_read,
3426 .write = subpage_write,
3427 .endianness = DEVICE_NATIVE_ENDIAN,
3430 static uint64_t subpage_ram_read(void *opaque, target_phys_addr_t addr,
3431 unsigned size)
3433 ram_addr_t raddr = addr;
3434 void *ptr = qemu_get_ram_ptr(raddr);
3435 switch (size) {
3436 case 1: return ldub_p(ptr);
3437 case 2: return lduw_p(ptr);
3438 case 4: return ldl_p(ptr);
3439 default: abort();
3443 static void subpage_ram_write(void *opaque, target_phys_addr_t addr,
3444 uint64_t value, unsigned size)
3446 ram_addr_t raddr = addr;
3447 void *ptr = qemu_get_ram_ptr(raddr);
3448 switch (size) {
3449 case 1: return stb_p(ptr, value);
3450 case 2: return stw_p(ptr, value);
3451 case 4: return stl_p(ptr, value);
3452 default: abort();
3456 static const MemoryRegionOps subpage_ram_ops = {
3457 .read = subpage_ram_read,
3458 .write = subpage_ram_write,
3459 .endianness = DEVICE_NATIVE_ENDIAN,
3462 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3463 uint16_t section)
3465 int idx, eidx;
3467 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3468 return -1;
3469 idx = SUBPAGE_IDX(start);
3470 eidx = SUBPAGE_IDX(end);
3471 #if defined(DEBUG_SUBPAGE)
3472 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3473 mmio, start, end, idx, eidx, memory);
3474 #endif
3475 if (memory_region_is_ram(phys_sections[section].mr)) {
3476 MemoryRegionSection new_section = phys_sections[section];
3477 new_section.mr = &io_mem_subpage_ram;
3478 section = phys_section_add(&new_section);
3480 for (; idx <= eidx; idx++) {
3481 mmio->sub_section[idx] = section;
3484 return 0;
3487 static subpage_t *subpage_init(target_phys_addr_t base)
3489 subpage_t *mmio;
3491 mmio = g_malloc0(sizeof(subpage_t));
3493 mmio->base = base;
3494 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3495 "subpage", TARGET_PAGE_SIZE);
3496 mmio->iomem.subpage = true;
3497 #if defined(DEBUG_SUBPAGE)
3498 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3499 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3500 #endif
3501 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3503 return mmio;
3506 static int get_free_io_mem_idx(void)
3508 int i;
3510 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3511 if (!io_mem_used[i]) {
3512 io_mem_used[i] = 1;
3513 return i;
3515 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3516 return -1;
3519 /* mem_read and mem_write are arrays of functions containing the
3520 function to access byte (index 0), word (index 1) and dword (index
3521 2). Functions can be omitted with a NULL function pointer.
3522 If io_index is non zero, the corresponding io zone is
3523 modified. If it is zero, a new io zone is allocated. The return
3524 value can be used with cpu_register_physical_memory(). (-1) is
3525 returned if error. */
3526 static int cpu_register_io_memory_fixed(int io_index, MemoryRegion *mr)
3528 if (io_index <= 0) {
3529 io_index = get_free_io_mem_idx();
3530 if (io_index == -1)
3531 return io_index;
3532 } else {
3533 if (io_index >= IO_MEM_NB_ENTRIES)
3534 return -1;
3537 io_mem_region[io_index] = mr;
3539 return io_index;
3542 int cpu_register_io_memory(MemoryRegion *mr)
3544 return cpu_register_io_memory_fixed(0, mr);
3547 void cpu_unregister_io_memory(int io_index)
3549 io_mem_region[io_index] = NULL;
3550 io_mem_used[io_index] = 0;
3553 static uint16_t dummy_section(MemoryRegion *mr)
3555 MemoryRegionSection section = {
3556 .mr = mr,
3557 .offset_within_address_space = 0,
3558 .offset_within_region = 0,
3559 .size = UINT64_MAX,
3562 return phys_section_add(&section);
3565 static void io_mem_init(void)
3567 int i;
3569 /* Must be first: */
3570 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3571 assert(io_mem_ram.ram_addr == 0);
3572 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3573 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3574 "unassigned", UINT64_MAX);
3575 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3576 "notdirty", UINT64_MAX);
3577 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3578 "subpage-ram", UINT64_MAX);
3579 for (i=0; i<5; i++)
3580 io_mem_used[i] = 1;
3582 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3583 "watch", UINT64_MAX);
3586 static void core_begin(MemoryListener *listener)
3588 destroy_all_mappings();
3589 phys_sections_clear();
3590 phys_map.ptr = PHYS_MAP_NODE_NIL;
3591 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3594 static void core_commit(MemoryListener *listener)
3596 CPUState *env;
3598 /* since each CPU stores ram addresses in its TLB cache, we must
3599 reset the modified entries */
3600 /* XXX: slow ! */
3601 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3602 tlb_flush(env, 1);
3606 static void core_region_add(MemoryListener *listener,
3607 MemoryRegionSection *section)
3609 cpu_register_physical_memory_log(section, section->readonly);
3612 static void core_region_del(MemoryListener *listener,
3613 MemoryRegionSection *section)
3617 static void core_region_nop(MemoryListener *listener,
3618 MemoryRegionSection *section)
3620 cpu_register_physical_memory_log(section, section->readonly);
3623 static void core_log_start(MemoryListener *listener,
3624 MemoryRegionSection *section)
3628 static void core_log_stop(MemoryListener *listener,
3629 MemoryRegionSection *section)
3633 static void core_log_sync(MemoryListener *listener,
3634 MemoryRegionSection *section)
3638 static void core_log_global_start(MemoryListener *listener)
3640 cpu_physical_memory_set_dirty_tracking(1);
3643 static void core_log_global_stop(MemoryListener *listener)
3645 cpu_physical_memory_set_dirty_tracking(0);
3648 static void core_eventfd_add(MemoryListener *listener,
3649 MemoryRegionSection *section,
3650 bool match_data, uint64_t data, int fd)
3654 static void core_eventfd_del(MemoryListener *listener,
3655 MemoryRegionSection *section,
3656 bool match_data, uint64_t data, int fd)
3660 static void io_begin(MemoryListener *listener)
3664 static void io_commit(MemoryListener *listener)
3668 static void io_region_add(MemoryListener *listener,
3669 MemoryRegionSection *section)
3671 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3673 mrio->mr = section->mr;
3674 mrio->offset = section->offset_within_region;
3675 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3676 section->offset_within_address_space, section->size);
3677 ioport_register(&mrio->iorange);
3680 static void io_region_del(MemoryListener *listener,
3681 MemoryRegionSection *section)
3683 isa_unassign_ioport(section->offset_within_address_space, section->size);
3686 static void io_region_nop(MemoryListener *listener,
3687 MemoryRegionSection *section)
3691 static void io_log_start(MemoryListener *listener,
3692 MemoryRegionSection *section)
3696 static void io_log_stop(MemoryListener *listener,
3697 MemoryRegionSection *section)
3701 static void io_log_sync(MemoryListener *listener,
3702 MemoryRegionSection *section)
3706 static void io_log_global_start(MemoryListener *listener)
3710 static void io_log_global_stop(MemoryListener *listener)
3714 static void io_eventfd_add(MemoryListener *listener,
3715 MemoryRegionSection *section,
3716 bool match_data, uint64_t data, int fd)
3720 static void io_eventfd_del(MemoryListener *listener,
3721 MemoryRegionSection *section,
3722 bool match_data, uint64_t data, int fd)
3726 static MemoryListener core_memory_listener = {
3727 .begin = core_begin,
3728 .commit = core_commit,
3729 .region_add = core_region_add,
3730 .region_del = core_region_del,
3731 .region_nop = core_region_nop,
3732 .log_start = core_log_start,
3733 .log_stop = core_log_stop,
3734 .log_sync = core_log_sync,
3735 .log_global_start = core_log_global_start,
3736 .log_global_stop = core_log_global_stop,
3737 .eventfd_add = core_eventfd_add,
3738 .eventfd_del = core_eventfd_del,
3739 .priority = 0,
3742 static MemoryListener io_memory_listener = {
3743 .begin = io_begin,
3744 .commit = io_commit,
3745 .region_add = io_region_add,
3746 .region_del = io_region_del,
3747 .region_nop = io_region_nop,
3748 .log_start = io_log_start,
3749 .log_stop = io_log_stop,
3750 .log_sync = io_log_sync,
3751 .log_global_start = io_log_global_start,
3752 .log_global_stop = io_log_global_stop,
3753 .eventfd_add = io_eventfd_add,
3754 .eventfd_del = io_eventfd_del,
3755 .priority = 0,
3758 static void memory_map_init(void)
3760 system_memory = g_malloc(sizeof(*system_memory));
3761 memory_region_init(system_memory, "system", INT64_MAX);
3762 set_system_memory_map(system_memory);
3764 system_io = g_malloc(sizeof(*system_io));
3765 memory_region_init(system_io, "io", 65536);
3766 set_system_io_map(system_io);
3768 memory_listener_register(&core_memory_listener, system_memory);
3769 memory_listener_register(&io_memory_listener, system_io);
3772 MemoryRegion *get_system_memory(void)
3774 return system_memory;
3777 MemoryRegion *get_system_io(void)
3779 return system_io;
3782 #endif /* !defined(CONFIG_USER_ONLY) */
3784 /* physical memory access (slow version, mainly for debug) */
3785 #if defined(CONFIG_USER_ONLY)
3786 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3787 uint8_t *buf, int len, int is_write)
3789 int l, flags;
3790 target_ulong page;
3791 void * p;
3793 while (len > 0) {
3794 page = addr & TARGET_PAGE_MASK;
3795 l = (page + TARGET_PAGE_SIZE) - addr;
3796 if (l > len)
3797 l = len;
3798 flags = page_get_flags(page);
3799 if (!(flags & PAGE_VALID))
3800 return -1;
3801 if (is_write) {
3802 if (!(flags & PAGE_WRITE))
3803 return -1;
3804 /* XXX: this code should not depend on lock_user */
3805 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3806 return -1;
3807 memcpy(p, buf, l);
3808 unlock_user(p, addr, l);
3809 } else {
3810 if (!(flags & PAGE_READ))
3811 return -1;
3812 /* XXX: this code should not depend on lock_user */
3813 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3814 return -1;
3815 memcpy(buf, p, l);
3816 unlock_user(p, addr, 0);
3818 len -= l;
3819 buf += l;
3820 addr += l;
3822 return 0;
3825 #else
3826 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3827 int len, int is_write)
3829 int l, io_index;
3830 uint8_t *ptr;
3831 uint32_t val;
3832 target_phys_addr_t page;
3833 MemoryRegionSection section;
3835 while (len > 0) {
3836 page = addr & TARGET_PAGE_MASK;
3837 l = (page + TARGET_PAGE_SIZE) - addr;
3838 if (l > len)
3839 l = len;
3840 section = phys_page_find(page >> TARGET_PAGE_BITS);
3842 if (is_write) {
3843 if (!memory_region_is_ram(section.mr)) {
3844 target_phys_addr_t addr1;
3845 io_index = memory_region_get_ram_addr(section.mr)
3846 & (IO_MEM_NB_ENTRIES - 1);
3847 addr1 = (addr & ~TARGET_PAGE_MASK)
3848 + section.offset_within_region;
3849 /* XXX: could force cpu_single_env to NULL to avoid
3850 potential bugs */
3851 if (l >= 4 && ((addr1 & 3) == 0)) {
3852 /* 32 bit write access */
3853 val = ldl_p(buf);
3854 io_mem_write(io_index, addr1, val, 4);
3855 l = 4;
3856 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3857 /* 16 bit write access */
3858 val = lduw_p(buf);
3859 io_mem_write(io_index, addr1, val, 2);
3860 l = 2;
3861 } else {
3862 /* 8 bit write access */
3863 val = ldub_p(buf);
3864 io_mem_write(io_index, addr1, val, 1);
3865 l = 1;
3867 } else if (!section.readonly) {
3868 ram_addr_t addr1;
3869 addr1 = (memory_region_get_ram_addr(section.mr)
3870 + section.offset_within_region)
3871 | (addr & ~TARGET_PAGE_MASK);
3872 /* RAM case */
3873 ptr = qemu_get_ram_ptr(addr1);
3874 memcpy(ptr, buf, l);
3875 if (!cpu_physical_memory_is_dirty(addr1)) {
3876 /* invalidate code */
3877 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3878 /* set dirty bit */
3879 cpu_physical_memory_set_dirty_flags(
3880 addr1, (0xff & ~CODE_DIRTY_FLAG));
3882 qemu_put_ram_ptr(ptr);
3884 } else {
3885 if (!is_ram_rom_romd(&section)) {
3886 target_phys_addr_t addr1;
3887 /* I/O case */
3888 io_index = memory_region_get_ram_addr(section.mr)
3889 & (IO_MEM_NB_ENTRIES - 1);
3890 addr1 = (addr & ~TARGET_PAGE_MASK)
3891 + section.offset_within_region;
3892 if (l >= 4 && ((addr1 & 3) == 0)) {
3893 /* 32 bit read access */
3894 val = io_mem_read(io_index, addr1, 4);
3895 stl_p(buf, val);
3896 l = 4;
3897 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3898 /* 16 bit read access */
3899 val = io_mem_read(io_index, addr1, 2);
3900 stw_p(buf, val);
3901 l = 2;
3902 } else {
3903 /* 8 bit read access */
3904 val = io_mem_read(io_index, addr1, 1);
3905 stb_p(buf, val);
3906 l = 1;
3908 } else {
3909 /* RAM case */
3910 ptr = qemu_get_ram_ptr(section.mr->ram_addr
3911 + section.offset_within_region);
3912 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3913 qemu_put_ram_ptr(ptr);
3916 len -= l;
3917 buf += l;
3918 addr += l;
3922 /* used for ROM loading : can write in RAM and ROM */
3923 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3924 const uint8_t *buf, int len)
3926 int l;
3927 uint8_t *ptr;
3928 target_phys_addr_t page;
3929 MemoryRegionSection section;
3931 while (len > 0) {
3932 page = addr & TARGET_PAGE_MASK;
3933 l = (page + TARGET_PAGE_SIZE) - addr;
3934 if (l > len)
3935 l = len;
3936 section = phys_page_find(page >> TARGET_PAGE_BITS);
3938 if (!is_ram_rom_romd(&section)) {
3939 /* do nothing */
3940 } else {
3941 unsigned long addr1;
3942 addr1 = (memory_region_get_ram_addr(section.mr)
3943 + section.offset_within_region)
3944 + (addr & ~TARGET_PAGE_MASK);
3945 /* ROM/RAM case */
3946 ptr = qemu_get_ram_ptr(addr1);
3947 memcpy(ptr, buf, l);
3948 qemu_put_ram_ptr(ptr);
3950 len -= l;
3951 buf += l;
3952 addr += l;
3956 typedef struct {
3957 void *buffer;
3958 target_phys_addr_t addr;
3959 target_phys_addr_t len;
3960 } BounceBuffer;
3962 static BounceBuffer bounce;
3964 typedef struct MapClient {
3965 void *opaque;
3966 void (*callback)(void *opaque);
3967 QLIST_ENTRY(MapClient) link;
3968 } MapClient;
3970 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3971 = QLIST_HEAD_INITIALIZER(map_client_list);
3973 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3975 MapClient *client = g_malloc(sizeof(*client));
3977 client->opaque = opaque;
3978 client->callback = callback;
3979 QLIST_INSERT_HEAD(&map_client_list, client, link);
3980 return client;
3983 void cpu_unregister_map_client(void *_client)
3985 MapClient *client = (MapClient *)_client;
3987 QLIST_REMOVE(client, link);
3988 g_free(client);
3991 static void cpu_notify_map_clients(void)
3993 MapClient *client;
3995 while (!QLIST_EMPTY(&map_client_list)) {
3996 client = QLIST_FIRST(&map_client_list);
3997 client->callback(client->opaque);
3998 cpu_unregister_map_client(client);
4002 /* Map a physical memory region into a host virtual address.
4003 * May map a subset of the requested range, given by and returned in *plen.
4004 * May return NULL if resources needed to perform the mapping are exhausted.
4005 * Use only for reads OR writes - not for read-modify-write operations.
4006 * Use cpu_register_map_client() to know when retrying the map operation is
4007 * likely to succeed.
4009 void *cpu_physical_memory_map(target_phys_addr_t addr,
4010 target_phys_addr_t *plen,
4011 int is_write)
4013 target_phys_addr_t len = *plen;
4014 target_phys_addr_t todo = 0;
4015 int l;
4016 target_phys_addr_t page;
4017 MemoryRegionSection section;
4018 ram_addr_t raddr = RAM_ADDR_MAX;
4019 ram_addr_t rlen;
4020 void *ret;
4022 while (len > 0) {
4023 page = addr & TARGET_PAGE_MASK;
4024 l = (page + TARGET_PAGE_SIZE) - addr;
4025 if (l > len)
4026 l = len;
4027 section = phys_page_find(page >> TARGET_PAGE_BITS);
4029 if (!(memory_region_is_ram(section.mr) && !section.readonly)) {
4030 if (todo || bounce.buffer) {
4031 break;
4033 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4034 bounce.addr = addr;
4035 bounce.len = l;
4036 if (!is_write) {
4037 cpu_physical_memory_read(addr, bounce.buffer, l);
4040 *plen = l;
4041 return bounce.buffer;
4043 if (!todo) {
4044 raddr = memory_region_get_ram_addr(section.mr)
4045 + section.offset_within_region
4046 + (addr & ~TARGET_PAGE_MASK);
4049 len -= l;
4050 addr += l;
4051 todo += l;
4053 rlen = todo;
4054 ret = qemu_ram_ptr_length(raddr, &rlen);
4055 *plen = rlen;
4056 return ret;
4059 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4060 * Will also mark the memory as dirty if is_write == 1. access_len gives
4061 * the amount of memory that was actually read or written by the caller.
4063 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4064 int is_write, target_phys_addr_t access_len)
4066 if (buffer != bounce.buffer) {
4067 if (is_write) {
4068 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4069 while (access_len) {
4070 unsigned l;
4071 l = TARGET_PAGE_SIZE;
4072 if (l > access_len)
4073 l = access_len;
4074 if (!cpu_physical_memory_is_dirty(addr1)) {
4075 /* invalidate code */
4076 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4077 /* set dirty bit */
4078 cpu_physical_memory_set_dirty_flags(
4079 addr1, (0xff & ~CODE_DIRTY_FLAG));
4081 addr1 += l;
4082 access_len -= l;
4085 if (xen_enabled()) {
4086 xen_invalidate_map_cache_entry(buffer);
4088 return;
4090 if (is_write) {
4091 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4093 qemu_vfree(bounce.buffer);
4094 bounce.buffer = NULL;
4095 cpu_notify_map_clients();
4098 /* warning: addr must be aligned */
4099 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4100 enum device_endian endian)
4102 int io_index;
4103 uint8_t *ptr;
4104 uint32_t val;
4105 MemoryRegionSection section;
4107 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4109 if (!is_ram_rom_romd(&section)) {
4110 /* I/O case */
4111 io_index = memory_region_get_ram_addr(section.mr)
4112 & (IO_MEM_NB_ENTRIES - 1);
4113 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4114 val = io_mem_read(io_index, addr, 4);
4115 #if defined(TARGET_WORDS_BIGENDIAN)
4116 if (endian == DEVICE_LITTLE_ENDIAN) {
4117 val = bswap32(val);
4119 #else
4120 if (endian == DEVICE_BIG_ENDIAN) {
4121 val = bswap32(val);
4123 #endif
4124 } else {
4125 /* RAM case */
4126 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4127 & TARGET_PAGE_MASK)
4128 + section.offset_within_region) +
4129 (addr & ~TARGET_PAGE_MASK);
4130 switch (endian) {
4131 case DEVICE_LITTLE_ENDIAN:
4132 val = ldl_le_p(ptr);
4133 break;
4134 case DEVICE_BIG_ENDIAN:
4135 val = ldl_be_p(ptr);
4136 break;
4137 default:
4138 val = ldl_p(ptr);
4139 break;
4142 return val;
4145 uint32_t ldl_phys(target_phys_addr_t addr)
4147 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4150 uint32_t ldl_le_phys(target_phys_addr_t addr)
4152 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4155 uint32_t ldl_be_phys(target_phys_addr_t addr)
4157 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4160 /* warning: addr must be aligned */
4161 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4162 enum device_endian endian)
4164 int io_index;
4165 uint8_t *ptr;
4166 uint64_t val;
4167 MemoryRegionSection section;
4169 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4171 if (!is_ram_rom_romd(&section)) {
4172 /* I/O case */
4173 io_index = memory_region_get_ram_addr(section.mr)
4174 & (IO_MEM_NB_ENTRIES - 1);
4175 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4177 /* XXX This is broken when device endian != cpu endian.
4178 Fix and add "endian" variable check */
4179 #ifdef TARGET_WORDS_BIGENDIAN
4180 val = io_mem_read(io_index, addr, 4) << 32;
4181 val |= io_mem_read(io_index, addr + 4, 4);
4182 #else
4183 val = io_mem_read(io_index, addr, 4);
4184 val |= io_mem_read(io_index, addr + 4, 4) << 32;
4185 #endif
4186 } else {
4187 /* RAM case */
4188 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4189 & TARGET_PAGE_MASK)
4190 + section.offset_within_region)
4191 + (addr & ~TARGET_PAGE_MASK);
4192 switch (endian) {
4193 case DEVICE_LITTLE_ENDIAN:
4194 val = ldq_le_p(ptr);
4195 break;
4196 case DEVICE_BIG_ENDIAN:
4197 val = ldq_be_p(ptr);
4198 break;
4199 default:
4200 val = ldq_p(ptr);
4201 break;
4204 return val;
4207 uint64_t ldq_phys(target_phys_addr_t addr)
4209 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4212 uint64_t ldq_le_phys(target_phys_addr_t addr)
4214 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4217 uint64_t ldq_be_phys(target_phys_addr_t addr)
4219 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4222 /* XXX: optimize */
4223 uint32_t ldub_phys(target_phys_addr_t addr)
4225 uint8_t val;
4226 cpu_physical_memory_read(addr, &val, 1);
4227 return val;
4230 /* warning: addr must be aligned */
4231 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4232 enum device_endian endian)
4234 int io_index;
4235 uint8_t *ptr;
4236 uint64_t val;
4237 MemoryRegionSection section;
4239 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4241 if (!is_ram_rom_romd(&section)) {
4242 /* I/O case */
4243 io_index = memory_region_get_ram_addr(section.mr)
4244 & (IO_MEM_NB_ENTRIES - 1);
4245 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4246 val = io_mem_read(io_index, addr, 2);
4247 #if defined(TARGET_WORDS_BIGENDIAN)
4248 if (endian == DEVICE_LITTLE_ENDIAN) {
4249 val = bswap16(val);
4251 #else
4252 if (endian == DEVICE_BIG_ENDIAN) {
4253 val = bswap16(val);
4255 #endif
4256 } else {
4257 /* RAM case */
4258 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4259 & TARGET_PAGE_MASK)
4260 + section.offset_within_region)
4261 + (addr & ~TARGET_PAGE_MASK);
4262 switch (endian) {
4263 case DEVICE_LITTLE_ENDIAN:
4264 val = lduw_le_p(ptr);
4265 break;
4266 case DEVICE_BIG_ENDIAN:
4267 val = lduw_be_p(ptr);
4268 break;
4269 default:
4270 val = lduw_p(ptr);
4271 break;
4274 return val;
4277 uint32_t lduw_phys(target_phys_addr_t addr)
4279 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4282 uint32_t lduw_le_phys(target_phys_addr_t addr)
4284 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4287 uint32_t lduw_be_phys(target_phys_addr_t addr)
4289 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4292 /* warning: addr must be aligned. The ram page is not masked as dirty
4293 and the code inside is not invalidated. It is useful if the dirty
4294 bits are used to track modified PTEs */
4295 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4297 int io_index;
4298 uint8_t *ptr;
4299 MemoryRegionSection section;
4301 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4303 if (!memory_region_is_ram(section.mr) || section.readonly) {
4304 if (memory_region_is_ram(section.mr)) {
4305 io_index = io_mem_rom.ram_addr;
4306 } else {
4307 io_index = memory_region_get_ram_addr(section.mr);
4309 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4310 io_mem_write(io_index, addr, val, 4);
4311 } else {
4312 unsigned long addr1 = (memory_region_get_ram_addr(section.mr)
4313 & TARGET_PAGE_MASK)
4314 + section.offset_within_region
4315 + (addr & ~TARGET_PAGE_MASK);
4316 ptr = qemu_get_ram_ptr(addr1);
4317 stl_p(ptr, val);
4319 if (unlikely(in_migration)) {
4320 if (!cpu_physical_memory_is_dirty(addr1)) {
4321 /* invalidate code */
4322 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4323 /* set dirty bit */
4324 cpu_physical_memory_set_dirty_flags(
4325 addr1, (0xff & ~CODE_DIRTY_FLAG));
4331 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4333 int io_index;
4334 uint8_t *ptr;
4335 MemoryRegionSection section;
4337 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4339 if (!memory_region_is_ram(section.mr) || section.readonly) {
4340 if (memory_region_is_ram(section.mr)) {
4341 io_index = io_mem_rom.ram_addr;
4342 } else {
4343 io_index = memory_region_get_ram_addr(section.mr)
4344 & (IO_MEM_NB_ENTRIES - 1);
4346 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4347 #ifdef TARGET_WORDS_BIGENDIAN
4348 io_mem_write(io_index, addr, val >> 32, 4);
4349 io_mem_write(io_index, addr + 4, (uint32_t)val, 4);
4350 #else
4351 io_mem_write(io_index, addr, (uint32_t)val, 4);
4352 io_mem_write(io_index, addr + 4, val >> 32, 4);
4353 #endif
4354 } else {
4355 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr)
4356 & TARGET_PAGE_MASK)
4357 + section.offset_within_region)
4358 + (addr & ~TARGET_PAGE_MASK);
4359 stq_p(ptr, val);
4363 /* warning: addr must be aligned */
4364 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4365 enum device_endian endian)
4367 int io_index;
4368 uint8_t *ptr;
4369 MemoryRegionSection section;
4371 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4373 if (!memory_region_is_ram(section.mr) || section.readonly) {
4374 if (memory_region_is_ram(section.mr)) {
4375 io_index = io_mem_rom.ram_addr;
4376 } else {
4377 io_index = memory_region_get_ram_addr(section.mr)
4378 & (IO_MEM_NB_ENTRIES - 1);
4380 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4381 #if defined(TARGET_WORDS_BIGENDIAN)
4382 if (endian == DEVICE_LITTLE_ENDIAN) {
4383 val = bswap32(val);
4385 #else
4386 if (endian == DEVICE_BIG_ENDIAN) {
4387 val = bswap32(val);
4389 #endif
4390 io_mem_write(io_index, addr, val, 4);
4391 } else {
4392 unsigned long addr1;
4393 addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
4394 + section.offset_within_region
4395 + (addr & ~TARGET_PAGE_MASK);
4396 /* RAM case */
4397 ptr = qemu_get_ram_ptr(addr1);
4398 switch (endian) {
4399 case DEVICE_LITTLE_ENDIAN:
4400 stl_le_p(ptr, val);
4401 break;
4402 case DEVICE_BIG_ENDIAN:
4403 stl_be_p(ptr, val);
4404 break;
4405 default:
4406 stl_p(ptr, val);
4407 break;
4409 if (!cpu_physical_memory_is_dirty(addr1)) {
4410 /* invalidate code */
4411 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4412 /* set dirty bit */
4413 cpu_physical_memory_set_dirty_flags(addr1,
4414 (0xff & ~CODE_DIRTY_FLAG));
4419 void stl_phys(target_phys_addr_t addr, uint32_t val)
4421 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4424 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4426 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4429 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4431 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4434 /* XXX: optimize */
4435 void stb_phys(target_phys_addr_t addr, uint32_t val)
4437 uint8_t v = val;
4438 cpu_physical_memory_write(addr, &v, 1);
4441 /* warning: addr must be aligned */
4442 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4443 enum device_endian endian)
4445 int io_index;
4446 uint8_t *ptr;
4447 MemoryRegionSection section;
4449 section = phys_page_find(addr >> TARGET_PAGE_BITS);
4451 if (!memory_region_is_ram(section.mr) || section.readonly) {
4452 if (memory_region_is_ram(section.mr)) {
4453 io_index = io_mem_rom.ram_addr;
4454 } else {
4455 io_index = memory_region_get_ram_addr(section.mr)
4456 & (IO_MEM_NB_ENTRIES - 1);
4458 addr = (addr & ~TARGET_PAGE_MASK) + section.offset_within_region;
4459 #if defined(TARGET_WORDS_BIGENDIAN)
4460 if (endian == DEVICE_LITTLE_ENDIAN) {
4461 val = bswap16(val);
4463 #else
4464 if (endian == DEVICE_BIG_ENDIAN) {
4465 val = bswap16(val);
4467 #endif
4468 io_mem_write(io_index, addr, val, 2);
4469 } else {
4470 unsigned long addr1;
4471 addr1 = (memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK)
4472 + section.offset_within_region + (addr & ~TARGET_PAGE_MASK);
4473 /* RAM case */
4474 ptr = qemu_get_ram_ptr(addr1);
4475 switch (endian) {
4476 case DEVICE_LITTLE_ENDIAN:
4477 stw_le_p(ptr, val);
4478 break;
4479 case DEVICE_BIG_ENDIAN:
4480 stw_be_p(ptr, val);
4481 break;
4482 default:
4483 stw_p(ptr, val);
4484 break;
4486 if (!cpu_physical_memory_is_dirty(addr1)) {
4487 /* invalidate code */
4488 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4489 /* set dirty bit */
4490 cpu_physical_memory_set_dirty_flags(addr1,
4491 (0xff & ~CODE_DIRTY_FLAG));
4496 void stw_phys(target_phys_addr_t addr, uint32_t val)
4498 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4501 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4503 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4506 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4508 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4511 /* XXX: optimize */
4512 void stq_phys(target_phys_addr_t addr, uint64_t val)
4514 val = tswap64(val);
4515 cpu_physical_memory_write(addr, &val, 8);
4518 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4520 val = cpu_to_le64(val);
4521 cpu_physical_memory_write(addr, &val, 8);
4524 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4526 val = cpu_to_be64(val);
4527 cpu_physical_memory_write(addr, &val, 8);
4530 /* virtual memory access for debug (includes writing to ROM) */
4531 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4532 uint8_t *buf, int len, int is_write)
4534 int l;
4535 target_phys_addr_t phys_addr;
4536 target_ulong page;
4538 while (len > 0) {
4539 page = addr & TARGET_PAGE_MASK;
4540 phys_addr = cpu_get_phys_page_debug(env, page);
4541 /* if no physical page mapped, return an error */
4542 if (phys_addr == -1)
4543 return -1;
4544 l = (page + TARGET_PAGE_SIZE) - addr;
4545 if (l > len)
4546 l = len;
4547 phys_addr += (addr & ~TARGET_PAGE_MASK);
4548 if (is_write)
4549 cpu_physical_memory_write_rom(phys_addr, buf, l);
4550 else
4551 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4552 len -= l;
4553 buf += l;
4554 addr += l;
4556 return 0;
4558 #endif
4560 /* in deterministic execution mode, instructions doing device I/Os
4561 must be at the end of the TB */
4562 void cpu_io_recompile(CPUState *env, void *retaddr)
4564 TranslationBlock *tb;
4565 uint32_t n, cflags;
4566 target_ulong pc, cs_base;
4567 uint64_t flags;
4569 tb = tb_find_pc((unsigned long)retaddr);
4570 if (!tb) {
4571 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4572 retaddr);
4574 n = env->icount_decr.u16.low + tb->icount;
4575 cpu_restore_state(tb, env, (unsigned long)retaddr);
4576 /* Calculate how many instructions had been executed before the fault
4577 occurred. */
4578 n = n - env->icount_decr.u16.low;
4579 /* Generate a new TB ending on the I/O insn. */
4580 n++;
4581 /* On MIPS and SH, delay slot instructions can only be restarted if
4582 they were already the first instruction in the TB. If this is not
4583 the first instruction in a TB then re-execute the preceding
4584 branch. */
4585 #if defined(TARGET_MIPS)
4586 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4587 env->active_tc.PC -= 4;
4588 env->icount_decr.u16.low++;
4589 env->hflags &= ~MIPS_HFLAG_BMASK;
4591 #elif defined(TARGET_SH4)
4592 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4593 && n > 1) {
4594 env->pc -= 2;
4595 env->icount_decr.u16.low++;
4596 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4598 #endif
4599 /* This should never happen. */
4600 if (n > CF_COUNT_MASK)
4601 cpu_abort(env, "TB too big during recompile");
4603 cflags = n | CF_LAST_IO;
4604 pc = tb->pc;
4605 cs_base = tb->cs_base;
4606 flags = tb->flags;
4607 tb_phys_invalidate(tb, -1);
4608 /* FIXME: In theory this could raise an exception. In practice
4609 we have already translated the block once so it's probably ok. */
4610 tb_gen_code(env, pc, cs_base, flags, cflags);
4611 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4612 the first in the TB) then we end up generating a whole new TB and
4613 repeating the fault, which is horribly inefficient.
4614 Better would be to execute just this insn uncached, or generate a
4615 second new TB. */
4616 cpu_resume_from_signal(env, NULL);
4619 #if !defined(CONFIG_USER_ONLY)
4621 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4623 int i, target_code_size, max_target_code_size;
4624 int direct_jmp_count, direct_jmp2_count, cross_page;
4625 TranslationBlock *tb;
4627 target_code_size = 0;
4628 max_target_code_size = 0;
4629 cross_page = 0;
4630 direct_jmp_count = 0;
4631 direct_jmp2_count = 0;
4632 for(i = 0; i < nb_tbs; i++) {
4633 tb = &tbs[i];
4634 target_code_size += tb->size;
4635 if (tb->size > max_target_code_size)
4636 max_target_code_size = tb->size;
4637 if (tb->page_addr[1] != -1)
4638 cross_page++;
4639 if (tb->tb_next_offset[0] != 0xffff) {
4640 direct_jmp_count++;
4641 if (tb->tb_next_offset[1] != 0xffff) {
4642 direct_jmp2_count++;
4646 /* XXX: avoid using doubles ? */
4647 cpu_fprintf(f, "Translation buffer state:\n");
4648 cpu_fprintf(f, "gen code size %td/%ld\n",
4649 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4650 cpu_fprintf(f, "TB count %d/%d\n",
4651 nb_tbs, code_gen_max_blocks);
4652 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4653 nb_tbs ? target_code_size / nb_tbs : 0,
4654 max_target_code_size);
4655 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4656 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4657 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4658 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4659 cross_page,
4660 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4661 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4662 direct_jmp_count,
4663 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4664 direct_jmp2_count,
4665 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4666 cpu_fprintf(f, "\nStatistics:\n");
4667 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4668 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4669 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4670 tcg_dump_info(f, cpu_fprintf);
4673 /* NOTE: this function can trigger an exception */
4674 /* NOTE2: the returned address is not exactly the physical address: it
4675 is the offset relative to phys_ram_base */
4676 tb_page_addr_t get_page_addr_code(CPUState *env1, target_ulong addr)
4678 int mmu_idx, page_index, pd;
4679 void *p;
4681 page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
4682 mmu_idx = cpu_mmu_index(env1);
4683 if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
4684 (addr & TARGET_PAGE_MASK))) {
4685 ldub_code(addr);
4687 pd = env1->tlb_table[mmu_idx][page_index].addr_code & ~TARGET_PAGE_MASK;
4688 if (pd != io_mem_ram.ram_addr && pd != io_mem_rom.ram_addr
4689 && !io_mem_region[pd]->rom_device) {
4690 #if defined(TARGET_ALPHA) || defined(TARGET_MIPS) || defined(TARGET_SPARC)
4691 cpu_unassigned_access(env1, addr, 0, 1, 0, 4);
4692 #else
4693 cpu_abort(env1, "Trying to execute code outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
4694 #endif
4696 p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
4697 return qemu_ram_addr_from_host_nofail(p);
/*
 * A helper function for the _utterly broken_ virtio device model to find out if
 * it's running on a big endian machine. Don't do this at home kids!
 *
 * Returns true when TARGET_WORDS_BIGENDIAN is defined, false otherwise.
 */
bool virtio_is_big_endian(void);
bool virtio_is_big_endian(void)
{
#if !defined(TARGET_WORDS_BIGENDIAN)
    return false;
#else
    return true;
#endif
}
4714 #define MMUSUFFIX _cmmu
4715 #undef GETPC
4716 #define GETPC() NULL
4717 #define env cpu_single_env
4718 #define SOFTMMU_CODE_ACCESS
4720 #define SHIFT 0
4721 #include "softmmu_template.h"
4723 #define SHIFT 1
4724 #include "softmmu_template.h"
4726 #define SHIFT 2
4727 #include "softmmu_template.h"
4729 #define SHIFT 3
4730 #include "softmmu_template.h"
4732 #undef env
4734 #endif