qemu-kvm: Fix error path of virtio_pci_set_guest_notifiers
[qemu/qemu-dev-zwu.git] / exec.c
blob4b5ce6f0aed9b9f9b5f32b5d05a208148b5428ba
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "exec-all.h"
30 #include "cache-utils.h"
32 #if !defined(TARGET_IA64)
33 #include "tcg.h"
34 #endif
36 #include "hw/hw.h"
37 #include "hw/qdev.h"
38 #include "osdep.h"
39 #include "kvm.h"
40 #include "hw/xen.h"
41 #include "qemu-timer.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
45 #include <sys/param.h>
46 #if __FreeBSD_version >= 700104
47 #define HAVE_KINFO_GETVMMAP
48 #define sigqueue sigqueue_freebsd /* avoid redefinition */
49 #include <sys/time.h>
50 #include <sys/proc.h>
51 #include <machine/profile.h>
52 #define _KERNEL
53 #include <sys/user.h>
54 #undef _KERNEL
55 #undef sigqueue
56 #include <libutil.h>
57 #endif
58 #endif
59 #else /* !CONFIG_USER_ONLY */
60 #include "xen-mapcache.h"
61 #endif
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
117 #endif
119 CPUState *first_cpu;
120 /* current CPU in the current thread. It is only valid inside
121 cpu_exec() */
122 CPUState *cpu_single_env;
123 /* 0 = Do not count executed instructions.
124 1 = Precise instruction counting.
125 2 = Adaptive rate instruction counting. */
126 int use_icount = 0;
127 /* Current instruction counter. While executing translated code this may
128 include some instructions that have not yet been executed. */
129 int64_t qemu_icount;
131 typedef struct PageDesc {
132 /* list of TBs intersecting this ram page */
133 TranslationBlock *first_tb;
134 /* in order to optimize self modifying code, we count the number
135 of lookups we do to a given page to use a bitmap */
136 unsigned int code_write_count;
137 uint8_t *code_bitmap;
138 #if defined(CONFIG_USER_ONLY)
139 unsigned long flags;
140 #endif
141 } PageDesc;
143 /* In system mode we want L1_MAP to be based on ram offsets,
144 while in user mode we want it to be based on virtual addresses. */
145 #if !defined(CONFIG_USER_ONLY)
146 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
147 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
148 #else
149 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
150 #endif
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
153 #endif
155 /* Size of the L2 (and L3, etc) page tables. */
156 #define L2_BITS 10
157 #define L2_SIZE (1 << L2_BITS)
159 /* The bits remaining after N lower levels of page tables. */
160 #define P_L1_BITS_REM \
161 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
162 #define V_L1_BITS_REM \
163 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165 /* Size of the L1 page table. Avoid silly small sizes. */
166 #if P_L1_BITS_REM < 4
167 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
168 #else
169 #define P_L1_BITS P_L1_BITS_REM
170 #endif
172 #if V_L1_BITS_REM < 4
173 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
174 #else
175 #define V_L1_BITS V_L1_BITS_REM
176 #endif
178 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
179 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
181 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
182 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
184 unsigned long qemu_real_host_page_size;
185 unsigned long qemu_host_page_bits;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
206 /* io memory support */
207 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
208 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
209 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
210 static char io_mem_used[IO_MEM_NB_ENTRIES];
211 static int io_mem_watch;
212 #endif
214 /* log support */
215 #ifdef WIN32
216 static const char *logfilename = "qemu.log";
217 #else
218 static const char *logfilename = "/tmp/qemu.log";
219 #endif
220 FILE *logfile;
221 int loglevel;
222 static int log_append = 0;
224 /* statistics */
225 #if !defined(CONFIG_USER_ONLY)
226 static int tlb_flush_count;
227 #endif
228 static int tb_flush_count;
229 static int tb_phys_invalidate_count;
231 #ifdef _WIN32
/* Win32 variant: ask for read/write/execute protection on the 'size'
   bytes starting at 'addr'.  The result of VirtualProtect() is not
   checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
239 #else
/* POSIX variant: ask for read/write/execute protection on every host
   page overlapping [addr, addr + size).  The range is widened to host
   page boundaries first; the result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    const unsigned long host_page = getpagesize();
    const unsigned long page_mask = ~(host_page - 1);
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long limit = (unsigned long)addr + size;

    /* round the end up to the next page boundary */
    limit = (limit + host_page - 1) & page_mask;

    mprotect((void *)first, limit - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
255 #endif
257 static void page_init(void)
259 /* NOTE: we can always suppose that qemu_host_page_size >=
260 TARGET_PAGE_SIZE */
261 #ifdef _WIN32
263 SYSTEM_INFO system_info;
265 GetSystemInfo(&system_info);
266 qemu_real_host_page_size = system_info.dwPageSize;
268 #else
269 qemu_real_host_page_size = getpagesize();
270 #endif
271 if (qemu_host_page_size == 0)
272 qemu_host_page_size = qemu_real_host_page_size;
273 if (qemu_host_page_size < TARGET_PAGE_SIZE)
274 qemu_host_page_size = TARGET_PAGE_SIZE;
275 qemu_host_page_bits = 0;
276 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
277 qemu_host_page_bits++;
278 qemu_host_page_mask = ~(qemu_host_page_size - 1);
280 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
282 #ifdef HAVE_KINFO_GETVMMAP
283 struct kinfo_vmentry *freep;
284 int i, cnt;
286 freep = kinfo_getvmmap(getpid(), &cnt);
287 if (freep) {
288 mmap_lock();
289 for (i = 0; i < cnt; i++) {
290 unsigned long startaddr, endaddr;
292 startaddr = freep[i].kve_start;
293 endaddr = freep[i].kve_end;
294 if (h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 } else {
301 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
302 endaddr = ~0ul;
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 #endif
308 free(freep);
309 mmap_unlock();
311 #else
312 FILE *f;
314 last_brk = (unsigned long)sbrk(0);
316 f = fopen("/compat/linux/proc/self/maps", "r");
317 if (f) {
318 mmap_lock();
320 do {
321 unsigned long startaddr, endaddr;
322 int n;
324 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
326 if (n == 2 && h2g_valid(startaddr)) {
327 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
329 if (h2g_valid(endaddr)) {
330 endaddr = h2g(endaddr);
331 } else {
332 endaddr = ~0ul;
334 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
336 } while (!feof(f));
338 fclose(f);
339 mmap_unlock();
341 #endif
343 #endif
346 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
348 PageDesc *pd;
349 void **lp;
350 int i;
352 #if defined(CONFIG_USER_ONLY)
353 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
354 # define ALLOC(P, SIZE) \
355 do { \
356 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
357 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
358 } while (0)
359 #else
360 # define ALLOC(P, SIZE) \
361 do { P = qemu_mallocz(SIZE); } while (0)
362 #endif
364 /* Level 1. Always allocated. */
365 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
367 /* Level 2..N-1. */
368 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
369 void **p = *lp;
371 if (p == NULL) {
372 if (!alloc) {
373 return NULL;
375 ALLOC(p, sizeof(void *) * L2_SIZE);
376 *lp = p;
379 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
382 pd = *lp;
383 if (pd == NULL) {
384 if (!alloc) {
385 return NULL;
387 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
388 *lp = pd;
391 #undef ALLOC
393 return pd + (index & (L2_SIZE - 1));
396 static inline PageDesc *page_find(tb_page_addr_t index)
398 return page_find_alloc(index, 0);
401 #if !defined(CONFIG_USER_ONLY)
402 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
404 PhysPageDesc *pd;
405 void **lp;
406 int i;
408 /* Level 1. Always allocated. */
409 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
411 /* Level 2..N-1. */
412 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
413 void **p = *lp;
414 if (p == NULL) {
415 if (!alloc) {
416 return NULL;
418 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
420 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
423 pd = *lp;
424 if (pd == NULL) {
425 int i;
427 if (!alloc) {
428 return NULL;
431 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
433 for (i = 0; i < L2_SIZE; i++) {
434 pd[i].phys_offset = IO_MEM_UNASSIGNED;
435 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
439 return pd + (index & (L2_SIZE - 1));
442 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
444 return phys_page_find_alloc(index, 0);
447 static void tlb_protect_code(ram_addr_t ram_addr);
448 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
449 target_ulong vaddr);
450 #define mmap_lock() do { } while(0)
451 #define mmap_unlock() do { } while(0)
452 #endif
454 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
456 #if defined(CONFIG_USER_ONLY)
457 /* Currently it is not recommended to allocate big chunks of data in
458 user mode. It will change when a dedicated libc will be used */
459 #define USE_STATIC_CODE_GEN_BUFFER
460 #endif
462 #ifdef USE_STATIC_CODE_GEN_BUFFER
463 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
464 __attribute__((aligned (CODE_GEN_ALIGN)));
465 #endif
467 static void code_gen_alloc(unsigned long tb_size)
469 if (kvm_enabled())
470 return;
472 #ifdef USE_STATIC_CODE_GEN_BUFFER
473 code_gen_buffer = static_code_gen_buffer;
474 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
475 map_exec(code_gen_buffer, code_gen_buffer_size);
476 #else
477 code_gen_buffer_size = tb_size;
478 if (code_gen_buffer_size == 0) {
479 #if defined(CONFIG_USER_ONLY)
480 /* in user mode, phys_ram_size is not meaningful */
481 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
482 #else
483 /* XXX: needs adjustments */
484 code_gen_buffer_size = (unsigned long)(ram_size / 4);
485 #endif
487 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
488 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
489 /* The code gen buffer location may have constraints depending on
490 the host cpu and OS */
491 #if defined(__linux__)
493 int flags;
494 void *start = NULL;
496 flags = MAP_PRIVATE | MAP_ANONYMOUS;
497 #if defined(__x86_64__)
498 flags |= MAP_32BIT;
499 /* Cannot map more than that */
500 if (code_gen_buffer_size > (800 * 1024 * 1024))
501 code_gen_buffer_size = (800 * 1024 * 1024);
502 #elif defined(__sparc_v9__)
503 // Map the buffer below 2G, so we can use direct calls and branches
504 flags |= MAP_FIXED;
505 start = (void *) 0x60000000UL;
506 if (code_gen_buffer_size > (512 * 1024 * 1024))
507 code_gen_buffer_size = (512 * 1024 * 1024);
508 #elif defined(__arm__)
509 /* Map the buffer below 32M, so we can use direct calls and branches */
510 flags |= MAP_FIXED;
511 start = (void *) 0x01000000UL;
512 if (code_gen_buffer_size > 16 * 1024 * 1024)
513 code_gen_buffer_size = 16 * 1024 * 1024;
514 #elif defined(__s390x__)
515 /* Map the buffer so that we can use direct calls and branches. */
516 /* We have a +- 4GB range on the branches; leave some slop. */
517 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
518 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
520 start = (void *)0x90000000UL;
521 #endif
522 code_gen_buffer = mmap(start, code_gen_buffer_size,
523 PROT_WRITE | PROT_READ | PROT_EXEC,
524 flags, -1, 0);
525 if (code_gen_buffer == MAP_FAILED) {
526 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
527 exit(1);
530 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
531 || defined(__DragonFly__) || defined(__OpenBSD__)
533 int flags;
534 void *addr = NULL;
535 flags = MAP_PRIVATE | MAP_ANONYMOUS;
536 #if defined(__x86_64__)
537 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
538 * 0x40000000 is free */
539 flags |= MAP_FIXED;
540 addr = (void *)0x40000000;
541 /* Cannot map more than that */
542 if (code_gen_buffer_size > (800 * 1024 * 1024))
543 code_gen_buffer_size = (800 * 1024 * 1024);
544 #elif defined(__sparc_v9__)
545 // Map the buffer below 2G, so we can use direct calls and branches
546 flags |= MAP_FIXED;
547 addr = (void *) 0x60000000UL;
548 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
549 code_gen_buffer_size = (512 * 1024 * 1024);
551 #endif
552 code_gen_buffer = mmap(addr, code_gen_buffer_size,
553 PROT_WRITE | PROT_READ | PROT_EXEC,
554 flags, -1, 0);
555 if (code_gen_buffer == MAP_FAILED) {
556 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
557 exit(1);
560 #else
561 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
562 map_exec(code_gen_buffer, code_gen_buffer_size);
563 #endif
564 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
565 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
566 code_gen_buffer_max_size = code_gen_buffer_size -
567 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
568 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
569 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
572 /* Must be called before using the QEMU cpus. 'tb_size' is the size
573 (in bytes) allocated to the translation buffer. Zero means default
574 size. */
575 void cpu_exec_init_all(unsigned long tb_size)
577 cpu_gen_init();
578 code_gen_alloc(tb_size);
579 code_gen_ptr = code_gen_buffer;
580 page_init();
581 #if !defined(CONFIG_USER_ONLY)
582 io_mem_init();
583 #endif
584 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
585 /* There's no guest base to take into account, so go ahead and
586 initialize the prologue now. */
587 tcg_prologue_init(&tcg_ctx);
588 #endif
591 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
593 static int cpu_common_post_load(void *opaque, int version_id)
595 CPUState *env = opaque;
597 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
598 version_id is increased. */
599 env->interrupt_request &= ~0x01;
600 tlb_flush(env, 1);
602 return 0;
605 static const VMStateDescription vmstate_cpu_common = {
606 .name = "cpu_common",
607 .version_id = 1,
608 .minimum_version_id = 1,
609 .minimum_version_id_old = 1,
610 .post_load = cpu_common_post_load,
611 .fields = (VMStateField []) {
612 VMSTATE_UINT32(halted, CPUState),
613 VMSTATE_UINT32(interrupt_request, CPUState),
614 VMSTATE_END_OF_LIST()
617 #endif
619 CPUState *qemu_get_cpu(int cpu)
621 CPUState *env = first_cpu;
623 while (env) {
624 if (env->cpu_index == cpu)
625 break;
626 env = env->next_cpu;
629 return env;
632 void cpu_exec_init(CPUState *env)
634 CPUState **penv;
635 int cpu_index;
637 #if defined(CONFIG_USER_ONLY)
638 cpu_list_lock();
639 #endif
640 env->next_cpu = NULL;
641 penv = &first_cpu;
642 cpu_index = 0;
643 while (*penv != NULL) {
644 penv = &(*penv)->next_cpu;
645 cpu_index++;
647 env->cpu_index = cpu_index;
648 env->numa_node = 0;
649 QTAILQ_INIT(&env->breakpoints);
650 QTAILQ_INIT(&env->watchpoints);
651 #ifndef CONFIG_USER_ONLY
652 env->thread_id = qemu_get_thread_id();
653 #endif
654 *penv = env;
655 #if defined(CONFIG_USER_ONLY)
656 cpu_list_unlock();
657 #endif
658 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
659 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
660 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
661 cpu_save, cpu_load, env);
662 #endif
665 /* Allocate a new translation block. Flush the translation buffer if
666 too many translation blocks or too much generated code. */
667 static TranslationBlock *tb_alloc(target_ulong pc)
669 TranslationBlock *tb;
671 if (nb_tbs >= code_gen_max_blocks ||
672 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
673 return NULL;
674 tb = &tbs[nb_tbs++];
675 tb->pc = pc;
676 tb->cflags = 0;
677 return tb;
680 void tb_free(TranslationBlock *tb)
682 /* In practice this is mostly used for single use temporary TB
683 Ignore the hard cases and just back up if this TB happens to
684 be the last one generated. */
685 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
686 code_gen_ptr = tb->tc_ptr;
687 nb_tbs--;
691 static inline void invalidate_page_bitmap(PageDesc *p)
693 if (p->code_bitmap) {
694 qemu_free(p->code_bitmap);
695 p->code_bitmap = NULL;
697 p->code_write_count = 0;
700 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
702 static void page_flush_tb_1 (int level, void **lp)
704 int i;
706 if (*lp == NULL) {
707 return;
709 if (level == 0) {
710 PageDesc *pd = *lp;
711 for (i = 0; i < L2_SIZE; ++i) {
712 pd[i].first_tb = NULL;
713 invalidate_page_bitmap(pd + i);
715 } else {
716 void **pp = *lp;
717 for (i = 0; i < L2_SIZE; ++i) {
718 page_flush_tb_1 (level - 1, pp + i);
723 static void page_flush_tb(void)
725 int i;
726 for (i = 0; i < V_L1_SIZE; i++) {
727 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
731 /* flush all the translation blocks */
732 /* XXX: tb_flush is currently not thread safe */
733 void tb_flush(CPUState *env1)
735 CPUState *env;
736 #if defined(DEBUG_FLUSH)
737 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
738 (unsigned long)(code_gen_ptr - code_gen_buffer),
739 nb_tbs, nb_tbs > 0 ?
740 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
741 #endif
742 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
743 cpu_abort(env1, "Internal error: code buffer overflow\n");
745 nb_tbs = 0;
747 for(env = first_cpu; env != NULL; env = env->next_cpu) {
748 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
751 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
752 page_flush_tb();
754 code_gen_ptr = code_gen_buffer;
755 /* XXX: flush processor icache at this point if cache flush is
756 expensive */
757 tb_flush_count++;
760 #ifdef DEBUG_TB_CHECK
762 static void tb_invalidate_check(target_ulong address)
764 TranslationBlock *tb;
765 int i;
766 address &= TARGET_PAGE_MASK;
767 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
768 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
769 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
770 address >= tb->pc + tb->size)) {
771 printf("ERROR invalidate: address=" TARGET_FMT_lx
772 " PC=%08lx size=%04x\n",
773 address, (long)tb->pc, tb->size);
779 /* verify that all the pages have correct rights for code */
780 static void tb_page_check(void)
782 TranslationBlock *tb;
783 int i, flags1, flags2;
785 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
786 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
787 flags1 = page_get_flags(tb->pc);
788 flags2 = page_get_flags(tb->pc + tb->size - 1);
789 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
790 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
791 (long)tb->pc, tb->size, flags1, flags2);
797 #endif
799 /* invalidate one TB */
800 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
801 int next_offset)
803 TranslationBlock *tb1;
804 for(;;) {
805 tb1 = *ptb;
806 if (tb1 == tb) {
807 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
808 break;
810 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
814 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
816 TranslationBlock *tb1;
817 unsigned int n1;
819 for(;;) {
820 tb1 = *ptb;
821 n1 = (long)tb1 & 3;
822 tb1 = (TranslationBlock *)((long)tb1 & ~3);
823 if (tb1 == tb) {
824 *ptb = tb1->page_next[n1];
825 break;
827 ptb = &tb1->page_next[n1];
831 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
833 TranslationBlock *tb1, **ptb;
834 unsigned int n1;
836 ptb = &tb->jmp_next[n];
837 tb1 = *ptb;
838 if (tb1) {
839 /* find tb(n) in circular list */
840 for(;;) {
841 tb1 = *ptb;
842 n1 = (long)tb1 & 3;
843 tb1 = (TranslationBlock *)((long)tb1 & ~3);
844 if (n1 == n && tb1 == tb)
845 break;
846 if (n1 == 2) {
847 ptb = &tb1->jmp_first;
848 } else {
849 ptb = &tb1->jmp_next[n1];
852 /* now we can suppress tb(n) from the list */
853 *ptb = tb->jmp_next[n];
855 tb->jmp_next[n] = NULL;
859 /* reset the jump entry 'n' of a TB so that it is not chained to
860 another TB */
861 static inline void tb_reset_jump(TranslationBlock *tb, int n)
863 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
866 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
868 CPUState *env;
869 PageDesc *p;
870 unsigned int h, n1;
871 tb_page_addr_t phys_pc;
872 TranslationBlock *tb1, *tb2;
874 /* remove the TB from the hash list */
875 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
876 h = tb_phys_hash_func(phys_pc);
877 tb_remove(&tb_phys_hash[h], tb,
878 offsetof(TranslationBlock, phys_hash_next));
880 /* remove the TB from the page list */
881 if (tb->page_addr[0] != page_addr) {
882 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
883 tb_page_remove(&p->first_tb, tb);
884 invalidate_page_bitmap(p);
886 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
887 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
888 tb_page_remove(&p->first_tb, tb);
889 invalidate_page_bitmap(p);
892 tb_invalidated_flag = 1;
894 /* remove the TB from the hash list */
895 h = tb_jmp_cache_hash_func(tb->pc);
896 for(env = first_cpu; env != NULL; env = env->next_cpu) {
897 if (env->tb_jmp_cache[h] == tb)
898 env->tb_jmp_cache[h] = NULL;
901 /* suppress this TB from the two jump lists */
902 tb_jmp_remove(tb, 0);
903 tb_jmp_remove(tb, 1);
905 /* suppress any remaining jumps to this TB */
906 tb1 = tb->jmp_first;
907 for(;;) {
908 n1 = (long)tb1 & 3;
909 if (n1 == 2)
910 break;
911 tb1 = (TranslationBlock *)((long)tb1 & ~3);
912 tb2 = tb1->jmp_next[n1];
913 tb_reset_jump(tb1, n1);
914 tb1->jmp_next[n1] = NULL;
915 tb1 = tb2;
917 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
919 tb_phys_invalidate_count++;
/* Set bits [start, start + len) of the bitmap 'tab'.  Bit k lives in
   byte k / 8 at position k % 8, least significant bit first.  Bytes
   lying entirely inside the range are overwritten with 0xff; the
   partial bytes at either end are OR-ed. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    int head_mask = 0xff << (start & 7);
    int pos, last_full;

    if ((start & ~7) == (end & ~7)) {
        /* the range begins and ends inside the same byte */
        if (start < end) {
            *byte |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* leading partial byte */
    *byte++ |= head_mask;

    /* whole bytes in the middle */
    last_full = end & ~7;
    for (pos = (start + 8) & ~7; pos < last_full; pos += 8) {
        *byte++ = 0xff;
    }

    /* trailing partial byte */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
949 static void build_page_bitmap(PageDesc *p)
951 int n, tb_start, tb_end;
952 TranslationBlock *tb;
954 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
956 tb = p->first_tb;
957 while (tb != NULL) {
958 n = (long)tb & 3;
959 tb = (TranslationBlock *)((long)tb & ~3);
960 /* NOTE: this is subtle as a TB may span two physical pages */
961 if (n == 0) {
962 /* NOTE: tb_end may be after the end of the page, but
963 it is not a problem */
964 tb_start = tb->pc & ~TARGET_PAGE_MASK;
965 tb_end = tb_start + tb->size;
966 if (tb_end > TARGET_PAGE_SIZE)
967 tb_end = TARGET_PAGE_SIZE;
968 } else {
969 tb_start = 0;
970 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
972 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
973 tb = tb->page_next[n];
977 TranslationBlock *tb_gen_code(CPUState *env,
978 target_ulong pc, target_ulong cs_base,
979 int flags, int cflags)
981 TranslationBlock *tb;
982 uint8_t *tc_ptr;
983 tb_page_addr_t phys_pc, phys_page2;
984 target_ulong virt_page2;
985 int code_gen_size;
987 phys_pc = get_page_addr_code(env, pc);
988 tb = tb_alloc(pc);
989 if (!tb) {
990 /* flush must be done */
991 tb_flush(env);
992 /* cannot fail at this point */
993 tb = tb_alloc(pc);
994 /* Don't forget to invalidate previous TB info. */
995 tb_invalidated_flag = 1;
997 tc_ptr = code_gen_ptr;
998 tb->tc_ptr = tc_ptr;
999 tb->cs_base = cs_base;
1000 tb->flags = flags;
1001 tb->cflags = cflags;
1002 cpu_gen_code(env, tb, &code_gen_size);
1003 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1005 /* check next page if needed */
1006 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1007 phys_page2 = -1;
1008 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1009 phys_page2 = get_page_addr_code(env, virt_page2);
1011 tb_link_page(tb, phys_pc, phys_page2);
1012 return tb;
1015 /* invalidate all TBs which intersect with the target physical page
1016 starting in range [start;end[. NOTE: start and end must refer to
1017 the same physical page. 'is_cpu_write_access' should be true if called
1018 from a real cpu write access: the virtual CPU will exit the current
1019 TB if code is modified inside this TB. */
1020 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1021 int is_cpu_write_access)
1023 TranslationBlock *tb, *tb_next, *saved_tb;
1024 CPUState *env = cpu_single_env;
1025 tb_page_addr_t tb_start, tb_end;
1026 PageDesc *p;
1027 int n;
1028 #ifdef TARGET_HAS_PRECISE_SMC
1029 int current_tb_not_found = is_cpu_write_access;
1030 TranslationBlock *current_tb = NULL;
1031 int current_tb_modified = 0;
1032 target_ulong current_pc = 0;
1033 target_ulong current_cs_base = 0;
1034 int current_flags = 0;
1035 #endif /* TARGET_HAS_PRECISE_SMC */
1037 p = page_find(start >> TARGET_PAGE_BITS);
1038 if (!p)
1039 return;
1040 if (!p->code_bitmap &&
1041 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1042 is_cpu_write_access) {
1043 /* build code bitmap */
1044 build_page_bitmap(p);
1047 /* we remove all the TBs in the range [start, end[ */
1048 /* XXX: see if in some cases it could be faster to invalidate all the code */
1049 tb = p->first_tb;
1050 while (tb != NULL) {
1051 n = (long)tb & 3;
1052 tb = (TranslationBlock *)((long)tb & ~3);
1053 tb_next = tb->page_next[n];
1054 /* NOTE: this is subtle as a TB may span two physical pages */
1055 if (n == 0) {
1056 /* NOTE: tb_end may be after the end of the page, but
1057 it is not a problem */
1058 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1059 tb_end = tb_start + tb->size;
1060 } else {
1061 tb_start = tb->page_addr[1];
1062 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1064 if (!(tb_end <= start || tb_start >= end)) {
1065 #ifdef TARGET_HAS_PRECISE_SMC
1066 if (current_tb_not_found) {
1067 current_tb_not_found = 0;
1068 current_tb = NULL;
1069 if (env->mem_io_pc) {
1070 /* now we have a real cpu fault */
1071 current_tb = tb_find_pc(env->mem_io_pc);
1074 if (current_tb == tb &&
1075 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1076 /* If we are modifying the current TB, we must stop
1077 its execution. We could be more precise by checking
1078 that the modification is after the current PC, but it
1079 would require a specialized function to partially
1080 restore the CPU state */
1082 current_tb_modified = 1;
1083 cpu_restore_state(current_tb, env, env->mem_io_pc);
1084 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1085 &current_flags);
1087 #endif /* TARGET_HAS_PRECISE_SMC */
1088 /* we need to do that to handle the case where a signal
1089 occurs while doing tb_phys_invalidate() */
1090 saved_tb = NULL;
1091 if (env) {
1092 saved_tb = env->current_tb;
1093 env->current_tb = NULL;
1095 tb_phys_invalidate(tb, -1);
1096 if (env) {
1097 env->current_tb = saved_tb;
1098 if (env->interrupt_request && env->current_tb)
1099 cpu_interrupt(env, env->interrupt_request);
1102 tb = tb_next;
1104 #if !defined(CONFIG_USER_ONLY)
1105 /* if no code remaining, no need to continue to use slow writes */
1106 if (!p->first_tb) {
1107 invalidate_page_bitmap(p);
1108 if (is_cpu_write_access) {
1109 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1112 #endif
1113 #ifdef TARGET_HAS_PRECISE_SMC
1114 if (current_tb_modified) {
1115 /* we generate a block containing just the instruction
1116 modifying the memory. It will ensure that it cannot modify
1117 itself */
1118 env->current_tb = NULL;
1119 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1120 cpu_resume_from_signal(env, NULL);
1122 #endif
1125 /* len must be <= 8 and start must be a multiple of len */
1126 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1128 PageDesc *p;
1129 int offset, b;
1130 #if 0
1131 if (1) {
1132 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1133 cpu_single_env->mem_io_vaddr, len,
1134 cpu_single_env->eip,
1135 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1137 #endif
1138 p = page_find(start >> TARGET_PAGE_BITS);
1139 if (!p)
1140 return;
1141 if (p->code_bitmap) {
1142 offset = start & ~TARGET_PAGE_MASK;
1143 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1144 if (b & ((1 << len) - 1))
1145 goto do_invalidate;
1146 } else {
1147 do_invalidate:
1148 tb_invalidate_phys_page_range(start, start + len, 1);
1152 #if !defined(CONFIG_SOFTMMU)
1153 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1154 unsigned long pc, void *puc)
1156 TranslationBlock *tb;
1157 PageDesc *p;
1158 int n;
1159 #ifdef TARGET_HAS_PRECISE_SMC
1160 TranslationBlock *current_tb = NULL;
1161 CPUState *env = cpu_single_env;
1162 int current_tb_modified = 0;
1163 target_ulong current_pc = 0;
1164 target_ulong current_cs_base = 0;
1165 int current_flags = 0;
1166 #endif
1168 addr &= TARGET_PAGE_MASK;
1169 p = page_find(addr >> TARGET_PAGE_BITS);
1170 if (!p)
1171 return;
1172 tb = p->first_tb;
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (tb && pc != 0) {
1175 current_tb = tb_find_pc(pc);
1177 #endif
1178 while (tb != NULL) {
1179 n = (long)tb & 3;
1180 tb = (TranslationBlock *)((long)tb & ~3);
1181 #ifdef TARGET_HAS_PRECISE_SMC
1182 if (current_tb == tb &&
1183 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1184 /* If we are modifying the current TB, we must stop
1185 its execution. We could be more precise by checking
1186 that the modification is after the current PC, but it
1187 would require a specialized function to partially
1188 restore the CPU state */
1190 current_tb_modified = 1;
1191 cpu_restore_state(current_tb, env, pc);
1192 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1193 &current_flags);
1195 #endif /* TARGET_HAS_PRECISE_SMC */
1196 tb_phys_invalidate(tb, addr);
1197 tb = tb->page_next[n];
1199 p->first_tb = NULL;
1200 #ifdef TARGET_HAS_PRECISE_SMC
1201 if (current_tb_modified) {
1202 /* we generate a block containing just the instruction
1203 modifying the memory. It will ensure that it cannot modify
1204 itself */
1205 env->current_tb = NULL;
1206 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1207 cpu_resume_from_signal(env, puc);
1209 #endif
1211 #endif
1213 /* add the tb in the target page and protect it if necessary */
1214 static inline void tb_alloc_page(TranslationBlock *tb,
1215 unsigned int n, tb_page_addr_t page_addr)
1217 PageDesc *p;
1218 TranslationBlock *last_first_tb;
1220 tb->page_addr[n] = page_addr;
1221 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1222 tb->page_next[n] = p->first_tb;
1223 last_first_tb = p->first_tb;
1224 p->first_tb = (TranslationBlock *)((long)tb | n);
1225 invalidate_page_bitmap(p);
1227 #if defined(TARGET_HAS_SMC) || 1
1229 #if defined(CONFIG_USER_ONLY)
1230 if (p->flags & PAGE_WRITE) {
1231 target_ulong addr;
1232 PageDesc *p2;
1233 int prot;
1235 /* force the host page as non writable (writes will have a
1236 page fault + mprotect overhead) */
1237 page_addr &= qemu_host_page_mask;
1238 prot = 0;
1239 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1240 addr += TARGET_PAGE_SIZE) {
1242 p2 = page_find (addr >> TARGET_PAGE_BITS);
1243 if (!p2)
1244 continue;
1245 prot |= p2->flags;
1246 p2->flags &= ~PAGE_WRITE;
1248 mprotect(g2h(page_addr), qemu_host_page_size,
1249 (prot & PAGE_BITS) & ~PAGE_WRITE);
1250 #ifdef DEBUG_TB_INVALIDATE
1251 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1252 page_addr);
1253 #endif
1255 #else
1256 /* if some code is already present, then the pages are already
1257 protected. So we handle the case where only the first TB is
1258 allocated in a physical page */
1259 if (!last_first_tb) {
1260 tlb_protect_code(page_addr);
1262 #endif
1264 #endif /* TARGET_HAS_SMC */
1267 /* add a new TB and link it to the physical page tables. phys_page2 is
1268 (-1) to indicate that only one page contains the TB. */
1269 void tb_link_page(TranslationBlock *tb,
1270 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1272 unsigned int h;
1273 TranslationBlock **ptb;
1275 /* Grab the mmap lock to stop another thread invalidating this TB
1276 before we are done. */
1277 mmap_lock();
1278 /* add in the physical hash table */
1279 h = tb_phys_hash_func(phys_pc);
1280 ptb = &tb_phys_hash[h];
1281 tb->phys_hash_next = *ptb;
1282 *ptb = tb;
1284 /* add in the page list */
1285 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1286 if (phys_page2 != -1)
1287 tb_alloc_page(tb, 1, phys_page2);
1288 else
1289 tb->page_addr[1] = -1;
1291 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1292 tb->jmp_next[0] = NULL;
1293 tb->jmp_next[1] = NULL;
1295 /* init original jump addresses */
1296 if (tb->tb_next_offset[0] != 0xffff)
1297 tb_reset_jump(tb, 0);
1298 if (tb->tb_next_offset[1] != 0xffff)
1299 tb_reset_jump(tb, 1);
1301 #ifdef DEBUG_TB_CHECK
1302 tb_page_check();
1303 #endif
1304 mmap_unlock();
1307 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1308 tb[1].tc_ptr. Return NULL if not found */
1309 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1311 int m_min, m_max, m;
1312 unsigned long v;
1313 TranslationBlock *tb;
1315 if (nb_tbs <= 0)
1316 return NULL;
1317 if (tc_ptr < (unsigned long)code_gen_buffer ||
1318 tc_ptr >= (unsigned long)code_gen_ptr)
1319 return NULL;
1320 /* binary search (cf Knuth) */
1321 m_min = 0;
1322 m_max = nb_tbs - 1;
1323 while (m_min <= m_max) {
1324 m = (m_min + m_max) >> 1;
1325 tb = &tbs[m];
1326 v = (unsigned long)tb->tc_ptr;
1327 if (v == tc_ptr)
1328 return tb;
1329 else if (tc_ptr < v) {
1330 m_max = m - 1;
1331 } else {
1332 m_min = m + 1;
1335 return &tbs[m_max];
1338 static void tb_reset_jump_recursive(TranslationBlock *tb);
1340 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1342 TranslationBlock *tb1, *tb_next, **ptb;
1343 unsigned int n1;
1345 tb1 = tb->jmp_next[n];
1346 if (tb1 != NULL) {
1347 /* find head of list */
1348 for(;;) {
1349 n1 = (long)tb1 & 3;
1350 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1351 if (n1 == 2)
1352 break;
1353 tb1 = tb1->jmp_next[n1];
1355 /* we are now sure now that tb jumps to tb1 */
1356 tb_next = tb1;
1358 /* remove tb from the jmp_first list */
1359 ptb = &tb_next->jmp_first;
1360 for(;;) {
1361 tb1 = *ptb;
1362 n1 = (long)tb1 & 3;
1363 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1364 if (n1 == n && tb1 == tb)
1365 break;
1366 ptb = &tb1->jmp_next[n1];
1368 *ptb = tb->jmp_next[n];
1369 tb->jmp_next[n] = NULL;
1371 /* suppress the jump to next tb in generated code */
1372 tb_reset_jump(tb, n);
1374 /* suppress jumps in the tb on which we could have jumped */
1375 tb_reset_jump_recursive(tb_next);
1379 static void tb_reset_jump_recursive(TranslationBlock *tb)
1381 tb_reset_jump_recursive2(tb, 0);
1382 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate the translated code covering the single byte at guest virtual
 * address 'pc', after resolving it through the physical page table.
 */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys;
    PhysPageDesc *desc;
    target_ulong offset_bits;
    ram_addr_t ram_addr;

    phys = cpu_get_phys_page_debug(env, pc);
    desc = phys_page_find(phys >> TARGET_PAGE_BITS);
    if (desc) {
        offset_bits = desc->phys_offset;
    } else {
        offset_bits = IO_MEM_UNASSIGNED;
    }

    /* page part from the descriptor, in-page offset from the pc */
    ram_addr = (offset_bits & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1412 #if defined(CONFIG_USER_ONLY)
1413 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1418 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1419 int flags, CPUWatchpoint **watchpoint)
1421 return -ENOSYS;
1423 #else
1424 /* Add a watchpoint. */
1425 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1426 int flags, CPUWatchpoint **watchpoint)
1428 target_ulong len_mask = ~(len - 1);
1429 CPUWatchpoint *wp;
1431 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1432 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1433 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1434 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1435 return -EINVAL;
1437 wp = qemu_malloc(sizeof(*wp));
1439 wp->vaddr = addr;
1440 wp->len_mask = len_mask;
1441 wp->flags = flags;
1443 /* keep all GDB-injected watchpoints in front */
1444 if (flags & BP_GDB)
1445 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1446 else
1447 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1449 tlb_flush_page(env, addr);
1451 if (watchpoint)
1452 *watchpoint = wp;
1453 return 0;
1456 /* Remove a specific watchpoint. */
1457 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1458 int flags)
1460 target_ulong len_mask = ~(len - 1);
1461 CPUWatchpoint *wp;
1463 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1464 if (addr == wp->vaddr && len_mask == wp->len_mask
1465 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1466 cpu_watchpoint_remove_by_ref(env, wp);
1467 return 0;
1470 return -ENOENT;
1473 /* Remove a specific watchpoint by reference. */
1474 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1476 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1478 tlb_flush_page(env, watchpoint->vaddr);
1480 qemu_free(watchpoint);
1483 /* Remove all matching watchpoints. */
1484 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1486 CPUWatchpoint *wp, *next;
1488 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1489 if (wp->flags & mask)
1490 cpu_watchpoint_remove_by_ref(env, wp);
1493 #endif
1495 /* Add a breakpoint. */
1496 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1497 CPUBreakpoint **breakpoint)
1499 #if defined(TARGET_HAS_ICE)
1500 CPUBreakpoint *bp;
1502 bp = qemu_malloc(sizeof(*bp));
1504 bp->pc = pc;
1505 bp->flags = flags;
1507 /* keep all GDB-injected breakpoints in front */
1508 if (flags & BP_GDB)
1509 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1510 else
1511 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1513 breakpoint_invalidate(env, pc);
1515 if (breakpoint)
1516 *breakpoint = bp;
1517 return 0;
1518 #else
1519 return -ENOSYS;
1520 #endif
1523 /* Remove a specific breakpoint. */
1524 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1526 #if defined(TARGET_HAS_ICE)
1527 CPUBreakpoint *bp;
1529 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1530 if (bp->pc == pc && bp->flags == flags) {
1531 cpu_breakpoint_remove_by_ref(env, bp);
1532 return 0;
1535 return -ENOENT;
1536 #else
1537 return -ENOSYS;
1538 #endif
1541 /* Remove a specific breakpoint by reference. */
1542 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1544 #if defined(TARGET_HAS_ICE)
1545 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1547 breakpoint_invalidate(env, breakpoint->pc);
1549 qemu_free(breakpoint);
1550 #endif
1553 /* Remove all matching breakpoints. */
1554 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1556 #if defined(TARGET_HAS_ICE)
1557 CPUBreakpoint *bp, *next;
1559 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1560 if (bp->flags & mask)
1561 cpu_breakpoint_remove_by_ref(env, bp);
1563 #endif
1566 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1567 CPU loop after each instruction */
1568 void cpu_single_step(CPUState *env, int enabled)
1570 #if defined(TARGET_HAS_ICE)
1571 if (env->singlestep_enabled != enabled) {
1572 env->singlestep_enabled = enabled;
1573 if (kvm_enabled())
1574 kvm_update_guest_debug(env, 0);
1575 else {
1576 /* must flush all the translated code to avoid inconsistencies */
1577 /* XXX: only flush what is necessary */
1578 tb_flush(env);
1581 #endif
1584 /* enable or disable low levels log */
1585 void cpu_set_log(int log_flags)
1587 loglevel = log_flags;
1588 if (loglevel && !logfile) {
1589 logfile = fopen(logfilename, log_append ? "a" : "w");
1590 if (!logfile) {
1591 perror(logfilename);
1592 _exit(1);
1594 #if !defined(CONFIG_SOFTMMU)
1595 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1597 static char logfile_buf[4096];
1598 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1600 #elif !defined(_WIN32)
1601 /* Win32 doesn't support line-buffering and requires size >= 2 */
1602 setvbuf(logfile, NULL, _IOLBF, 0);
1603 #endif
1604 log_append = 1;
1606 if (!loglevel && logfile) {
1607 fclose(logfile);
1608 logfile = NULL;
1612 void cpu_set_log_filename(const char *filename)
1614 logfilename = strdup(filename);
1615 if (logfile) {
1616 fclose(logfile);
1617 logfile = NULL;
1619 cpu_set_log(loglevel);
1622 static void cpu_unlink_tb(CPUState *env)
1624 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1625 problem and hope the cpu will stop of its own accord. For userspace
1626 emulation this often isn't actually as bad as it sounds. Often
1627 signals are used primarily to interrupt blocking syscalls. */
1628 TranslationBlock *tb;
1629 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1631 spin_lock(&interrupt_lock);
1632 tb = env->current_tb;
1633 /* if the cpu is currently executing code, we must unlink it and
1634 all the potentially executing TB */
1635 if (tb) {
1636 env->current_tb = NULL;
1637 tb_reset_jump_recursive(tb);
1639 spin_unlock(&interrupt_lock);
1642 #ifndef CONFIG_USER_ONLY
1643 /* mask must never be zero, except for A20 change call */
1644 static void tcg_handle_interrupt(CPUState *env, int mask)
1646 int old_mask;
1648 old_mask = env->interrupt_request;
1649 env->interrupt_request |= mask;
1652 * If called from iothread context, wake the target cpu in
1653 * case its halted.
1655 if (!qemu_cpu_is_self(env)) {
1656 qemu_cpu_kick(env);
1657 return;
1660 if (use_icount) {
1661 env->icount_decr.u16.high = 0xffff;
1662 if (!can_do_io(env)
1663 && (mask & ~old_mask) != 0) {
1664 cpu_abort(env, "Raised interrupt while not in I/O function");
1666 } else {
1667 cpu_unlink_tb(env);
1671 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1673 #else /* CONFIG_USER_ONLY */
1675 void cpu_interrupt(CPUState *env, int mask)
1677 env->interrupt_request |= mask;
1678 cpu_unlink_tb(env);
1680 #endif /* CONFIG_USER_ONLY */
1682 void cpu_reset_interrupt(CPUState *env, int mask)
1684 env->interrupt_request &= ~mask;
1687 void cpu_exit(CPUState *env)
1689 env->exit_request = 1;
1690 cpu_unlink_tb(env);
1693 const CPULogItem cpu_log_items[] = {
1694 { CPU_LOG_TB_OUT_ASM, "out_asm",
1695 "show generated host assembly code for each compiled TB" },
1696 { CPU_LOG_TB_IN_ASM, "in_asm",
1697 "show target assembly code for each compiled TB" },
1698 { CPU_LOG_TB_OP, "op",
1699 "show micro ops for each compiled TB" },
1700 { CPU_LOG_TB_OP_OPT, "op_opt",
1701 "show micro ops "
1702 #ifdef TARGET_I386
1703 "before eflags optimization and "
1704 #endif
1705 "after liveness analysis" },
1706 { CPU_LOG_INT, "int",
1707 "show interrupts/exceptions in short format" },
1708 { CPU_LOG_EXEC, "exec",
1709 "show trace before each executed TB (lots of logs)" },
1710 { CPU_LOG_TB_CPU, "cpu",
1711 "show CPU state before block translation" },
1712 #ifdef TARGET_I386
1713 { CPU_LOG_PCALL, "pcall",
1714 "show protected mode far calls/returns/exceptions" },
1715 { CPU_LOG_RESET, "cpu_reset",
1716 "show CPU state before CPU resets" },
1717 #endif
1718 #ifdef DEBUG_IOPORT
1719 { CPU_LOG_IOPORT, "ioport",
1720 "show all i/o ports accesses" },
1721 #endif
1722 { 0, NULL, NULL },
1725 #ifndef CONFIG_USER_ONLY
1726 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1727 = QLIST_HEAD_INITIALIZER(memory_client_list);
1729 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1730 ram_addr_t size,
1731 ram_addr_t phys_offset,
1732 bool log_dirty)
1734 CPUPhysMemoryClient *client;
1735 QLIST_FOREACH(client, &memory_client_list, list) {
1736 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1740 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1741 target_phys_addr_t end)
1743 CPUPhysMemoryClient *client;
1744 QLIST_FOREACH(client, &memory_client_list, list) {
1745 int r = client->sync_dirty_bitmap(client, start, end);
1746 if (r < 0)
1747 return r;
1749 return 0;
1752 static int cpu_notify_migration_log(int enable)
1754 CPUPhysMemoryClient *client;
1755 QLIST_FOREACH(client, &memory_client_list, list) {
1756 int r = client->migration_log(client, enable);
1757 if (r < 0)
1758 return r;
1760 return 0;
1763 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1764 * address. Each intermediate table provides the next L2_BITs of guest
1765 * physical address space. The number of levels vary based on host and
1766 * guest configuration, making it efficient to build the final guest
1767 * physical address by seeding the L1 offset and shifting and adding in
1768 * each L2 offset as we recurse through them. */
1769 static void phys_page_for_each_1(CPUPhysMemoryClient *client,
1770 int level, void **lp, target_phys_addr_t addr)
1772 int i;
1774 if (*lp == NULL) {
1775 return;
1777 if (level == 0) {
1778 PhysPageDesc *pd = *lp;
1779 addr <<= L2_BITS + TARGET_PAGE_BITS;
1780 for (i = 0; i < L2_SIZE; ++i) {
1781 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1782 client->set_memory(client, addr | i << TARGET_PAGE_BITS,
1783 TARGET_PAGE_SIZE, pd[i].phys_offset, false);
1786 } else {
1787 void **pp = *lp;
1788 for (i = 0; i < L2_SIZE; ++i) {
1789 phys_page_for_each_1(client, level - 1, pp + i,
1790 (addr << L2_BITS) | i);
1795 static void phys_page_for_each(CPUPhysMemoryClient *client)
1797 int i;
1798 for (i = 0; i < P_L1_SIZE; ++i) {
1799 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1800 l1_phys_map + i, i);
1804 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1806 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1807 phys_page_for_each(client);
1810 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1812 QLIST_REMOVE(client, list);
1814 #endif
/*
 * Return non-zero when the first 'n' bytes of s1 are exactly the string s2
 * (s2 must be n characters long), 0 otherwise.  s1 need not be
 * NUL-terminated at position n.
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1823 /* takes a comma separated list of log masks. Return 0 if error. */
1824 int cpu_str_to_log_mask(const char *str)
1826 const CPULogItem *item;
1827 int mask;
1828 const char *p, *p1;
1830 p = str;
1831 mask = 0;
1832 for(;;) {
1833 p1 = strchr(p, ',');
1834 if (!p1)
1835 p1 = p + strlen(p);
1836 if(cmp1(p,p1-p,"all")) {
1837 for(item = cpu_log_items; item->mask != 0; item++) {
1838 mask |= item->mask;
1840 } else {
1841 for(item = cpu_log_items; item->mask != 0; item++) {
1842 if (cmp1(p, p1 - p, item->name))
1843 goto found;
1845 return 0;
1847 found:
1848 mask |= item->mask;
1849 if (*p1 != ',')
1850 break;
1851 p = p1 + 1;
1853 return mask;
1856 void cpu_abort(CPUState *env, const char *fmt, ...)
1858 va_list ap;
1859 va_list ap2;
1861 va_start(ap, fmt);
1862 va_copy(ap2, ap);
1863 fprintf(stderr, "qemu: fatal: ");
1864 vfprintf(stderr, fmt, ap);
1865 fprintf(stderr, "\n");
1866 #ifdef TARGET_I386
1867 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1868 #else
1869 cpu_dump_state(env, stderr, fprintf, 0);
1870 #endif
1871 if (qemu_log_enabled()) {
1872 qemu_log("qemu: fatal: ");
1873 qemu_log_vprintf(fmt, ap2);
1874 qemu_log("\n");
1875 #ifdef TARGET_I386
1876 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1877 #else
1878 log_cpu_state(env, 0);
1879 #endif
1880 qemu_log_flush();
1881 qemu_log_close();
1883 va_end(ap2);
1884 va_end(ap);
1885 #if defined(CONFIG_USER_ONLY)
1887 struct sigaction act;
1888 sigfillset(&act.sa_mask);
1889 act.sa_handler = SIG_DFL;
1890 sigaction(SIGABRT, &act, NULL);
1892 #endif
1893 abort();
1896 CPUState *cpu_copy(CPUState *env)
1898 CPUState *new_env = cpu_init(env->cpu_model_str);
1899 CPUState *next_cpu = new_env->next_cpu;
1900 int cpu_index = new_env->cpu_index;
1901 #if defined(TARGET_HAS_ICE)
1902 CPUBreakpoint *bp;
1903 CPUWatchpoint *wp;
1904 #endif
1906 memcpy(new_env, env, sizeof(CPUState));
1908 /* Preserve chaining and index. */
1909 new_env->next_cpu = next_cpu;
1910 new_env->cpu_index = cpu_index;
1912 /* Clone all break/watchpoints.
1913 Note: Once we support ptrace with hw-debug register access, make sure
1914 BP_CPU break/watchpoints are handled correctly on clone. */
1915 QTAILQ_INIT(&env->breakpoints);
1916 QTAILQ_INIT(&env->watchpoints);
1917 #if defined(TARGET_HAS_ICE)
1918 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1919 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1921 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1922 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1923 wp->flags, NULL);
1925 #endif
1927 return new_env;
1930 #if !defined(CONFIG_USER_ONLY)
1932 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1934 unsigned int i;
1936 /* Discard jump cache entries for any tb which might potentially
1937 overlap the flushed page. */
1938 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1939 memset (&env->tb_jmp_cache[i], 0,
1940 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1942 i = tb_jmp_cache_hash_page(addr);
1943 memset (&env->tb_jmp_cache[i], 0,
1944 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1947 static CPUTLBEntry s_cputlb_empty_entry = {
1948 .addr_read = -1,
1949 .addr_write = -1,
1950 .addr_code = -1,
1951 .addend = -1,
1954 /* NOTE: if flush_global is true, also flush global entries (not
1955 implemented yet) */
1956 void tlb_flush(CPUState *env, int flush_global)
1958 int i;
1960 #if defined(DEBUG_TLB)
1961 printf("tlb_flush:\n");
1962 #endif
1963 /* must reset current TB so that interrupts cannot modify the
1964 links while we are modifying them */
1965 env->current_tb = NULL;
1967 for(i = 0; i < CPU_TLB_SIZE; i++) {
1968 int mmu_idx;
1969 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1970 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1974 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1976 env->tlb_flush_addr = -1;
1977 env->tlb_flush_mask = 0;
1978 tlb_flush_count++;
1981 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1983 if (addr == (tlb_entry->addr_read &
1984 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1985 addr == (tlb_entry->addr_write &
1986 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1987 addr == (tlb_entry->addr_code &
1988 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1989 *tlb_entry = s_cputlb_empty_entry;
1993 void tlb_flush_page(CPUState *env, target_ulong addr)
1995 int i;
1996 int mmu_idx;
1998 #if defined(DEBUG_TLB)
1999 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2000 #endif
2001 /* Check if we need to flush due to large pages. */
2002 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2003 #if defined(DEBUG_TLB)
2004 printf("tlb_flush_page: forced full flush ("
2005 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2006 env->tlb_flush_addr, env->tlb_flush_mask);
2007 #endif
2008 tlb_flush(env, 1);
2009 return;
2011 /* must reset current TB so that interrupts cannot modify the
2012 links while we are modifying them */
2013 env->current_tb = NULL;
2015 addr &= TARGET_PAGE_MASK;
2016 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2017 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2018 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2020 tlb_flush_jmp_cache(env, addr);
2023 /* update the TLBs so that writes to code in the virtual page 'addr'
2024 can be detected */
2025 static void tlb_protect_code(ram_addr_t ram_addr)
2027 cpu_physical_memory_reset_dirty(ram_addr,
2028 ram_addr + TARGET_PAGE_SIZE,
2029 CODE_DIRTY_FLAG);
2032 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2033 tested for self modifying code */
2034 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2035 target_ulong vaddr)
2037 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2040 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2041 unsigned long start, unsigned long length)
2043 unsigned long addr;
2044 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2045 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2046 if ((addr - start) < length) {
2047 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2052 /* Note: start and end must be within the same ram block. */
2053 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2054 int dirty_flags)
2056 CPUState *env;
2057 unsigned long length, start1;
2058 int i;
2060 start &= TARGET_PAGE_MASK;
2061 end = TARGET_PAGE_ALIGN(end);
2063 length = end - start;
2064 if (length == 0)
2065 return;
2066 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2068 /* we modify the TLB cache so that the dirty bit will be set again
2069 when accessing the range */
2070 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2071 /* Check that we don't span multiple blocks - this breaks the
2072 address comparisons below. */
2073 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2074 != (end - 1) - start) {
2075 abort();
2078 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2079 int mmu_idx;
2080 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2081 for(i = 0; i < CPU_TLB_SIZE; i++)
2082 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2083 start1, length);
2088 int cpu_physical_memory_set_dirty_tracking(int enable)
2090 int ret = 0;
2091 in_migration = enable;
2092 ret = cpu_notify_migration_log(!!enable);
2093 return ret;
2096 int cpu_physical_memory_get_dirty_tracking(void)
2098 return in_migration;
2101 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2102 target_phys_addr_t end_addr)
2104 int ret;
2106 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2107 return ret;
2110 int cpu_physical_log_start(target_phys_addr_t start_addr,
2111 ram_addr_t size)
2113 CPUPhysMemoryClient *client;
2114 QLIST_FOREACH(client, &memory_client_list, list) {
2115 if (client->log_start) {
2116 int r = client->log_start(client, start_addr, size);
2117 if (r < 0) {
2118 return r;
2122 return 0;
2125 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2126 ram_addr_t size)
2128 CPUPhysMemoryClient *client;
2129 QLIST_FOREACH(client, &memory_client_list, list) {
2130 if (client->log_stop) {
2131 int r = client->log_stop(client, start_addr, size);
2132 if (r < 0) {
2133 return r;
2137 return 0;
2140 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2142 ram_addr_t ram_addr;
2143 void *p;
2145 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2146 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2147 + tlb_entry->addend);
2148 ram_addr = qemu_ram_addr_from_host_nofail(p);
2149 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2150 tlb_entry->addr_write |= TLB_NOTDIRTY;
2155 /* update the TLB according to the current state of the dirty bits */
2156 void cpu_tlb_update_dirty(CPUState *env)
2158 int i;
2159 int mmu_idx;
2160 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2161 for(i = 0; i < CPU_TLB_SIZE; i++)
2162 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2166 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2168 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2169 tlb_entry->addr_write = vaddr;
2172 /* update the TLB corresponding to virtual page vaddr
2173 so that it is no longer dirty */
2174 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2176 int i;
2177 int mmu_idx;
2179 vaddr &= TARGET_PAGE_MASK;
2180 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2181 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2182 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2185 /* Our TLB does not support large pages, so remember the area covered by
2186 large pages and trigger a full TLB flush if these are invalidated. */
2187 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2188 target_ulong size)
2190 target_ulong mask = ~(size - 1);
2192 if (env->tlb_flush_addr == (target_ulong)-1) {
2193 env->tlb_flush_addr = vaddr & mask;
2194 env->tlb_flush_mask = mask;
2195 return;
2197 /* Extend the existing region to include the new page.
2198 This is a compromise between unnecessary flushes and the cost
2199 of maintaining a full variable size TLB. */
2200 mask &= env->tlb_flush_mask;
2201 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2202 mask <<= 1;
2204 env->tlb_flush_addr &= mask;
2205 env->tlb_flush_mask = mask;
2208 /* Add a new TLB entry. At most one entry for a given virtual address
2209 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2210 supplied size is only used by tlb_flush_page. */
2211 void tlb_set_page(CPUState *env, target_ulong vaddr,
2212 target_phys_addr_t paddr, int prot,
2213 int mmu_idx, target_ulong size)
2215 PhysPageDesc *p;
2216 unsigned long pd;
2217 unsigned int index;
2218 target_ulong address;
2219 target_ulong code_address;
2220 unsigned long addend;
2221 CPUTLBEntry *te;
2222 CPUWatchpoint *wp;
2223 target_phys_addr_t iotlb;
2225 assert(size >= TARGET_PAGE_SIZE);
2226 if (size != TARGET_PAGE_SIZE) {
2227 tlb_add_large_page(env, vaddr, size);
2229 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2230 if (!p) {
2231 pd = IO_MEM_UNASSIGNED;
2232 } else {
2233 pd = p->phys_offset;
2235 #if defined(DEBUG_TLB)
2236 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2237 " prot=%x idx=%d pd=0x%08lx\n",
2238 vaddr, paddr, prot, mmu_idx, pd);
2239 #endif
2241 address = vaddr;
2242 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2243 /* IO memory case (romd handled later) */
2244 address |= TLB_MMIO;
2246 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2247 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2248 /* Normal RAM. */
2249 iotlb = pd & TARGET_PAGE_MASK;
2250 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2251 iotlb |= IO_MEM_NOTDIRTY;
2252 else
2253 iotlb |= IO_MEM_ROM;
2254 } else {
2255 /* IO handlers are currently passed a physical address.
2256 It would be nice to pass an offset from the base address
2257 of that region. This would avoid having to special case RAM,
2258 and avoid full address decoding in every device.
2259 We can't use the high bits of pd for this because
2260 IO_MEM_ROMD uses these as a ram address. */
2261 iotlb = (pd & ~TARGET_PAGE_MASK);
2262 if (p) {
2263 iotlb += p->region_offset;
2264 } else {
2265 iotlb += paddr;
2269 code_address = address;
2270 /* Make accesses to pages with watchpoints go via the
2271 watchpoint trap routines. */
2272 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2273 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2274 /* Avoid trapping reads of pages with a write breakpoint. */
2275 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2276 iotlb = io_mem_watch + paddr;
2277 address |= TLB_MMIO;
2278 break;
2283 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2284 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2285 te = &env->tlb_table[mmu_idx][index];
2286 te->addend = addend - vaddr;
2287 if (prot & PAGE_READ) {
2288 te->addr_read = address;
2289 } else {
2290 te->addr_read = -1;
2293 if (prot & PAGE_EXEC) {
2294 te->addr_code = code_address;
2295 } else {
2296 te->addr_code = -1;
2298 if (prot & PAGE_WRITE) {
2299 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2300 (pd & IO_MEM_ROMD)) {
2301 /* Write access calls the I/O callback. */
2302 te->addr_write = address | TLB_MMIO;
2303 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2304 !cpu_physical_memory_is_dirty(pd)) {
2305 te->addr_write = address | TLB_NOTDIRTY;
2306 } else {
2307 te->addr_write = address;
2309 } else {
2310 te->addr_write = -1;
2314 #else
2316 void tlb_flush(CPUState *env, int flush_global)
2320 void tlb_flush_page(CPUState *env, target_ulong addr)
2325 * Walks guest process memory "regions" one by one
2326 * and calls callback function 'fn' for each region.
2329 struct walk_memory_regions_data
2331 walk_memory_regions_fn fn;
2332 void *priv;
2333 unsigned long start;
2334 int prot;
2337 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2338 abi_ulong end, int new_prot)
2340 if (data->start != -1ul) {
2341 int rc = data->fn(data->priv, data->start, end, data->prot);
2342 if (rc != 0) {
2343 return rc;
2347 data->start = (new_prot ? end : -1ul);
2348 data->prot = new_prot;
2350 return 0;
2353 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2354 abi_ulong base, int level, void **lp)
2356 abi_ulong pa;
2357 int i, rc;
2359 if (*lp == NULL) {
2360 return walk_memory_regions_end(data, base, 0);
2363 if (level == 0) {
2364 PageDesc *pd = *lp;
2365 for (i = 0; i < L2_SIZE; ++i) {
2366 int prot = pd[i].flags;
2368 pa = base | (i << TARGET_PAGE_BITS);
2369 if (prot != data->prot) {
2370 rc = walk_memory_regions_end(data, pa, prot);
2371 if (rc != 0) {
2372 return rc;
2376 } else {
2377 void **pp = *lp;
2378 for (i = 0; i < L2_SIZE; ++i) {
2379 pa = base | ((abi_ulong)i <<
2380 (TARGET_PAGE_BITS + L2_BITS * level));
2381 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2382 if (rc != 0) {
2383 return rc;
2388 return 0;
2391 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2393 struct walk_memory_regions_data data;
2394 unsigned long i;
2396 data.fn = fn;
2397 data.priv = priv;
2398 data.start = -1ul;
2399 data.prot = 0;
2401 for (i = 0; i < V_L1_SIZE; i++) {
2402 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2403 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2404 if (rc != 0) {
2405 return rc;
2409 return walk_memory_regions_end(&data, 0, 0);
2412 static int dump_region(void *priv, abi_ulong start,
2413 abi_ulong end, unsigned long prot)
2415 FILE *f = (FILE *)priv;
2417 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2418 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2419 start, end, end - start,
2420 ((prot & PAGE_READ) ? 'r' : '-'),
2421 ((prot & PAGE_WRITE) ? 'w' : '-'),
2422 ((prot & PAGE_EXEC) ? 'x' : '-'));
2424 return (0);
2427 /* dump memory mappings */
2428 void page_dump(FILE *f)
2430 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2431 "start", "end", "size", "prot");
2432 walk_memory_regions(f, dump_region);
2435 int page_get_flags(target_ulong address)
2437 PageDesc *p;
2439 p = page_find(address >> TARGET_PAGE_BITS);
2440 if (!p)
2441 return 0;
2442 return p->flags;
2445 /* Modify the flags of a page and invalidate the code if necessary.
2446 The flag PAGE_WRITE_ORG is positioned automatically depending
2447 on PAGE_WRITE. The mmap_lock should already be held. */
2448 void page_set_flags(target_ulong start, target_ulong end, int flags)
2450 target_ulong addr, len;
2452 /* This function should never be called with addresses outside the
2453 guest address space. If this assert fires, it probably indicates
2454 a missing call to h2g_valid. */
2455 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2456 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2457 #endif
2458 assert(start < end);
2460 start = start & TARGET_PAGE_MASK;
2461 end = TARGET_PAGE_ALIGN(end);
2463 if (flags & PAGE_WRITE) {
2464 flags |= PAGE_WRITE_ORG;
2467 for (addr = start, len = end - start;
2468 len != 0;
2469 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2470 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2472 /* If the write protection bit is set, then we invalidate
2473 the code inside. */
2474 if (!(p->flags & PAGE_WRITE) &&
2475 (flags & PAGE_WRITE) &&
2476 p->first_tb) {
2477 tb_invalidate_phys_page(addr, 0, NULL);
2479 p->flags = flags;
2483 int page_check_range(target_ulong start, target_ulong len, int flags)
2485 PageDesc *p;
2486 target_ulong end;
2487 target_ulong addr;
2489 /* This function should never be called with addresses outside the
2490 guest address space. If this assert fires, it probably indicates
2491 a missing call to h2g_valid. */
2492 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2493 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2494 #endif
2496 if (len == 0) {
2497 return 0;
2499 if (start + len - 1 < start) {
2500 /* We've wrapped around. */
2501 return -1;
2504 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2505 start = start & TARGET_PAGE_MASK;
2507 for (addr = start, len = end - start;
2508 len != 0;
2509 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2510 p = page_find(addr >> TARGET_PAGE_BITS);
2511 if( !p )
2512 return -1;
2513 if( !(p->flags & PAGE_VALID) )
2514 return -1;
2516 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2517 return -1;
2518 if (flags & PAGE_WRITE) {
2519 if (!(p->flags & PAGE_WRITE_ORG))
2520 return -1;
2521 /* unprotect the page if it was put read-only because it
2522 contains translated code */
2523 if (!(p->flags & PAGE_WRITE)) {
2524 if (!page_unprotect(addr, 0, NULL))
2525 return -1;
2527 return 0;
2530 return 0;
2533 /* called from signal handler: invalidate the code and unprotect the
2534 page. Return TRUE if the fault was successfully handled. */
2535 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2537 unsigned int prot;
2538 PageDesc *p;
2539 target_ulong host_start, host_end, addr;
2541 /* Technically this isn't safe inside a signal handler. However we
2542 know this only ever happens in a synchronous SEGV handler, so in
2543 practice it seems to be ok. */
2544 mmap_lock();
2546 p = page_find(address >> TARGET_PAGE_BITS);
2547 if (!p) {
2548 mmap_unlock();
2549 return 0;
2552 /* if the page was really writable, then we change its
2553 protection back to writable */
2554 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2555 host_start = address & qemu_host_page_mask;
2556 host_end = host_start + qemu_host_page_size;
2558 prot = 0;
2559 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2560 p = page_find(addr >> TARGET_PAGE_BITS);
2561 p->flags |= PAGE_WRITE;
2562 prot |= p->flags;
2564 /* and since the content will be modified, we must invalidate
2565 the corresponding translated code. */
2566 tb_invalidate_phys_page(addr, pc, puc);
2567 #ifdef DEBUG_TB_CHECK
2568 tb_invalidate_check(addr);
2569 #endif
2571 mprotect((void *)g2h(host_start), qemu_host_page_size,
2572 prot & PAGE_BITS);
2574 mmap_unlock();
2575 return 1;
2577 mmap_unlock();
2578 return 0;
2581 static inline void tlb_set_dirty(CPUState *env,
2582 unsigned long addr, target_ulong vaddr)
2585 #endif /* defined(CONFIG_USER_ONLY) */
2587 #if !defined(CONFIG_USER_ONLY)
/* Offset of an address within its target page (the bits below the page mask). */
2589 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* Per-page dispatch table used when one target page is shared by several
   I/O regions.  Both arrays are indexed by SUBPAGE_IDX(addr), i.e. they
   hold one entry per byte offset in the page. */
2590 typedef struct subpage_t {
/* base address handed to subpage_init() */
2591 target_phys_addr_t base;
/* index into io_mem_read / io_mem_write / io_mem_opaque for each offset */
2592 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
/* value added to the access address before it is forwarded, per offset */
2593 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2594 } subpage_t;
/* Forward declarations: both helpers are defined further down in this file
   but are needed by cpu_register_physical_memory_log(). */
2596 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2597 ram_addr_t memory, ram_addr_t region_offset);
2598 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2599 ram_addr_t orig_memory,
2600 ram_addr_t region_offset);
/* Decide whether the page at 'addr' is only partially covered by the region
   that starts at 'start_addr' and is 'orig_size' bytes long.
   On exit start_addr2 / end_addr2 hold the first and last in-page offsets
   covered by the region, and need_subpage is set to 1 when either end falls
   strictly inside the page.
   NOTE(review): 'orig_size' is not a macro parameter; it is taken from the
   scope of the caller.  The 'end_addr' parameter is not referenced by the
   visible expansion. */
2601 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2602 need_subpage) \
2603 do { \
2604 if (addr > start_addr) \
2605 start_addr2 = 0; \
2606 else { \
2607 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2608 if (start_addr2 > 0) \
2609 need_subpage = 1; \
2612 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2613 end_addr2 = TARGET_PAGE_SIZE - 1; \
2614 else { \
2615 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2616 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2617 need_subpage = 1; \
2619 } while (0)
2621 /* register physical memory.
2622 For RAM, 'size' must be a multiple of the target page size.
2623 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2624 io memory page. The address used when calling the IO function is
2625 the offset from the start of the region, plus region_offset. Both
2626 start_addr and region_offset are rounded down to a page boundary
2627 before calculating this offset. This should not be a problem unless
2628 the low bits of start_addr and region_offset differ. */
/* Parameters:
     start_addr    - first guest-physical address of the region
     size          - length in bytes; must be non-zero (asserted), rounded up
                     to a whole number of target pages for the page walk
     phys_offset   - backing descriptor stored in each page's PhysPageDesc
     region_offset - offset stored per page; replaced by start_addr when
                     phys_offset is IO_MEM_UNASSIGNED
     log_dirty     - only forwarded to cpu_notify_set_memory()
   Every CPU's TLB is flushed once the page walk has finished. */
2629 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2630 ram_addr_t size,
2631 ram_addr_t phys_offset,
2632 ram_addr_t region_offset,
2633 bool log_dirty)
2635 target_phys_addr_t addr, end_addr;
2636 PhysPageDesc *p;
2637 CPUState *env;
/* The unrounded size is kept because CHECK_SUBPAGE reads 'orig_size'. */
2638 ram_addr_t orig_size = size;
2639 subpage_t *subpage;
2641 assert(size);
2642 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2644 if (phys_offset == IO_MEM_UNASSIGNED) {
2645 region_offset = start_addr;
2647 region_offset &= TARGET_PAGE_MASK;
/* Round the size up to a whole number of target pages. */
2648 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2649 end_addr = start_addr + (target_phys_addr_t)size;
2651 addr = start_addr;
/* One iteration per target page of the region. */
2652 do {
2653 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* Case 1: the page already has a mapping other than "unassigned". */
2654 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2655 ram_addr_t orig_memory = p->phys_offset;
2656 target_phys_addr_t start_addr2, end_addr2;
2657 int need_subpage = 0;
2659 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2660 need_subpage);
2661 if (need_subpage) {
/* Create a subpage container unless the page already is one, in which
   case its existing container is looked up in io_mem_opaque. */
2662 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2663 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2664 &p->phys_offset, orig_memory,
2665 p->region_offset);
2666 } else {
2667 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2668 >> IO_MEM_SHIFT];
2670 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2671 region_offset);
2672 p->region_offset = 0;
2673 } else {
2674 p->phys_offset = phys_offset;
/* For descriptors at or below IO_MEM_ROM, or flagged IO_MEM_ROMD,
   phys_offset advances by one page per page mapped. */
2675 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2676 (phys_offset & IO_MEM_ROMD))
2677 phys_offset += TARGET_PAGE_SIZE;
/* Case 2: no usable descriptor yet; allocate one and fill it in. */
2679 } else {
2680 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2681 p->phys_offset = phys_offset;
2682 p->region_offset = region_offset;
2683 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2684 (phys_offset & IO_MEM_ROMD)) {
2685 phys_offset += TARGET_PAGE_SIZE;
2686 } else {
2687 target_phys_addr_t start_addr2, end_addr2;
2688 int need_subpage = 0;
2690 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2691 end_addr2, need_subpage);
/* Partially covered page: the remainder is set up as IO_MEM_UNASSIGNED. */
2693 if (need_subpage) {
2694 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2695 &p->phys_offset, IO_MEM_UNASSIGNED,
2696 addr & TARGET_PAGE_MASK);
2697 subpage_register(subpage, start_addr2, end_addr2,
2698 phys_offset, region_offset);
2699 p->region_offset = 0;
2703 region_offset += TARGET_PAGE_SIZE;
2704 addr += TARGET_PAGE_SIZE;
/* NOTE(review): the loop ends on equality with end_addr, not on ">=". */
2705 } while (addr != end_addr);
2707 /* since each CPU stores ram addresses in its TLB cache, we must
2708 reset the modified entries */
2709 /* XXX: slow ! */
2710 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2711 tlb_flush(env, 1);
2715 /* XXX: temporary until new memory mapping API */
2716 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2718 PhysPageDesc *p;
2720 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2721 if (!p)
2722 return IO_MEM_UNASSIGNED;
2723 return p->phys_offset;
2726 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2728 if (kvm_enabled())
2729 kvm_coalesce_mmio_region(addr, size);
2732 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2734 if (kvm_enabled())
2735 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced-MMIO buffer when KVM is enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2744 #if defined(__linux__) && !defined(TARGET_S390X)
2746 #include <sys/vfs.h>
2748 #define HUGETLBFS_MAGIC 0x958458f6
2750 static long gethugepagesize(const char *path)
2752 struct statfs fs;
2753 int ret;
2755 do {
2756 ret = statfs(path, &fs);
2757 } while (ret != 0 && errno == EINTR);
2759 if (ret != 0) {
2760 perror(path);
2761 return 0;
2764 if (fs.f_type != HUGETLBFS_MAGIC)
2765 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2767 return fs.f_bsize;
2770 static void *file_ram_alloc(RAMBlock *block,
2771 ram_addr_t memory,
2772 const char *path)
2774 char *filename;
2775 void *area;
2776 int fd;
2777 #ifdef MAP_POPULATE
2778 int flags;
2779 #endif
2780 unsigned long hpagesize;
2782 hpagesize = gethugepagesize(path);
2783 if (!hpagesize) {
2784 return NULL;
2787 if (memory < hpagesize) {
2788 return NULL;
2791 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2792 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2793 return NULL;
2796 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2797 return NULL;
2800 fd = mkstemp(filename);
2801 if (fd < 0) {
2802 perror("unable to create backing store for hugepages");
2803 free(filename);
2804 return NULL;
2806 unlink(filename);
2807 free(filename);
2809 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2812 * ftruncate is not supported by hugetlbfs in older
2813 * hosts, so don't bother bailing out on errors.
2814 * If anything goes wrong with it under other filesystems,
2815 * mmap will fail.
2817 if (ftruncate(fd, memory))
2818 perror("ftruncate");
2820 #ifdef MAP_POPULATE
2821 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2822 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2823 * to sidestep this quirk.
2825 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2826 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2827 #else
2828 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2829 #endif
2830 if (area == MAP_FAILED) {
2831 perror("file_ram_alloc: can't mmap RAM pages");
2832 close(fd);
2833 return (NULL);
2835 block->fd = fd;
2836 return area;
2838 #endif
2840 static ram_addr_t find_ram_offset(ram_addr_t size)
2842 RAMBlock *block, *next_block;
2843 ram_addr_t offset = 0, mingap = ULONG_MAX;
2845 if (QLIST_EMPTY(&ram_list.blocks))
2846 return 0;
2848 QLIST_FOREACH(block, &ram_list.blocks, next) {
2849 ram_addr_t end, next = ULONG_MAX;
2851 end = block->offset + block->length;
2853 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2854 if (next_block->offset >= end) {
2855 next = MIN(next, next_block->offset);
2858 if (next - end >= size && next - end < mingap) {
2859 offset = end;
2860 mingap = next - end;
2863 return offset;
2866 static ram_addr_t last_ram_offset(void)
2868 RAMBlock *block;
2869 ram_addr_t last = 0;
2871 QLIST_FOREACH(block, &ram_list.blocks, next)
2872 last = MAX(last, block->offset + block->length);
2874 return last;
/* Create and register a new RAM block.
     dev  - optional owning device; when its parent bus provides
            get_dev_path, that path plus "/" prefixes the block id
     name - appended to the id; the process aborts if the resulting id is
            already registered
     size - rounded up with TARGET_PAGE_ALIGN
     host - caller-supplied backing memory, or NULL to allocate it here
   Returns the ram offset chosen for the block by find_ram_offset(). */
2877 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2878 ram_addr_t size, void *host)
2880 RAMBlock *new_block, *block;
2882 size = TARGET_PAGE_ALIGN(size);
2883 new_block = qemu_mallocz(sizeof(*new_block));
/* Build the block id: "<device path>/<name>" or just "<name>". */
2885 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2886 char *id = dev->parent_bus->info->get_dev_path(dev);
2887 if (id) {
2888 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2889 qemu_free(id);
2892 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* Block ids must be unique. */
2894 QLIST_FOREACH(block, &ram_list.blocks, next) {
2895 if (!strcmp(block->idstr, new_block->idstr)) {
2896 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2897 new_block->idstr);
2898 abort();
2902 new_block->offset = find_ram_offset(size);
/* Caller-provided memory is flagged RAM_PREALLOC_MASK. */
2903 if (host) {
2904 new_block->host = host;
2905 new_block->flags |= RAM_PREALLOC_MASK;
2906 } else {
2907 if (mem_path) {
2908 #if defined (__linux__) && !defined(TARGET_S390X)
/* Try file-backed memory first; fall back to qemu_vmalloc() on failure. */
2909 new_block->host = file_ram_alloc(new_block, size, mem_path);
2910 if (!new_block->host) {
2911 new_block->host = qemu_vmalloc(size);
2912 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2914 #else
2915 fprintf(stderr, "-mem-path option unsupported\n");
2916 exit(1);
2917 #endif
2918 } else {
2919 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2920 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2921 an system defined value, which is at least 256GB. Larger systems
2922 have larger values. We put the guest between the end of data
2923 segment (system break) and this value. We use 32GB as a base to
2924 have enough room for the system break to grow. */
2925 new_block->host = mmap((void*)0x800000000, size,
2926 PROT_EXEC|PROT_READ|PROT_WRITE,
2927 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2928 if (new_block->host == MAP_FAILED) {
2929 fprintf(stderr, "Allocating RAM failed\n");
2930 abort();
2932 #else
/* With the Xen map cache no host pointer is assigned in this branch. */
2933 if (xen_mapcache_enabled()) {
2934 xen_ram_alloc(new_block->offset, size);
2935 } else {
2936 new_block->host = qemu_vmalloc(size);
2938 #endif
2939 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2942 new_block->length = size;
2944 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* Resize the dirty array to cover the new last offset (one byte per page)
   and mark every page of the new block with 0xff. */
2946 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2947 last_ram_offset() >> TARGET_PAGE_BITS);
2948 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2949 0xff, size >> TARGET_PAGE_BITS);
2951 if (kvm_enabled())
2952 kvm_setup_guest_memory(new_block->host, size);
2954 return new_block->offset;
2957 void qemu_ram_unmap(ram_addr_t addr)
2959 RAMBlock *block;
2961 QLIST_FOREACH(block, &ram_list.blocks, next) {
2962 if (addr == block->offset) {
2963 QLIST_REMOVE(block, next);
2964 qemu_free(block);
2965 return;
2970 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2972 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2975 void qemu_ram_free_from_ptr(ram_addr_t addr)
2977 RAMBlock *block;
2979 QLIST_FOREACH(block, &ram_list.blocks, next) {
2980 if (addr == block->offset) {
2981 QLIST_REMOVE(block, next);
2982 qemu_free(block);
2983 return;
/* Unlink the RAMBlock whose offset equals 'addr', release its host memory
   with the call matching the way it was obtained, then free the block.
   Does nothing when no block matches. */
2988 void qemu_ram_free(ram_addr_t addr)
2990 RAMBlock *block;
2992 QLIST_FOREACH(block, &ram_list.blocks, next) {
2993 if (addr == block->offset) {
2994 QLIST_REMOVE(block, next);
/* No statement is visible for RAM_PREALLOC_MASK blocks: presumably the
   caller-supplied memory is left to the caller - TODO confirm. */
2995 if (block->flags & RAM_PREALLOC_MASK) {
2997 } else if (mem_path) {
2998 #if defined (__linux__) && !defined(TARGET_S390X)
/* NOTE(review): a file descriptor of 0 is treated as "not file backed". */
2999 if (block->fd) {
3000 munmap(block->host, block->length);
3001 close(block->fd);
3002 } else {
3003 qemu_vfree(block->host);
3005 #else
3006 abort();
3007 #endif
3008 } else {
3009 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3010 munmap(block->host, block->length);
3011 #else
3012 if (xen_mapcache_enabled()) {
3013 qemu_invalidate_entry(block->host);
3014 } else {
3015 qemu_vfree(block->host);
3017 #endif
3019 qemu_free(block);
/* Returning right after the removal keeps the list iteration safe. */
3020 return;
3026 #ifndef _WIN32
/* Replace 'length' bytes of host memory backing ram address 'addr' with a
   fresh mapping at the same host virtual address.  The range is unmapped
   and mapped again with MAP_FIXED, using the same kind of backing the
   block was created with.  Exits the process if the new mapping does not
   land at the expected address.  Does nothing when 'addr' is not inside
   any block. */
3027 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3029 RAMBlock *block;
3030 ram_addr_t offset;
3031 int flags;
3032 void *area, *vaddr;
3034 QLIST_FOREACH(block, &ram_list.blocks, next) {
/* Unsigned subtraction: the test below also rejects addr < block->offset. */
3035 offset = addr - block->offset;
3036 if (offset < block->length) {
3037 vaddr = block->host + offset;
/* No statement is visible for RAM_PREALLOC_MASK blocks. */
3038 if (block->flags & RAM_PREALLOC_MASK) {
3040 } else {
3041 flags = MAP_FIXED;
3042 munmap(vaddr, length);
3043 if (mem_path) {
3044 #if defined(__linux__) && !defined(TARGET_S390X)
/* File-backed block: map the same file region again. */
3045 if (block->fd) {
3046 #ifdef MAP_POPULATE
3047 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3048 MAP_PRIVATE;
3049 #else
3050 flags |= MAP_PRIVATE;
3051 #endif
3052 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3053 flags, block->fd, offset);
3054 } else {
3055 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3056 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3057 flags, -1, 0);
3059 #else
3060 abort();
3061 #endif
3062 } else {
3063 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3064 flags |= MAP_SHARED | MAP_ANONYMOUS;
3065 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3066 flags, -1, 0);
3067 #else
3068 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3069 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3070 flags, -1, 0);
3071 #endif
/* This comparison also catches MAP_FAILED. */
3073 if (area != vaddr) {
3074 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3075 length, addr);
3076 exit(1);
3078 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3080 return;
3084 #endif /* !_WIN32 */
3086 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3087 With the exception of the softmmu code in this file, this should
3088 only be used for local memory (e.g. video ram) that the device owns,
3089 and knows it isn't going to access beyond the end of the block.
3091 It should not be used for general purpose DMA.
3092 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3094 void *qemu_get_ram_ptr(ram_addr_t addr)
3096 RAMBlock *block;
3098 QLIST_FOREACH(block, &ram_list.blocks, next) {
3099 if (addr - block->offset < block->length) {
3100 /* Move this entry to to start of the list. */
3101 if (block != QLIST_FIRST(&ram_list.blocks)) {
3102 QLIST_REMOVE(block, next);
3103 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3105 if (xen_mapcache_enabled()) {
3106 /* We need to check if the requested address is in the RAM
3107 * because we don't want to map the entire memory in QEMU.
3109 if (block->offset == 0) {
3110 return qemu_map_cache(addr, 0, 1);
3111 } else if (block->host == NULL) {
3112 block->host = xen_map_block(block->offset, block->length);
3115 return block->host + (addr - block->offset);
3119 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3120 abort();
3122 return NULL;
3125 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3126 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3128 void *qemu_safe_ram_ptr(ram_addr_t addr)
3130 RAMBlock *block;
3132 QLIST_FOREACH(block, &ram_list.blocks, next) {
3133 if (addr - block->offset < block->length) {
3134 if (xen_mapcache_enabled()) {
3135 /* We need to check if the requested address is in the RAM
3136 * because we don't want to map the entire memory in QEMU.
3138 if (block->offset == 0) {
3139 return qemu_map_cache(addr, 0, 1);
3140 } else if (block->host == NULL) {
3141 block->host = xen_map_block(block->offset, block->length);
3144 return block->host + (addr - block->offset);
3148 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3149 abort();
3151 return NULL;
3154 void qemu_put_ram_ptr(void *addr)
3156 trace_qemu_put_ram_ptr(addr);
3158 if (xen_mapcache_enabled()) {
3159 RAMBlock *block;
3161 QLIST_FOREACH(block, &ram_list.blocks, next) {
3162 if (addr == block->host) {
3163 break;
3166 if (block && block->host) {
3167 xen_unmap_block(block->host, block->length);
3168 block->host = NULL;
3169 } else {
3170 qemu_map_cache_unlock(addr);
3175 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3177 RAMBlock *block;
3178 uint8_t *host = ptr;
3180 QLIST_FOREACH(block, &ram_list.blocks, next) {
3181 /* This case append when the block is not mapped. */
3182 if (block->host == NULL) {
3183 continue;
3185 if (host - block->host < block->length) {
3186 *ram_addr = block->offset + (host - block->host);
3187 return 0;
3191 if (xen_mapcache_enabled()) {
3192 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3193 return 0;
3196 return -1;
3199 /* Some of the softmmu routines need to translate from a host pointer
3200 (typically a TLB entry) back to a ram offset. */
3201 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3203 ram_addr_t ram_addr;
3205 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3206 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3207 abort();
3209 return ram_addr;
3212 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3214 #ifdef DEBUG_UNASSIGNED
3215 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3216 #endif
3217 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3218 do_unassigned_access(addr, 0, 0, 0, 1);
3219 #endif
3220 return 0;
3223 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3225 #ifdef DEBUG_UNASSIGNED
3226 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3227 #endif
3228 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3229 do_unassigned_access(addr, 0, 0, 0, 2);
3230 #endif
3231 return 0;
3234 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3236 #ifdef DEBUG_UNASSIGNED
3237 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3238 #endif
3239 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3240 do_unassigned_access(addr, 0, 0, 0, 4);
3241 #endif
3242 return 0;
3245 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3247 #ifdef DEBUG_UNASSIGNED
3248 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3249 #endif
3250 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3251 do_unassigned_access(addr, 1, 0, 0, 1);
3252 #endif
3255 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3257 #ifdef DEBUG_UNASSIGNED
3258 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3259 #endif
3260 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3261 do_unassigned_access(addr, 1, 0, 0, 2);
3262 #endif
3265 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3267 #ifdef DEBUG_UNASSIGNED
3268 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3269 #endif
3270 #if defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3271 do_unassigned_access(addr, 1, 0, 0, 4);
3272 #endif
3275 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3276 unassigned_mem_readb,
3277 unassigned_mem_readw,
3278 unassigned_mem_readl,
3281 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3282 unassigned_mem_writeb,
3283 unassigned_mem_writew,
3284 unassigned_mem_writel,
3287 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3288 uint32_t val)
3290 int dirty_flags;
3291 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3292 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3293 #if !defined(CONFIG_USER_ONLY)
3294 tb_invalidate_phys_page_fast(ram_addr, 1);
3295 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3296 #endif
3298 stb_p(qemu_get_ram_ptr(ram_addr), val);
3299 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3300 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3301 /* we remove the notdirty callback only if the code has been
3302 flushed */
3303 if (dirty_flags == 0xff)
3304 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3307 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3308 uint32_t val)
3310 int dirty_flags;
3311 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3312 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3313 #if !defined(CONFIG_USER_ONLY)
3314 tb_invalidate_phys_page_fast(ram_addr, 2);
3315 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3316 #endif
3318 stw_p(qemu_get_ram_ptr(ram_addr), val);
3319 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3320 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3321 /* we remove the notdirty callback only if the code has been
3322 flushed */
3323 if (dirty_flags == 0xff)
3324 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3327 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3328 uint32_t val)
3330 int dirty_flags;
3331 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3332 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3333 #if !defined(CONFIG_USER_ONLY)
3334 tb_invalidate_phys_page_fast(ram_addr, 4);
3335 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3336 #endif
3338 stl_p(qemu_get_ram_ptr(ram_addr), val);
3339 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3340 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3341 /* we remove the notdirty callback only if the code has been
3342 flushed */
3343 if (dirty_flags == 0xff)
3344 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3347 static CPUReadMemoryFunc * const error_mem_read[3] = {
3348 NULL, /* never used */
3349 NULL, /* never used */
3350 NULL, /* never used */
3353 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3354 notdirty_mem_writeb,
3355 notdirty_mem_writew,
3356 notdirty_mem_writel,
3359 /* Generate a debug exception if a watchpoint has been hit. */
/* offset   - byte offset of the access within its page
   len_mask - mask applied to a watchpoint's address before comparing it
              with the access address (callers pass ~0x0, ~0x1 or ~0x3)
   flags    - BP_MEM_READ or BP_MEM_WRITE, matched against wp->flags
   Operates on cpu_single_env. */
3360 static void check_watchpoint(int offset, int len_mask, int flags)
3362 CPUState *env = cpu_single_env;
3363 target_ulong pc, cs_base;
3364 TranslationBlock *tb;
3365 target_ulong vaddr;
3366 CPUWatchpoint *wp;
3367 int cpu_flags;
3369 if (env->watchpoint_hit) {
3370 /* We re-entered the check after replacing the TB. Now raise
3371 * the debug interrupt so that is will trigger after the
3372 * current instruction. */
3373 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3374 return;
/* Rebuild the accessed virtual address from the page of the faulting
   access plus the in-page offset. */
3376 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3377 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* A watchpoint matches when its address equals the access address under
   either the access mask or its own mask, and its flags include the
   access kind. */
3378 if ((vaddr == (wp->vaddr & len_mask) ||
3379 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3380 wp->flags |= BP_WATCHPOINT_HIT;
3381 if (!env->watchpoint_hit) {
3382 env->watchpoint_hit = wp;
/* Find the translation block containing the accessing instruction. */
3383 tb = tb_find_pc(env->mem_io_pc);
3384 if (!tb) {
3385 cpu_abort(env, "check_watchpoint: could not find TB for "
3386 "pc=%p", (void *)env->mem_io_pc);
/* Restore CPU state to the accessing instruction and drop the TB. */
3388 cpu_restore_state(tb, env, env->mem_io_pc);
3389 tb_phys_invalidate(tb, -1);
3390 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3391 env->exception_index = EXCP_DEBUG;
3392 } else {
/* Otherwise regenerate code for the current pc before resuming. */
3393 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3394 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
/* NOTE(review): presumably does not return to this function - confirm. */
3396 cpu_resume_from_signal(env, NULL);
3398 } else {
/* Watchpoints that do not match have their hit flag cleared. */
3399 wp->flags &= ~BP_WATCHPOINT_HIT;
3404 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3405 so these check for a hit then pass through to the normal out-of-line
3406 phys routines. */
3407 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3409 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3410 return ldub_phys(addr);
3413 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3415 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3416 return lduw_phys(addr);
3419 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3421 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3422 return ldl_phys(addr);
3425 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3426 uint32_t val)
3428 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3429 stb_phys(addr, val);
3432 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3433 uint32_t val)
3435 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3436 stw_phys(addr, val);
3439 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3440 uint32_t val)
3442 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3443 stl_phys(addr, val);
3446 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3447 watch_mem_readb,
3448 watch_mem_readw,
3449 watch_mem_readl,
3452 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3453 watch_mem_writeb,
3454 watch_mem_writew,
3455 watch_mem_writel,
3458 static inline uint32_t subpage_readlen (subpage_t *mmio,
3459 target_phys_addr_t addr,
3460 unsigned int len)
3462 unsigned int idx = SUBPAGE_IDX(addr);
3463 #if defined(DEBUG_SUBPAGE)
3464 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3465 mmio, len, addr, idx);
3466 #endif
3468 addr += mmio->region_offset[idx];
3469 idx = mmio->sub_io_index[idx];
3470 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3473 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3474 uint32_t value, unsigned int len)
3476 unsigned int idx = SUBPAGE_IDX(addr);
3477 #if defined(DEBUG_SUBPAGE)
3478 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3479 __func__, mmio, len, addr, idx, value);
3480 #endif
3482 addr += mmio->region_offset[idx];
3483 idx = mmio->sub_io_index[idx];
3484 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3487 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3489 return subpage_readlen(opaque, addr, 0);
3492 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3493 uint32_t value)
3495 subpage_writelen(opaque, addr, value, 0);
3498 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3500 return subpage_readlen(opaque, addr, 1);
3503 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3504 uint32_t value)
3506 subpage_writelen(opaque, addr, value, 1);
3509 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3511 return subpage_readlen(opaque, addr, 2);
3514 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3515 uint32_t value)
3517 subpage_writelen(opaque, addr, value, 2);
3520 static CPUReadMemoryFunc * const subpage_read[] = {
3521 &subpage_readb,
3522 &subpage_readw,
3523 &subpage_readl,
3526 static CPUWriteMemoryFunc * const subpage_write[] = {
3527 &subpage_writeb,
3528 &subpage_writew,
3529 &subpage_writel,
3532 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3533 ram_addr_t memory, ram_addr_t region_offset)
3535 int idx, eidx;
3537 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3538 return -1;
3539 idx = SUBPAGE_IDX(start);
3540 eidx = SUBPAGE_IDX(end);
3541 #if defined(DEBUG_SUBPAGE)
3542 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3543 mmio, start, end, idx, eidx, memory);
3544 #endif
3545 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3546 memory = IO_MEM_UNASSIGNED;
3547 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3548 for (; idx <= eidx; idx++) {
3549 mmio->sub_io_index[idx] = memory;
3550 mmio->region_offset[idx] = region_offset;
3553 return 0;
3556 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3557 ram_addr_t orig_memory,
3558 ram_addr_t region_offset)
3560 subpage_t *mmio;
3561 int subpage_memory;
3563 mmio = qemu_mallocz(sizeof(subpage_t));
3565 mmio->base = base;
3566 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3567 DEVICE_NATIVE_ENDIAN);
3568 #if defined(DEBUG_SUBPAGE)
3569 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3570 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3571 #endif
3572 *phys = subpage_memory | IO_MEM_SUBPAGE;
3573 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3575 return mmio;
3578 static int get_free_io_mem_idx(void)
3580 int i;
3582 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3583 if (!io_mem_used[i]) {
3584 io_mem_used[i] = 1;
3585 return i;
3587 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3588 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
/* Original handlers and opaque pointer of an io zone whose table entries
   swapendian_init() has replaced with the byte swapping wrappers. */
typedef struct SwapEndianContainer {
    CPUReadMemoryFunc *read[3];   /* wrapped read handlers: byte, word, dword */
    CPUWriteMemoryFunc *write[3]; /* wrapped write handlers: byte, word, dword */
    void *opaque;                 /* opaque pointer passed to the wrapped handlers */
} SwapEndianContainer;
3610 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3612 uint32_t val;
3613 SwapEndianContainer *c = opaque;
3614 val = c->read[0](c->opaque, addr);
3615 return val;
3618 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3620 uint32_t val;
3621 SwapEndianContainer *c = opaque;
3622 val = bswap16(c->read[1](c->opaque, addr));
3623 return val;
3626 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3628 uint32_t val;
3629 SwapEndianContainer *c = opaque;
3630 val = bswap32(c->read[2](c->opaque, addr));
3631 return val;
3634 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3635 swapendian_mem_readb,
3636 swapendian_mem_readw,
3637 swapendian_mem_readl
3640 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3641 uint32_t val)
3643 SwapEndianContainer *c = opaque;
3644 c->write[0](c->opaque, addr, val);
3647 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3648 uint32_t val)
3650 SwapEndianContainer *c = opaque;
3651 c->write[1](c->opaque, addr, bswap16(val));
3654 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3655 uint32_t val)
3657 SwapEndianContainer *c = opaque;
3658 c->write[2](c->opaque, addr, bswap32(val));
3661 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3662 swapendian_mem_writeb,
3663 swapendian_mem_writew,
3664 swapendian_mem_writel
3667 static void swapendian_init(int io_index)
3669 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3670 int i;
3672 /* Swap mmio for big endian targets */
3673 c->opaque = io_mem_opaque[io_index];
3674 for (i = 0; i < 3; i++) {
3675 c->read[i] = io_mem_read[io_index][i];
3676 c->write[i] = io_mem_write[io_index][i];
3678 io_mem_read[io_index][i] = swapendian_readfn[i];
3679 io_mem_write[io_index][i] = swapendian_writefn[i];
3681 io_mem_opaque[io_index] = c;
3684 static void swapendian_del(int io_index)
3686 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3687 qemu_free(io_mem_opaque[io_index]);
3691 /* mem_read and mem_write are arrays of functions containing the
3692 function to access byte (index 0), word (index 1) and dword (index
3693 2). Functions can be omitted with a NULL function pointer.
3694 If io_index is non zero, the corresponding io zone is
3695 modified. If it is zero, a new io zone is allocated. The return
3696 value can be used with cpu_register_physical_memory(). (-1) is
3697 returned if error. */
3698 static int cpu_register_io_memory_fixed(int io_index,
3699 CPUReadMemoryFunc * const *mem_read,
3700 CPUWriteMemoryFunc * const *mem_write,
3701 void *opaque, enum device_endian endian)
3703 int i;
3705 if (io_index <= 0) {
3706 io_index = get_free_io_mem_idx();
3707 if (io_index == -1)
3708 return io_index;
3709 } else {
3710 io_index >>= IO_MEM_SHIFT;
3711 if (io_index >= IO_MEM_NB_ENTRIES)
3712 return -1;
3715 for (i = 0; i < 3; ++i) {
3716 io_mem_read[io_index][i]
3717 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3719 for (i = 0; i < 3; ++i) {
3720 io_mem_write[io_index][i]
3721 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3723 io_mem_opaque[io_index] = opaque;
3725 switch (endian) {
3726 case DEVICE_BIG_ENDIAN:
3727 #ifndef TARGET_WORDS_BIGENDIAN
3728 swapendian_init(io_index);
3729 #endif
3730 break;
3731 case DEVICE_LITTLE_ENDIAN:
3732 #ifdef TARGET_WORDS_BIGENDIAN
3733 swapendian_init(io_index);
3734 #endif
3735 break;
3736 case DEVICE_NATIVE_ENDIAN:
3737 default:
3738 break;
3741 return (io_index << IO_MEM_SHIFT);
3744 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3745 CPUWriteMemoryFunc * const *mem_write,
3746 void *opaque, enum device_endian endian)
3748 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3751 void cpu_unregister_io_memory(int io_table_address)
3753 int i;
3754 int io_index = io_table_address >> IO_MEM_SHIFT;
3756 swapendian_del(io_index);
3758 for (i=0;i < 3; i++) {
3759 io_mem_read[io_index][i] = unassigned_mem_read[i];
3760 io_mem_write[io_index][i] = unassigned_mem_write[i];
3762 io_mem_opaque[io_index] = NULL;
3763 io_mem_used[io_index] = 0;
3766 static void io_mem_init(void)
3768 int i;
3770 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3771 unassigned_mem_write, NULL,
3772 DEVICE_NATIVE_ENDIAN);
3773 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3774 unassigned_mem_write, NULL,
3775 DEVICE_NATIVE_ENDIAN);
3776 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3777 notdirty_mem_write, NULL,
3778 DEVICE_NATIVE_ENDIAN);
3779 for (i=0; i<5; i++)
3780 io_mem_used[i] = 1;
3782 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3783 watch_mem_write, NULL,
3784 DEVICE_NATIVE_ENDIAN);
3787 #endif /* !defined(CONFIG_USER_ONLY) */
3789 /* physical memory access (slow version, mainly for debug) */
3790 #if defined(CONFIG_USER_ONLY)
3791 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3792 uint8_t *buf, int len, int is_write)
3794 int l, flags;
3795 target_ulong page;
3796 void * p;
3798 while (len > 0) {
3799 page = addr & TARGET_PAGE_MASK;
3800 l = (page + TARGET_PAGE_SIZE) - addr;
3801 if (l > len)
3802 l = len;
3803 flags = page_get_flags(page);
3804 if (!(flags & PAGE_VALID))
3805 return -1;
3806 if (is_write) {
3807 if (!(flags & PAGE_WRITE))
3808 return -1;
3809 /* XXX: this code should not depend on lock_user */
3810 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3811 return -1;
3812 memcpy(p, buf, l);
3813 unlock_user(p, addr, l);
3814 } else {
3815 if (!(flags & PAGE_READ))
3816 return -1;
3817 /* XXX: this code should not depend on lock_user */
3818 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3819 return -1;
3820 memcpy(buf, p, l);
3821 unlock_user(p, addr, 0);
3823 len -= l;
3824 buf += l;
3825 addr += l;
3827 return 0;
3830 #else
3831 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3832 int len, int is_write)
3834 int l, io_index;
3835 uint8_t *ptr;
3836 uint32_t val;
3837 target_phys_addr_t page;
3838 unsigned long pd;
3839 PhysPageDesc *p;
3841 while (len > 0) {
3842 page = addr & TARGET_PAGE_MASK;
3843 l = (page + TARGET_PAGE_SIZE) - addr;
3844 if (l > len)
3845 l = len;
3846 p = phys_page_find(page >> TARGET_PAGE_BITS);
3847 if (!p) {
3848 pd = IO_MEM_UNASSIGNED;
3849 } else {
3850 pd = p->phys_offset;
3853 if (is_write) {
3854 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3855 target_phys_addr_t addr1 = addr;
3856 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3857 if (p)
3858 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3859 /* XXX: could force cpu_single_env to NULL to avoid
3860 potential bugs */
3861 if (l >= 4 && ((addr1 & 3) == 0)) {
3862 /* 32 bit write access */
3863 val = ldl_p(buf);
3864 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3865 l = 4;
3866 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3867 /* 16 bit write access */
3868 val = lduw_p(buf);
3869 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3870 l = 2;
3871 } else {
3872 /* 8 bit write access */
3873 val = ldub_p(buf);
3874 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3875 l = 1;
3877 } else {
3878 unsigned long addr1;
3879 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3880 /* RAM case */
3881 ptr = qemu_get_ram_ptr(addr1);
3882 memcpy(ptr, buf, l);
3883 if (!cpu_physical_memory_is_dirty(addr1)) {
3884 /* invalidate code */
3885 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3886 /* set dirty bit */
3887 cpu_physical_memory_set_dirty_flags(
3888 addr1, (0xff & ~CODE_DIRTY_FLAG));
3890 /* qemu doesn't execute guest code directly, but kvm does
3891 therefore flush instruction caches */
3892 if (kvm_enabled())
3893 flush_icache_range((unsigned long)ptr,
3894 ((unsigned long)ptr)+l);
3895 qemu_put_ram_ptr(ptr);
3897 } else {
3898 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3899 !(pd & IO_MEM_ROMD)) {
3900 target_phys_addr_t addr1 = addr;
3901 /* I/O case */
3902 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3903 if (p)
3904 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3905 if (l >= 4 && ((addr1 & 3) == 0)) {
3906 /* 32 bit read access */
3907 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3908 stl_p(buf, val);
3909 l = 4;
3910 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3911 /* 16 bit read access */
3912 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3913 stw_p(buf, val);
3914 l = 2;
3915 } else {
3916 /* 8 bit read access */
3917 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3918 stb_p(buf, val);
3919 l = 1;
3921 } else {
3922 /* RAM case */
3923 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3924 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3925 qemu_put_ram_ptr(ptr);
3928 len -= l;
3929 buf += l;
3930 addr += l;
3934 /* used for ROM loading : can write in RAM and ROM */
3935 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3936 const uint8_t *buf, int len)
3938 int l;
3939 uint8_t *ptr;
3940 target_phys_addr_t page;
3941 unsigned long pd;
3942 PhysPageDesc *p;
3944 while (len > 0) {
3945 page = addr & TARGET_PAGE_MASK;
3946 l = (page + TARGET_PAGE_SIZE) - addr;
3947 if (l > len)
3948 l = len;
3949 p = phys_page_find(page >> TARGET_PAGE_BITS);
3950 if (!p) {
3951 pd = IO_MEM_UNASSIGNED;
3952 } else {
3953 pd = p->phys_offset;
3956 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3957 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3958 !(pd & IO_MEM_ROMD)) {
3959 /* do nothing */
3960 } else {
3961 unsigned long addr1;
3962 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3963 /* ROM/RAM case */
3964 ptr = qemu_get_ram_ptr(addr1);
3965 memcpy(ptr, buf, l);
3966 qemu_put_ram_ptr(ptr);
3968 len -= l;
3969 buf += l;
3970 addr += l;
/* Bookkeeping for the single bounce buffer that cpu_physical_memory_map()
   hands out when the requested range is not RAM. */
typedef struct {
    void *buffer;             /* host buffer; NULL while no bounce mapping exists */
    target_phys_addr_t addr;  /* guest physical address the buffer stands for */
    target_phys_addr_t len;   /* number of bytes covered by the mapping */
} BounceBuffer;

/* The one and only bounce buffer. */
static BounceBuffer bounce;
/* A registration made with cpu_register_map_client(): a callback to run
   once the bounce buffer has been released. */
typedef struct MapClient {
    void *opaque;                   /* argument handed back to callback */
    void (*callback)(void *opaque); /* invoked by cpu_notify_map_clients() */
    QLIST_ENTRY(MapClient) link;    /* linkage in map_client_list */
} MapClient;

/* All currently registered map clients. */
static QLIST_HEAD(map_client_list, MapClient) map_client_list
    = QLIST_HEAD_INITIALIZER(map_client_list);
3991 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3993 MapClient *client = qemu_malloc(sizeof(*client));
3995 client->opaque = opaque;
3996 client->callback = callback;
3997 QLIST_INSERT_HEAD(&map_client_list, client, link);
3998 return client;
4001 void cpu_unregister_map_client(void *_client)
4003 MapClient *client = (MapClient *)_client;
4005 QLIST_REMOVE(client, link);
4006 qemu_free(client);
4009 static void cpu_notify_map_clients(void)
4011 MapClient *client;
4013 while (!QLIST_EMPTY(&map_client_list)) {
4014 client = QLIST_FIRST(&map_client_list);
4015 client->callback(client->opaque);
4016 cpu_unregister_map_client(client);
4020 /* Map a physical memory region into a host virtual address.
4021 * May map a subset of the requested range, given by and returned in *plen.
4022 * May return NULL if resources needed to perform the mapping are exhausted.
4023 * Use only for reads OR writes - not for read-modify-write operations.
4024 * Use cpu_register_map_client() to know when retrying the map operation is
4025 * likely to succeed.
4027 void *cpu_physical_memory_map(target_phys_addr_t addr,
4028 target_phys_addr_t *plen,
4029 int is_write)
4031 target_phys_addr_t len = *plen;
4032 target_phys_addr_t done = 0;
4033 int l;
4034 uint8_t *ret = NULL;
4035 uint8_t *ptr;
4036 target_phys_addr_t page;
4037 unsigned long pd;
4038 PhysPageDesc *p;
4039 unsigned long addr1;
4041 while (len > 0) {
4042 page = addr & TARGET_PAGE_MASK;
4043 l = (page + TARGET_PAGE_SIZE) - addr;
4044 if (l > len)
4045 l = len;
4046 p = phys_page_find(page >> TARGET_PAGE_BITS);
4047 if (!p) {
4048 pd = IO_MEM_UNASSIGNED;
4049 } else {
4050 pd = p->phys_offset;
4053 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4054 if (done || bounce.buffer) {
4055 break;
4057 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4058 bounce.addr = addr;
4059 bounce.len = l;
4060 if (!is_write) {
4061 cpu_physical_memory_read(addr, bounce.buffer, l);
4063 ptr = bounce.buffer;
4064 } else {
4065 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4066 ptr = qemu_get_ram_ptr(addr1);
4068 if (!done) {
4069 ret = ptr;
4070 } else if (ret + done != ptr) {
4071 break;
4074 len -= l;
4075 addr += l;
4076 done += l;
4078 *plen = done;
4079 return ret;
4082 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4083 * Will also mark the memory as dirty if is_write == 1. access_len gives
4084 * the amount of memory that was actually read or written by the caller.
4086 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4087 int is_write, target_phys_addr_t access_len)
4089 unsigned long flush_len = (unsigned long)access_len;
4091 if (buffer != bounce.buffer) {
4092 if (is_write) {
4093 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4094 while (access_len) {
4095 unsigned l;
4096 l = TARGET_PAGE_SIZE;
4097 if (l > access_len)
4098 l = access_len;
4099 if (!cpu_physical_memory_is_dirty(addr1)) {
4100 /* invalidate code */
4101 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4102 /* set dirty bit */
4103 cpu_physical_memory_set_dirty_flags(
4104 addr1, (0xff & ~CODE_DIRTY_FLAG));
4106 addr1 += l;
4107 access_len -= l;
4109 dma_flush_range((unsigned long)buffer,
4110 (unsigned long)buffer + flush_len);
4112 if (xen_mapcache_enabled()) {
4113 uint8_t *buffer1 = buffer;
4114 uint8_t *end_buffer = buffer + len;
4116 while (buffer1 < end_buffer) {
4117 qemu_put_ram_ptr(buffer1);
4118 buffer1 += TARGET_PAGE_SIZE;
4121 return;
4123 if (is_write) {
4124 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4126 qemu_vfree(bounce.buffer);
4127 bounce.buffer = NULL;
4128 cpu_notify_map_clients();
4131 /* warning: addr must be aligned */
4132 uint32_t ldl_phys(target_phys_addr_t addr)
4134 int io_index;
4135 uint8_t *ptr;
4136 uint32_t val;
4137 unsigned long pd;
4138 PhysPageDesc *p;
4140 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4141 if (!p) {
4142 pd = IO_MEM_UNASSIGNED;
4143 } else {
4144 pd = p->phys_offset;
4147 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4148 !(pd & IO_MEM_ROMD)) {
4149 /* I/O case */
4150 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4151 if (p)
4152 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4153 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4154 } else {
4155 /* RAM case */
4156 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4157 (addr & ~TARGET_PAGE_MASK);
4158 val = ldl_p(ptr);
4160 return val;
4163 /* warning: addr must be aligned */
4164 uint64_t ldq_phys(target_phys_addr_t addr)
4166 int io_index;
4167 uint8_t *ptr;
4168 uint64_t val;
4169 unsigned long pd;
4170 PhysPageDesc *p;
4172 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4173 if (!p) {
4174 pd = IO_MEM_UNASSIGNED;
4175 } else {
4176 pd = p->phys_offset;
4179 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4180 !(pd & IO_MEM_ROMD)) {
4181 /* I/O case */
4182 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4183 if (p)
4184 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4185 #ifdef TARGET_WORDS_BIGENDIAN
4186 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4187 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4188 #else
4189 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4190 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4191 #endif
4192 } else {
4193 /* RAM case */
4194 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4195 (addr & ~TARGET_PAGE_MASK);
4196 val = ldq_p(ptr);
4198 return val;
4201 /* XXX: optimize */
4202 uint32_t ldub_phys(target_phys_addr_t addr)
4204 uint8_t val;
4205 cpu_physical_memory_read(addr, &val, 1);
4206 return val;
4209 /* warning: addr must be aligned */
4210 uint32_t lduw_phys(target_phys_addr_t addr)
4212 int io_index;
4213 uint8_t *ptr;
4214 uint64_t val;
4215 unsigned long pd;
4216 PhysPageDesc *p;
4218 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4219 if (!p) {
4220 pd = IO_MEM_UNASSIGNED;
4221 } else {
4222 pd = p->phys_offset;
4225 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4226 !(pd & IO_MEM_ROMD)) {
4227 /* I/O case */
4228 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4229 if (p)
4230 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4231 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4232 } else {
4233 /* RAM case */
4234 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4235 (addr & ~TARGET_PAGE_MASK);
4236 val = lduw_p(ptr);
4238 return val;
4241 /* warning: addr must be aligned. The ram page is not masked as dirty
4242 and the code inside is not invalidated. It is useful if the dirty
4243 bits are used to track modified PTEs */
4244 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4246 int io_index;
4247 uint8_t *ptr;
4248 unsigned long pd;
4249 PhysPageDesc *p;
4251 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4252 if (!p) {
4253 pd = IO_MEM_UNASSIGNED;
4254 } else {
4255 pd = p->phys_offset;
4258 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4259 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4260 if (p)
4261 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4262 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4263 } else {
4264 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4265 ptr = qemu_get_ram_ptr(addr1);
4266 stl_p(ptr, val);
4268 if (unlikely(in_migration)) {
4269 if (!cpu_physical_memory_is_dirty(addr1)) {
4270 /* invalidate code */
4271 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4272 /* set dirty bit */
4273 cpu_physical_memory_set_dirty_flags(
4274 addr1, (0xff & ~CODE_DIRTY_FLAG));
4280 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4282 int io_index;
4283 uint8_t *ptr;
4284 unsigned long pd;
4285 PhysPageDesc *p;
4287 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4288 if (!p) {
4289 pd = IO_MEM_UNASSIGNED;
4290 } else {
4291 pd = p->phys_offset;
4294 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4295 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4296 if (p)
4297 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4298 #ifdef TARGET_WORDS_BIGENDIAN
4299 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4300 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4301 #else
4302 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4303 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4304 #endif
4305 } else {
4306 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4307 (addr & ~TARGET_PAGE_MASK);
4308 stq_p(ptr, val);
4312 /* warning: addr must be aligned */
4313 void stl_phys(target_phys_addr_t addr, uint32_t val)
4315 int io_index;
4316 uint8_t *ptr;
4317 unsigned long pd;
4318 PhysPageDesc *p;
4320 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4321 if (!p) {
4322 pd = IO_MEM_UNASSIGNED;
4323 } else {
4324 pd = p->phys_offset;
4327 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4328 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4329 if (p)
4330 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4331 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4332 } else {
4333 unsigned long addr1;
4334 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4335 /* RAM case */
4336 ptr = qemu_get_ram_ptr(addr1);
4337 stl_p(ptr, val);
4338 if (!cpu_physical_memory_is_dirty(addr1)) {
4339 /* invalidate code */
4340 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4341 /* set dirty bit */
4342 cpu_physical_memory_set_dirty_flags(addr1,
4343 (0xff & ~CODE_DIRTY_FLAG));
4348 /* XXX: optimize */
4349 void stb_phys(target_phys_addr_t addr, uint32_t val)
4351 uint8_t v = val;
4352 cpu_physical_memory_write(addr, &v, 1);
4355 /* warning: addr must be aligned */
4356 void stw_phys(target_phys_addr_t addr, uint32_t val)
4358 int io_index;
4359 uint8_t *ptr;
4360 unsigned long pd;
4361 PhysPageDesc *p;
4363 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4364 if (!p) {
4365 pd = IO_MEM_UNASSIGNED;
4366 } else {
4367 pd = p->phys_offset;
4370 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4371 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4372 if (p)
4373 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4374 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4375 } else {
4376 unsigned long addr1;
4377 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4378 /* RAM case */
4379 ptr = qemu_get_ram_ptr(addr1);
4380 stw_p(ptr, val);
4381 if (!cpu_physical_memory_is_dirty(addr1)) {
4382 /* invalidate code */
4383 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4384 /* set dirty bit */
4385 cpu_physical_memory_set_dirty_flags(addr1,
4386 (0xff & ~CODE_DIRTY_FLAG));
4391 /* XXX: optimize */
4392 void stq_phys(target_phys_addr_t addr, uint64_t val)
4394 val = tswap64(val);
4395 cpu_physical_memory_write(addr, &val, 8);
4398 /* virtual memory access for debug (includes writing to ROM) */
4399 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4400 uint8_t *buf, int len, int is_write)
4402 int l;
4403 target_phys_addr_t phys_addr;
4404 target_ulong page;
4406 while (len > 0) {
4407 page = addr & TARGET_PAGE_MASK;
4408 phys_addr = cpu_get_phys_page_debug(env, page);
4409 /* if no physical page mapped, return an error */
4410 if (phys_addr == -1)
4411 return -1;
4412 l = (page + TARGET_PAGE_SIZE) - addr;
4413 if (l > len)
4414 l = len;
4415 phys_addr += (addr & ~TARGET_PAGE_MASK);
4416 if (is_write)
4417 cpu_physical_memory_write_rom(phys_addr, buf, l);
4418 else
4419 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4420 len -= l;
4421 buf += l;
4422 addr += l;
4424 return 0;
4426 #endif
4428 /* in deterministic execution mode, instructions doing device I/Os
4429 must be at the end of the TB */
4430 void cpu_io_recompile(CPUState *env, void *retaddr)
4432 TranslationBlock *tb;
4433 uint32_t n, cflags;
4434 target_ulong pc, cs_base;
4435 uint64_t flags;
4437 tb = tb_find_pc((unsigned long)retaddr);
4438 if (!tb) {
4439 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4440 retaddr);
4442 n = env->icount_decr.u16.low + tb->icount;
4443 cpu_restore_state(tb, env, (unsigned long)retaddr);
4444 /* Calculate how many instructions had been executed before the fault
4445 occurred. */
4446 n = n - env->icount_decr.u16.low;
4447 /* Generate a new TB ending on the I/O insn. */
4448 n++;
4449 /* On MIPS and SH, delay slot instructions can only be restarted if
4450 they were already the first instruction in the TB. If this is not
4451 the first instruction in a TB then re-execute the preceding
4452 branch. */
4453 #if defined(TARGET_MIPS)
4454 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4455 env->active_tc.PC -= 4;
4456 env->icount_decr.u16.low++;
4457 env->hflags &= ~MIPS_HFLAG_BMASK;
4459 #elif defined(TARGET_SH4)
4460 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4461 && n > 1) {
4462 env->pc -= 2;
4463 env->icount_decr.u16.low++;
4464 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4466 #endif
4467 /* This should never happen. */
4468 if (n > CF_COUNT_MASK)
4469 cpu_abort(env, "TB too big during recompile");
4471 cflags = n | CF_LAST_IO;
4472 pc = tb->pc;
4473 cs_base = tb->cs_base;
4474 flags = tb->flags;
4475 tb_phys_invalidate(tb, -1);
4476 /* FIXME: In theory this could raise an exception. In practice
4477 we have already translated the block once so it's probably ok. */
4478 tb_gen_code(env, pc, cs_base, flags, cflags);
4479 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4480 the first in the TB) then we end up generating a whole new TB and
4481 repeating the fault, which is horribly inefficient.
4482 Better would be to execute just this insn uncached, or generate a
4483 second new TB. */
4484 cpu_resume_from_signal(env, NULL);
4487 #if !defined(CONFIG_USER_ONLY)
4489 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4491 int i, target_code_size, max_target_code_size;
4492 int direct_jmp_count, direct_jmp2_count, cross_page;
4493 TranslationBlock *tb;
4495 target_code_size = 0;
4496 max_target_code_size = 0;
4497 cross_page = 0;
4498 direct_jmp_count = 0;
4499 direct_jmp2_count = 0;
4500 for(i = 0; i < nb_tbs; i++) {
4501 tb = &tbs[i];
4502 target_code_size += tb->size;
4503 if (tb->size > max_target_code_size)
4504 max_target_code_size = tb->size;
4505 if (tb->page_addr[1] != -1)
4506 cross_page++;
4507 if (tb->tb_next_offset[0] != 0xffff) {
4508 direct_jmp_count++;
4509 if (tb->tb_next_offset[1] != 0xffff) {
4510 direct_jmp2_count++;
4514 /* XXX: avoid using doubles ? */
4515 cpu_fprintf(f, "Translation buffer state:\n");
4516 cpu_fprintf(f, "gen code size %td/%ld\n",
4517 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4518 cpu_fprintf(f, "TB count %d/%d\n",
4519 nb_tbs, code_gen_max_blocks);
4520 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4521 nb_tbs ? target_code_size / nb_tbs : 0,
4522 max_target_code_size);
4523 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4524 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4525 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4526 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4527 cross_page,
4528 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4529 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4530 direct_jmp_count,
4531 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4532 direct_jmp2_count,
4533 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4534 cpu_fprintf(f, "\nStatistics:\n");
4535 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4536 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4537 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4538 #ifdef CONFIG_PROFILER
4539 tcg_dump_info(f, cpu_fprintf);
4540 #endif
4543 #define MMUSUFFIX _cmmu
4544 #define GETPC() NULL
4545 #define env cpu_single_env
4546 #define SOFTMMU_CODE_ACCESS
4548 #define SHIFT 0
4549 #include "softmmu_template.h"
4551 #define SHIFT 1
4552 #include "softmmu_template.h"
4554 #define SHIFT 2
4555 #include "softmmu_template.h"
4557 #define SHIFT 3
4558 #include "softmmu_template.h"
4560 #undef env
4562 #endif