qemu-kvm: Remove extboot support
[qemu-kvm.git] / exec.c
blob 857806365498dcb84585831ad24cb5ad4f0181bd
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "cache-utils.h"
31 #include "tcg.h"
32 #include "hw/hw.h"
33 #include "hw/qdev.h"
34 #include "osdep.h"
35 #include "kvm.h"
36 #include "hw/xen.h"
37 #include "qemu-timer.h"
38 #include "memory.h"
39 #include "exec-memory.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
43 #include <sys/param.h>
44 #if __FreeBSD_version >= 700104
45 #define HAVE_KINFO_GETVMMAP
46 #define sigqueue sigqueue_freebsd /* avoid redefinition */
47 #include <sys/time.h>
48 #include <sys/proc.h>
49 #include <machine/profile.h>
50 #define _KERNEL
51 #include <sys/user.h>
52 #undef _KERNEL
53 #undef sigqueue
54 #include <libutil.h>
55 #endif
56 #endif
57 #else /* !CONFIG_USER_ONLY */
58 #include "xen-mapcache.h"
59 #include "trace.h"
60 #endif
62 //#define DEBUG_TB_INVALIDATE
63 //#define DEBUG_FLUSH
64 //#define DEBUG_TLB
65 //#define DEBUG_UNASSIGNED
67 /* make various TB consistency checks */
68 //#define DEBUG_TB_CHECK
69 //#define DEBUG_TLB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
76 #undef DEBUG_TB_CHECK
77 #endif
79 #define SMC_BITMAP_USE_THRESHOLD 10
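/* Once a page has taken SMC_BITMAP_USE_THRESHOLD code-write faults, a
   per-page bitmap of the bytes covered by translated code is built (see
   build_page_bitmap() below), so that later small writes which miss the
   bitmap can skip TB invalidation in tb_invalidate_phys_page_fast(). */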
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
84 static int nb_tbs;
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 #if defined(__arm__) || defined(__sparc_v9__)
89 /* The prologue must be reachable with a direct jump. ARM and Sparc64
90 have limited branch ranges (possibly also PPC) so place it in a
91    section close to the code segment. */
92 #define code_gen_section \
93 __attribute__((__section__(".gen_code"))) \
94 __attribute__((aligned (32)))
95 #elif defined(_WIN32)
96 /* Maximum alignment for Win32 is 16. */
97 #define code_gen_section \
98 __attribute__((aligned (16)))
99 #else
100 #define code_gen_section \
101 __attribute__((aligned (32)))
102 #endif
104 uint8_t code_gen_prologue[1024] code_gen_section;
105 static uint8_t *code_gen_buffer;
106 static unsigned long code_gen_buffer_size;
107 /* threshold to flush the translated code buffer */
108 static unsigned long code_gen_buffer_max_size;
109 static uint8_t *code_gen_ptr;
111 #if !defined(CONFIG_USER_ONLY)
112 int phys_ram_fd;
113 static int in_migration;
115 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
117 static MemoryRegion *system_memory;
118 static MemoryRegion *system_io;
120 #endif
122 CPUState *first_cpu;
123 /* current CPU in the current thread. It is only valid inside
124 cpu_exec() */
125 CPUState *cpu_single_env;
126 /* 0 = Do not count executed instructions.
127 1 = Precise instruction counting.
128 2 = Adaptive rate instruction counting. */
129 int use_icount = 0;
130 /* Current instruction counter. While executing translated code this may
131 include some instructions that have not yet been executed. */
132 int64_t qemu_icount;
134 typedef struct PageDesc {
135 /* list of TBs intersecting this ram page */
136 TranslationBlock *first_tb;
137 /* in order to optimize self modifying code, we count the number
138 of lookups we do to a given page to use a bitmap */
139 unsigned int code_write_count;
140 uint8_t *code_bitmap;
141 #if defined(CONFIG_USER_ONLY)
142 unsigned long flags;
143 #endif
144 } PageDesc;
146 /* In system mode we want L1_MAP to be based on ram offsets,
147 while in user mode we want it to be based on virtual addresses. */
148 #if !defined(CONFIG_USER_ONLY)
149 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
150 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
153 #endif
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
156 #endif
158 /* Size of the L2 (and L3, etc) page tables. */
159 #define L2_BITS 10
160 #define L2_SIZE (1 << L2_BITS)
162 /* The bits remaining after N lower levels of page tables. */
163 #define P_L1_BITS_REM \
164 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165 #define V_L1_BITS_REM \
166 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168 /* Size of the L1 page table. Avoid silly small sizes. */
169 #if P_L1_BITS_REM < 4
170 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
171 #else
172 #define P_L1_BITS P_L1_BITS_REM
173 #endif
175 #if V_L1_BITS_REM < 4
176 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
177 #else
178 #define V_L1_BITS V_L1_BITS_REM
179 #endif
181 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
182 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
184 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
185 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
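/* Worked example (the actual values depend on the configuration): for a
   32-bit user-mode target with TARGET_PAGE_BITS == 12, 20 bits of page
   index remain; 20 % 10 gives V_L1_BITS_REM == 0, which is below 4, so
   V_L1_BITS == 10 and V_L1_SHIFT == 32 - 12 - 10 == 10: a 1024-entry L1
   table whose slots point directly at 1024-entry leaf arrays of PageDesc. */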
187 unsigned long qemu_real_host_page_size;
188 unsigned long qemu_host_page_size;
189 unsigned long qemu_host_page_mask;
191 /* This is a multi-level map on the virtual address space.
192 The bottom level has pointers to PageDesc. */
193 static void *l1_map[V_L1_SIZE];
195 #if !defined(CONFIG_USER_ONLY)
196 typedef struct PhysPageDesc {
197 /* offset in host memory of the page + io_index in the low bits */
198 ram_addr_t phys_offset;
199 ram_addr_t region_offset;
200 } PhysPageDesc;
202 /* This is a multi-level map on the physical address space.
203 The bottom level has pointers to PhysPageDesc. */
204 static void *l1_phys_map[P_L1_SIZE];
206 static void io_mem_init(void);
207 static void memory_map_init(void);
209 /* io memory support */
210 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
211 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
212 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
213 static char io_mem_used[IO_MEM_NB_ENTRIES];
214 static int io_mem_watch;
215 #endif
217 /* log support */
218 #ifdef WIN32
219 static const char *logfilename = "qemu.log";
220 #else
221 static const char *logfilename = "/tmp/qemu.log";
222 #endif
223 FILE *logfile;
224 int loglevel;
225 static int log_append = 0;
227 /* statistics */
228 #if !defined(CONFIG_USER_ONLY)
229 static int tlb_flush_count;
230 #endif
231 static int tb_flush_count;
232 static int tb_phys_invalidate_count;
234 #ifdef _WIN32
235 static void map_exec(void *addr, long size)
237 DWORD old_protect;
238 VirtualProtect(addr, size,
239 PAGE_EXECUTE_READWRITE, &old_protect);
242 #else
243 static void map_exec(void *addr, long size)
245 unsigned long start, end, page_size;
247 page_size = getpagesize();
248 start = (unsigned long)addr;
249 start &= ~(page_size - 1);
251 end = (unsigned long)addr + size;
252 end += page_size - 1;
253 end &= ~(page_size - 1);
255 mprotect((void *)start, end - start,
256 PROT_READ | PROT_WRITE | PROT_EXEC);
258 #endif
260 static void page_init(void)
262     /* NOTE: we can always assume that qemu_host_page_size >=
263 TARGET_PAGE_SIZE */
264 #ifdef _WIN32
266 SYSTEM_INFO system_info;
268 GetSystemInfo(&system_info);
269 qemu_real_host_page_size = system_info.dwPageSize;
271 #else
272 qemu_real_host_page_size = getpagesize();
273 #endif
274 if (qemu_host_page_size == 0)
275 qemu_host_page_size = qemu_real_host_page_size;
276 if (qemu_host_page_size < TARGET_PAGE_SIZE)
277 qemu_host_page_size = TARGET_PAGE_SIZE;
278 qemu_host_page_mask = ~(qemu_host_page_size - 1);
280 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
282 #ifdef HAVE_KINFO_GETVMMAP
283 struct kinfo_vmentry *freep;
284 int i, cnt;
286 freep = kinfo_getvmmap(getpid(), &cnt);
287 if (freep) {
288 mmap_lock();
289 for (i = 0; i < cnt; i++) {
290 unsigned long startaddr, endaddr;
292 startaddr = freep[i].kve_start;
293 endaddr = freep[i].kve_end;
294 if (h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 } else {
301 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
302 endaddr = ~0ul;
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 #endif
308 free(freep);
309 mmap_unlock();
311 #else
312 FILE *f;
314 last_brk = (unsigned long)sbrk(0);
316 f = fopen("/compat/linux/proc/self/maps", "r");
317 if (f) {
318 mmap_lock();
320 do {
321 unsigned long startaddr, endaddr;
322 int n;
324 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
326 if (n == 2 && h2g_valid(startaddr)) {
327 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
329 if (h2g_valid(endaddr)) {
330 endaddr = h2g(endaddr);
331 } else {
332 endaddr = ~0ul;
334 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
336 } while (!feof(f));
338 fclose(f);
339 mmap_unlock();
341 #endif
343 #endif
346 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
348 PageDesc *pd;
349 void **lp;
350 int i;
352 #if defined(CONFIG_USER_ONLY)
353 /* We can't use g_malloc because it may recurse into a locked mutex. */
354 # define ALLOC(P, SIZE) \
355 do { \
356 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
357 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
358 } while (0)
359 #else
360 # define ALLOC(P, SIZE) \
361 do { P = g_malloc0(SIZE); } while (0)
362 #endif
364 /* Level 1. Always allocated. */
365 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
367 /* Level 2..N-1. */
368 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
369 void **p = *lp;
371 if (p == NULL) {
372 if (!alloc) {
373 return NULL;
375 ALLOC(p, sizeof(void *) * L2_SIZE);
376 *lp = p;
379 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
382 pd = *lp;
383 if (pd == NULL) {
384 if (!alloc) {
385 return NULL;
387 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
388 *lp = pd;
391 #undef ALLOC
393 return pd + (index & (L2_SIZE - 1));
396 static inline PageDesc *page_find(tb_page_addr_t index)
398 return page_find_alloc(index, 0);
401 #if !defined(CONFIG_USER_ONLY)
402 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
404 PhysPageDesc *pd;
405 void **lp;
406 int i;
408 /* Level 1. Always allocated. */
409 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
411 /* Level 2..N-1. */
412 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
413 void **p = *lp;
414 if (p == NULL) {
415 if (!alloc) {
416 return NULL;
418 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
420 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
423 pd = *lp;
424 if (pd == NULL) {
425 int i;
427 if (!alloc) {
428 return NULL;
431 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
433 for (i = 0; i < L2_SIZE; i++) {
434 pd[i].phys_offset = IO_MEM_UNASSIGNED;
435 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
439 return pd + (index & (L2_SIZE - 1));
442 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
444 return phys_page_find_alloc(index, 0);
447 static void tlb_protect_code(ram_addr_t ram_addr);
448 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
449 target_ulong vaddr);
450 #define mmap_lock() do { } while(0)
451 #define mmap_unlock() do { } while(0)
452 #endif
454 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
456 #if defined(CONFIG_USER_ONLY)
457 /* Currently it is not recommended to allocate big chunks of data in
458    user mode. This will change when a dedicated libc is used. */
459 #define USE_STATIC_CODE_GEN_BUFFER
460 #endif
462 #ifdef USE_STATIC_CODE_GEN_BUFFER
463 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
464 __attribute__((aligned (CODE_GEN_ALIGN)));
465 #endif
467 static void code_gen_alloc(unsigned long tb_size)
469 #ifdef USE_STATIC_CODE_GEN_BUFFER
470 code_gen_buffer = static_code_gen_buffer;
471 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
472 map_exec(code_gen_buffer, code_gen_buffer_size);
473 #else
474 code_gen_buffer_size = tb_size;
475 if (code_gen_buffer_size == 0) {
476 #if defined(CONFIG_USER_ONLY)
477 /* in user mode, phys_ram_size is not meaningful */
478 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
479 #else
480 /* XXX: needs adjustments */
481 code_gen_buffer_size = (unsigned long)(ram_size / 4);
482 #endif
484 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
485 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
486 /* The code gen buffer location may have constraints depending on
487 the host cpu and OS */
488 #if defined(__linux__)
490 int flags;
491 void *start = NULL;
493 flags = MAP_PRIVATE | MAP_ANONYMOUS;
494 #if defined(__x86_64__)
495 flags |= MAP_32BIT;
496 /* Cannot map more than that */
497 if (code_gen_buffer_size > (800 * 1024 * 1024))
498 code_gen_buffer_size = (800 * 1024 * 1024);
499 #elif defined(__sparc_v9__)
500 // Map the buffer below 2G, so we can use direct calls and branches
501 flags |= MAP_FIXED;
502 start = (void *) 0x60000000UL;
503 if (code_gen_buffer_size > (512 * 1024 * 1024))
504 code_gen_buffer_size = (512 * 1024 * 1024);
505 #elif defined(__arm__)
506 /* Map the buffer below 32M, so we can use direct calls and branches */
507 flags |= MAP_FIXED;
508 start = (void *) 0x01000000UL;
509 if (code_gen_buffer_size > 16 * 1024 * 1024)
510 code_gen_buffer_size = 16 * 1024 * 1024;
511 #elif defined(__s390x__)
512 /* Map the buffer so that we can use direct calls and branches. */
513 /* We have a +- 4GB range on the branches; leave some slop. */
514 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
515 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
517 start = (void *)0x90000000UL;
518 #endif
519 code_gen_buffer = mmap(start, code_gen_buffer_size,
520 PROT_WRITE | PROT_READ | PROT_EXEC,
521 flags, -1, 0);
522 if (code_gen_buffer == MAP_FAILED) {
523 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
524 exit(1);
527 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
528 || defined(__DragonFly__) || defined(__OpenBSD__) \
529 || defined(__NetBSD__)
531 int flags;
532 void *addr = NULL;
533 flags = MAP_PRIVATE | MAP_ANONYMOUS;
534 #if defined(__x86_64__)
535 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
536 * 0x40000000 is free */
537 flags |= MAP_FIXED;
538 addr = (void *)0x40000000;
539 /* Cannot map more than that */
540 if (code_gen_buffer_size > (800 * 1024 * 1024))
541 code_gen_buffer_size = (800 * 1024 * 1024);
542 #elif defined(__sparc_v9__)
543 // Map the buffer below 2G, so we can use direct calls and branches
544 flags |= MAP_FIXED;
545 addr = (void *) 0x60000000UL;
546 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
547 code_gen_buffer_size = (512 * 1024 * 1024);
549 #endif
550 code_gen_buffer = mmap(addr, code_gen_buffer_size,
551 PROT_WRITE | PROT_READ | PROT_EXEC,
552 flags, -1, 0);
553 if (code_gen_buffer == MAP_FAILED) {
554 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
555 exit(1);
558 #else
559 code_gen_buffer = g_malloc(code_gen_buffer_size);
560 map_exec(code_gen_buffer, code_gen_buffer_size);
561 #endif
562 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
563 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
564 code_gen_buffer_max_size = code_gen_buffer_size -
565 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
566 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
567 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
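/* code_gen_buffer_max_size is the flush threshold checked by tb_alloc():
   it keeps TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes of headroom so that the
   block currently being translated still fits once the threshold test
   has passed. */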
570 /* Must be called before using the QEMU cpus. 'tb_size' is the size
571 (in bytes) allocated to the translation buffer. Zero means default
572 size. */
573 void tcg_exec_init(unsigned long tb_size)
575 cpu_gen_init();
576 code_gen_alloc(tb_size);
577 code_gen_ptr = code_gen_buffer;
578 page_init();
579 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
580 /* There's no guest base to take into account, so go ahead and
581 initialize the prologue now. */
582 tcg_prologue_init(&tcg_ctx);
583 #endif
586 bool tcg_enabled(void)
588 return code_gen_buffer != NULL;
591 void cpu_exec_init_all(void)
593 #if !defined(CONFIG_USER_ONLY)
594 memory_map_init();
595 io_mem_init();
596 #endif
599 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
601 static int cpu_common_post_load(void *opaque, int version_id)
603 CPUState *env = opaque;
605 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
606 version_id is increased. */
607 env->interrupt_request &= ~0x01;
608 tlb_flush(env, 1);
610 return 0;
613 static const VMStateDescription vmstate_cpu_common = {
614 .name = "cpu_common",
615 .version_id = 1,
616 .minimum_version_id = 1,
617 .minimum_version_id_old = 1,
618 .post_load = cpu_common_post_load,
619 .fields = (VMStateField []) {
620 VMSTATE_UINT32(halted, CPUState),
621 VMSTATE_UINT32(interrupt_request, CPUState),
622 VMSTATE_END_OF_LIST()
625 #endif
627 CPUState *qemu_get_cpu(int cpu)
629 CPUState *env = first_cpu;
631 while (env) {
632 if (env->cpu_index == cpu)
633 break;
634 env = env->next_cpu;
637 return env;
640 void cpu_exec_init(CPUState *env)
642 CPUState **penv;
643 int cpu_index;
645 #if defined(CONFIG_USER_ONLY)
646 cpu_list_lock();
647 #endif
648 env->next_cpu = NULL;
649 penv = &first_cpu;
650 cpu_index = 0;
651 while (*penv != NULL) {
652 penv = &(*penv)->next_cpu;
653 cpu_index++;
655 env->cpu_index = cpu_index;
656 env->numa_node = 0;
657 QTAILQ_INIT(&env->breakpoints);
658 QTAILQ_INIT(&env->watchpoints);
659 #ifndef CONFIG_USER_ONLY
660 env->thread_id = qemu_get_thread_id();
661 #endif
662 *penv = env;
663 #if defined(CONFIG_USER_ONLY)
664 cpu_list_unlock();
665 #endif
666 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
667 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
668 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
669 cpu_save, cpu_load, env);
670 #endif
673 /* Allocate a new translation block. Flush the translation buffer if
674 too many translation blocks or too much generated code. */
675 static TranslationBlock *tb_alloc(target_ulong pc)
677 TranslationBlock *tb;
679 if (nb_tbs >= code_gen_max_blocks ||
680 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
681 return NULL;
682 tb = &tbs[nb_tbs++];
683 tb->pc = pc;
684 tb->cflags = 0;
685 return tb;
688 void tb_free(TranslationBlock *tb)
690     /* In practice this is mostly used for single-use temporary TBs.
691 Ignore the hard cases and just back up if this TB happens to
692 be the last one generated. */
693 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
694 code_gen_ptr = tb->tc_ptr;
695 nb_tbs--;
699 static inline void invalidate_page_bitmap(PageDesc *p)
701 if (p->code_bitmap) {
702 g_free(p->code_bitmap);
703 p->code_bitmap = NULL;
705 p->code_write_count = 0;
708 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
710 static void page_flush_tb_1 (int level, void **lp)
712 int i;
714 if (*lp == NULL) {
715 return;
717 if (level == 0) {
718 PageDesc *pd = *lp;
719 for (i = 0; i < L2_SIZE; ++i) {
720 pd[i].first_tb = NULL;
721 invalidate_page_bitmap(pd + i);
723 } else {
724 void **pp = *lp;
725 for (i = 0; i < L2_SIZE; ++i) {
726 page_flush_tb_1 (level - 1, pp + i);
731 static void page_flush_tb(void)
733 int i;
734 for (i = 0; i < V_L1_SIZE; i++) {
735 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
739 /* flush all the translation blocks */
740 /* XXX: tb_flush is currently not thread safe */
741 void tb_flush(CPUState *env1)
743 CPUState *env;
744 #if defined(DEBUG_FLUSH)
745 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
746 (unsigned long)(code_gen_ptr - code_gen_buffer),
747 nb_tbs, nb_tbs > 0 ?
748 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
749 #endif
750 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
751 cpu_abort(env1, "Internal error: code buffer overflow\n");
753 nb_tbs = 0;
755 for(env = first_cpu; env != NULL; env = env->next_cpu) {
756 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
759 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
760 page_flush_tb();
762 code_gen_ptr = code_gen_buffer;
763 /* XXX: flush processor icache at this point if cache flush is
764 expensive */
765 tb_flush_count++;
768 #ifdef DEBUG_TB_CHECK
770 static void tb_invalidate_check(target_ulong address)
772 TranslationBlock *tb;
773 int i;
774 address &= TARGET_PAGE_MASK;
775 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
776 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
777 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
778 address >= tb->pc + tb->size)) {
779 printf("ERROR invalidate: address=" TARGET_FMT_lx
780 " PC=%08lx size=%04x\n",
781 address, (long)tb->pc, tb->size);
787 /* verify that all the pages have correct rights for code */
788 static void tb_page_check(void)
790 TranslationBlock *tb;
791 int i, flags1, flags2;
793 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
794 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
795 flags1 = page_get_flags(tb->pc);
796 flags2 = page_get_flags(tb->pc + tb->size - 1);
797 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
798 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
799 (long)tb->pc, tb->size, flags1, flags2);
805 #endif
807 /* invalidate one TB */
808 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
809 int next_offset)
811 TranslationBlock *tb1;
812 for(;;) {
813 tb1 = *ptb;
814 if (tb1 == tb) {
815 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
816 break;
818 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
822 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
824 TranslationBlock *tb1;
825 unsigned int n1;
827 for(;;) {
828 tb1 = *ptb;
829 n1 = (long)tb1 & 3;
830 tb1 = (TranslationBlock *)((long)tb1 & ~3);
831 if (tb1 == tb) {
832 *ptb = tb1->page_next[n1];
833 break;
835 ptb = &tb1->page_next[n1];
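/* Pointers stored in first_tb/page_next[] and jmp_first/jmp_next[] carry a
   tag in their low two bits: 0 or 1 names which of the TB's (up to) two
   pages or jump slots the link belongs to, and 2 marks the owning TB
   itself, which terminates the circular jump list.  That is why the list
   walkers here mask with ~3 and test (long)tb & 3 against 2. */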
839 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
841 TranslationBlock *tb1, **ptb;
842 unsigned int n1;
844 ptb = &tb->jmp_next[n];
845 tb1 = *ptb;
846 if (tb1) {
847 /* find tb(n) in circular list */
848 for(;;) {
849 tb1 = *ptb;
850 n1 = (long)tb1 & 3;
851 tb1 = (TranslationBlock *)((long)tb1 & ~3);
852 if (n1 == n && tb1 == tb)
853 break;
854 if (n1 == 2) {
855 ptb = &tb1->jmp_first;
856 } else {
857 ptb = &tb1->jmp_next[n1];
860 /* now we can suppress tb(n) from the list */
861 *ptb = tb->jmp_next[n];
863 tb->jmp_next[n] = NULL;
867 /* reset the jump entry 'n' of a TB so that it is not chained to
868 another TB */
869 static inline void tb_reset_jump(TranslationBlock *tb, int n)
871 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
874 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
876 CPUState *env;
877 PageDesc *p;
878 unsigned int h, n1;
879 tb_page_addr_t phys_pc;
880 TranslationBlock *tb1, *tb2;
882 /* remove the TB from the hash list */
883 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
884 h = tb_phys_hash_func(phys_pc);
885 tb_remove(&tb_phys_hash[h], tb,
886 offsetof(TranslationBlock, phys_hash_next));
888 /* remove the TB from the page list */
889 if (tb->page_addr[0] != page_addr) {
890 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
891 tb_page_remove(&p->first_tb, tb);
892 invalidate_page_bitmap(p);
894 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
895 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
896 tb_page_remove(&p->first_tb, tb);
897 invalidate_page_bitmap(p);
900 tb_invalidated_flag = 1;
902 /* remove the TB from the hash list */
903 h = tb_jmp_cache_hash_func(tb->pc);
904 for(env = first_cpu; env != NULL; env = env->next_cpu) {
905 if (env->tb_jmp_cache[h] == tb)
906 env->tb_jmp_cache[h] = NULL;
909 /* suppress this TB from the two jump lists */
910 tb_jmp_remove(tb, 0);
911 tb_jmp_remove(tb, 1);
913 /* suppress any remaining jumps to this TB */
914 tb1 = tb->jmp_first;
915 for(;;) {
916 n1 = (long)tb1 & 3;
917 if (n1 == 2)
918 break;
919 tb1 = (TranslationBlock *)((long)tb1 & ~3);
920 tb2 = tb1->jmp_next[n1];
921 tb_reset_jump(tb1, n1);
922 tb1->jmp_next[n1] = NULL;
923 tb1 = tb2;
925 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
927 tb_phys_invalidate_count++;
930 static inline void set_bits(uint8_t *tab, int start, int len)
932 int end, mask, end1;
934 end = start + len;
935 tab += start >> 3;
936 mask = 0xff << (start & 7);
937 if ((start & ~7) == (end & ~7)) {
938 if (start < end) {
939 mask &= ~(0xff << (end & 7));
940 *tab |= mask;
942 } else {
943 *tab++ |= mask;
944 start = (start + 8) & ~7;
945 end1 = end & ~7;
946 while (start < end1) {
947 *tab++ = 0xff;
948 start += 8;
950 if (start < end) {
951 mask = ~(0xff << (end & 7));
952 *tab |= mask;
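/* Example: set_bits(tab, 3, 7) marks bits 3..9, i.e. it ORs 0xf8 into
   tab[0] (bits 3-7) and 0x03 into tab[1] (bits 8-9). */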
957 static void build_page_bitmap(PageDesc *p)
959 int n, tb_start, tb_end;
960 TranslationBlock *tb;
962 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
964 tb = p->first_tb;
965 while (tb != NULL) {
966 n = (long)tb & 3;
967 tb = (TranslationBlock *)((long)tb & ~3);
968 /* NOTE: this is subtle as a TB may span two physical pages */
969 if (n == 0) {
970 /* NOTE: tb_end may be after the end of the page, but
971 it is not a problem */
972 tb_start = tb->pc & ~TARGET_PAGE_MASK;
973 tb_end = tb_start + tb->size;
974 if (tb_end > TARGET_PAGE_SIZE)
975 tb_end = TARGET_PAGE_SIZE;
976 } else {
977 tb_start = 0;
978 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
980 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
981 tb = tb->page_next[n];
985 TranslationBlock *tb_gen_code(CPUState *env,
986 target_ulong pc, target_ulong cs_base,
987 int flags, int cflags)
989 TranslationBlock *tb;
990 uint8_t *tc_ptr;
991 tb_page_addr_t phys_pc, phys_page2;
992 target_ulong virt_page2;
993 int code_gen_size;
995 phys_pc = get_page_addr_code(env, pc);
996 tb = tb_alloc(pc);
997 if (!tb) {
998 /* flush must be done */
999 tb_flush(env);
1000 /* cannot fail at this point */
1001 tb = tb_alloc(pc);
1002 /* Don't forget to invalidate previous TB info. */
1003 tb_invalidated_flag = 1;
1005 tc_ptr = code_gen_ptr;
1006 tb->tc_ptr = tc_ptr;
1007 tb->cs_base = cs_base;
1008 tb->flags = flags;
1009 tb->cflags = cflags;
1010 cpu_gen_code(env, tb, &code_gen_size);
1011 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1013 /* check next page if needed */
1014 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1015 phys_page2 = -1;
1016 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1017 phys_page2 = get_page_addr_code(env, virt_page2);
1019 tb_link_page(tb, phys_pc, phys_page2);
1020 return tb;
1023 /* invalidate all TBs which intersect with the target physical page
1024    starting in range [start, end). NOTE: start and end must refer to
1025 the same physical page. 'is_cpu_write_access' should be true if called
1026 from a real cpu write access: the virtual CPU will exit the current
1027 TB if code is modified inside this TB. */
1028 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1029 int is_cpu_write_access)
1031 TranslationBlock *tb, *tb_next, *saved_tb;
1032 CPUState *env = cpu_single_env;
1033 tb_page_addr_t tb_start, tb_end;
1034 PageDesc *p;
1035 int n;
1036 #ifdef TARGET_HAS_PRECISE_SMC
1037 int current_tb_not_found = is_cpu_write_access;
1038 TranslationBlock *current_tb = NULL;
1039 int current_tb_modified = 0;
1040 target_ulong current_pc = 0;
1041 target_ulong current_cs_base = 0;
1042 int current_flags = 0;
1043 #endif /* TARGET_HAS_PRECISE_SMC */
1045 p = page_find(start >> TARGET_PAGE_BITS);
1046 if (!p)
1047 return;
1048 if (!p->code_bitmap &&
1049 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1050 is_cpu_write_access) {
1051 /* build code bitmap */
1052 build_page_bitmap(p);
1055     /* we remove all the TBs in the range [start, end) */
1056 /* XXX: see if in some cases it could be faster to invalidate all the code */
1057 tb = p->first_tb;
1058 while (tb != NULL) {
1059 n = (long)tb & 3;
1060 tb = (TranslationBlock *)((long)tb & ~3);
1061 tb_next = tb->page_next[n];
1062 /* NOTE: this is subtle as a TB may span two physical pages */
1063 if (n == 0) {
1064 /* NOTE: tb_end may be after the end of the page, but
1065 it is not a problem */
1066 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1067 tb_end = tb_start + tb->size;
1068 } else {
1069 tb_start = tb->page_addr[1];
1070 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1072 if (!(tb_end <= start || tb_start >= end)) {
1073 #ifdef TARGET_HAS_PRECISE_SMC
1074 if (current_tb_not_found) {
1075 current_tb_not_found = 0;
1076 current_tb = NULL;
1077 if (env->mem_io_pc) {
1078 /* now we have a real cpu fault */
1079 current_tb = tb_find_pc(env->mem_io_pc);
1082 if (current_tb == tb &&
1083 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1084 /* If we are modifying the current TB, we must stop
1085 its execution. We could be more precise by checking
1086 that the modification is after the current PC, but it
1087 would require a specialized function to partially
1088 restore the CPU state */
1090 current_tb_modified = 1;
1091 cpu_restore_state(current_tb, env, env->mem_io_pc);
1092 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1093 &current_flags);
1095 #endif /* TARGET_HAS_PRECISE_SMC */
1096 /* we need to do that to handle the case where a signal
1097 occurs while doing tb_phys_invalidate() */
1098 saved_tb = NULL;
1099 if (env) {
1100 saved_tb = env->current_tb;
1101 env->current_tb = NULL;
1103 tb_phys_invalidate(tb, -1);
1104 if (env) {
1105 env->current_tb = saved_tb;
1106 if (env->interrupt_request && env->current_tb)
1107 cpu_interrupt(env, env->interrupt_request);
1110 tb = tb_next;
1112 #if !defined(CONFIG_USER_ONLY)
1113 /* if no code remaining, no need to continue to use slow writes */
1114 if (!p->first_tb) {
1115 invalidate_page_bitmap(p);
1116 if (is_cpu_write_access) {
1117 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1120 #endif
1121 #ifdef TARGET_HAS_PRECISE_SMC
1122 if (current_tb_modified) {
1123 /* we generate a block containing just the instruction
1124 modifying the memory. It will ensure that it cannot modify
1125 itself */
1126 env->current_tb = NULL;
1127 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1128 cpu_resume_from_signal(env, NULL);
1130 #endif
1133 /* len must be <= 8 and start must be a multiple of len */
1134 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1136 PageDesc *p;
1137 int offset, b;
1138 #if 0
1139 if (1) {
1140 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1141 cpu_single_env->mem_io_vaddr, len,
1142 cpu_single_env->eip,
1143 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1145 #endif
1146 p = page_find(start >> TARGET_PAGE_BITS);
1147 if (!p)
1148 return;
1149 if (p->code_bitmap) {
1150 offset = start & ~TARGET_PAGE_MASK;
1151 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1152 if (b & ((1 << len) - 1))
1153 goto do_invalidate;
1154 } else {
1155 do_invalidate:
1156 tb_invalidate_phys_page_range(start, start + len, 1);
1160 #if !defined(CONFIG_SOFTMMU)
1161 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1162 unsigned long pc, void *puc)
1164 TranslationBlock *tb;
1165 PageDesc *p;
1166 int n;
1167 #ifdef TARGET_HAS_PRECISE_SMC
1168 TranslationBlock *current_tb = NULL;
1169 CPUState *env = cpu_single_env;
1170 int current_tb_modified = 0;
1171 target_ulong current_pc = 0;
1172 target_ulong current_cs_base = 0;
1173 int current_flags = 0;
1174 #endif
1176 addr &= TARGET_PAGE_MASK;
1177 p = page_find(addr >> TARGET_PAGE_BITS);
1178 if (!p)
1179 return;
1180 tb = p->first_tb;
1181 #ifdef TARGET_HAS_PRECISE_SMC
1182 if (tb && pc != 0) {
1183 current_tb = tb_find_pc(pc);
1185 #endif
1186 while (tb != NULL) {
1187 n = (long)tb & 3;
1188 tb = (TranslationBlock *)((long)tb & ~3);
1189 #ifdef TARGET_HAS_PRECISE_SMC
1190 if (current_tb == tb &&
1191 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1192 /* If we are modifying the current TB, we must stop
1193 its execution. We could be more precise by checking
1194 that the modification is after the current PC, but it
1195 would require a specialized function to partially
1196 restore the CPU state */
1198 current_tb_modified = 1;
1199 cpu_restore_state(current_tb, env, pc);
1200 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1201 &current_flags);
1203 #endif /* TARGET_HAS_PRECISE_SMC */
1204 tb_phys_invalidate(tb, addr);
1205 tb = tb->page_next[n];
1207 p->first_tb = NULL;
1208 #ifdef TARGET_HAS_PRECISE_SMC
1209 if (current_tb_modified) {
1210 /* we generate a block containing just the instruction
1211 modifying the memory. It will ensure that it cannot modify
1212 itself */
1213 env->current_tb = NULL;
1214 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1215 cpu_resume_from_signal(env, puc);
1217 #endif
1219 #endif
1221 /* add the tb in the target page and protect it if necessary */
1222 static inline void tb_alloc_page(TranslationBlock *tb,
1223 unsigned int n, tb_page_addr_t page_addr)
1225 PageDesc *p;
1226 #ifndef CONFIG_USER_ONLY
1227 bool page_already_protected;
1228 #endif
1230 tb->page_addr[n] = page_addr;
1231 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1232 tb->page_next[n] = p->first_tb;
1233 #ifndef CONFIG_USER_ONLY
1234 page_already_protected = p->first_tb != NULL;
1235 #endif
1236 p->first_tb = (TranslationBlock *)((long)tb | n);
1237 invalidate_page_bitmap(p);
1239 #if defined(TARGET_HAS_SMC) || 1
1241 #if defined(CONFIG_USER_ONLY)
1242 if (p->flags & PAGE_WRITE) {
1243 target_ulong addr;
1244 PageDesc *p2;
1245 int prot;
1247 /* force the host page as non writable (writes will have a
1248 page fault + mprotect overhead) */
1249 page_addr &= qemu_host_page_mask;
1250 prot = 0;
1251 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1252 addr += TARGET_PAGE_SIZE) {
1254 p2 = page_find (addr >> TARGET_PAGE_BITS);
1255 if (!p2)
1256 continue;
1257 prot |= p2->flags;
1258 p2->flags &= ~PAGE_WRITE;
1260 mprotect(g2h(page_addr), qemu_host_page_size,
1261 (prot & PAGE_BITS) & ~PAGE_WRITE);
1262 #ifdef DEBUG_TB_INVALIDATE
1263 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1264 page_addr);
1265 #endif
1267 #else
1268 /* if some code is already present, then the pages are already
1269 protected. So we handle the case where only the first TB is
1270 allocated in a physical page */
1271 if (!page_already_protected) {
1272 tlb_protect_code(page_addr);
1274 #endif
1276 #endif /* TARGET_HAS_SMC */
1279 /* add a new TB and link it to the physical page tables. phys_page2 is
1280 (-1) to indicate that only one page contains the TB. */
1281 void tb_link_page(TranslationBlock *tb,
1282 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1284 unsigned int h;
1285 TranslationBlock **ptb;
1287 /* Grab the mmap lock to stop another thread invalidating this TB
1288 before we are done. */
1289 mmap_lock();
1290 /* add in the physical hash table */
1291 h = tb_phys_hash_func(phys_pc);
1292 ptb = &tb_phys_hash[h];
1293 tb->phys_hash_next = *ptb;
1294 *ptb = tb;
1296 /* add in the page list */
1297 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1298 if (phys_page2 != -1)
1299 tb_alloc_page(tb, 1, phys_page2);
1300 else
1301 tb->page_addr[1] = -1;
1303 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1304 tb->jmp_next[0] = NULL;
1305 tb->jmp_next[1] = NULL;
1307 /* init original jump addresses */
1308 if (tb->tb_next_offset[0] != 0xffff)
1309 tb_reset_jump(tb, 0);
1310 if (tb->tb_next_offset[1] != 0xffff)
1311 tb_reset_jump(tb, 1);
1313 #ifdef DEBUG_TB_CHECK
1314 tb_page_check();
1315 #endif
1316 mmap_unlock();
1319 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1320 tb[1].tc_ptr. Return NULL if not found */
1321 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1323 int m_min, m_max, m;
1324 unsigned long v;
1325 TranslationBlock *tb;
1327 if (nb_tbs <= 0)
1328 return NULL;
1329 if (tc_ptr < (unsigned long)code_gen_buffer ||
1330 tc_ptr >= (unsigned long)code_gen_ptr)
1331 return NULL;
1332 /* binary search (cf Knuth) */
1333 m_min = 0;
1334 m_max = nb_tbs - 1;
1335 while (m_min <= m_max) {
1336 m = (m_min + m_max) >> 1;
1337 tb = &tbs[m];
1338 v = (unsigned long)tb->tc_ptr;
1339 if (v == tc_ptr)
1340 return tb;
1341 else if (tc_ptr < v) {
1342 m_max = m - 1;
1343 } else {
1344 m_min = m + 1;
1347 return &tbs[m_max];
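/* This binary search relies on tbs[] being filled in order of increasing
   tc_ptr (tb_alloc() hands out code_gen_buffer space sequentially), so a
   host PC inside generated code resolves to the TB with the greatest
   tc_ptr not above it. */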
1350 static void tb_reset_jump_recursive(TranslationBlock *tb);
1352 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1354 TranslationBlock *tb1, *tb_next, **ptb;
1355 unsigned int n1;
1357 tb1 = tb->jmp_next[n];
1358 if (tb1 != NULL) {
1359 /* find head of list */
1360 for(;;) {
1361 n1 = (long)tb1 & 3;
1362 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1363 if (n1 == 2)
1364 break;
1365 tb1 = tb1->jmp_next[n1];
1367         /* we are now sure that tb jumps to tb1 */
1368 tb_next = tb1;
1370 /* remove tb from the jmp_first list */
1371 ptb = &tb_next->jmp_first;
1372 for(;;) {
1373 tb1 = *ptb;
1374 n1 = (long)tb1 & 3;
1375 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1376 if (n1 == n && tb1 == tb)
1377 break;
1378 ptb = &tb1->jmp_next[n1];
1380 *ptb = tb->jmp_next[n];
1381 tb->jmp_next[n] = NULL;
1383 /* suppress the jump to next tb in generated code */
1384 tb_reset_jump(tb, n);
1386 /* suppress jumps in the tb on which we could have jumped */
1387 tb_reset_jump_recursive(tb_next);
1391 static void tb_reset_jump_recursive(TranslationBlock *tb)
1393 tb_reset_jump_recursive2(tb, 0);
1394 tb_reset_jump_recursive2(tb, 1);
1397 #if defined(TARGET_HAS_ICE)
1398 #if defined(CONFIG_USER_ONLY)
1399 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1401 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1403 #else
1404 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1406 target_phys_addr_t addr;
1407 target_ulong pd;
1408 ram_addr_t ram_addr;
1409 PhysPageDesc *p;
1411 addr = cpu_get_phys_page_debug(env, pc);
1412 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1413 if (!p) {
1414 pd = IO_MEM_UNASSIGNED;
1415 } else {
1416 pd = p->phys_offset;
1418 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1419 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1421 #endif
1422 #endif /* TARGET_HAS_ICE */
1424 #if defined(CONFIG_USER_ONLY)
1425 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1430 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1431 int flags, CPUWatchpoint **watchpoint)
1433 return -ENOSYS;
1435 #else
1436 /* Add a watchpoint. */
1437 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1438 int flags, CPUWatchpoint **watchpoint)
1440 target_ulong len_mask = ~(len - 1);
1441 CPUWatchpoint *wp;
1443 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1444 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1445 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1446 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1447 return -EINVAL;
1449 wp = g_malloc(sizeof(*wp));
1451 wp->vaddr = addr;
1452 wp->len_mask = len_mask;
1453 wp->flags = flags;
1455 /* keep all GDB-injected watchpoints in front */
1456 if (flags & BP_GDB)
1457 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1458 else
1459 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1461 tlb_flush_page(env, addr);
1463 if (watchpoint)
1464 *watchpoint = wp;
1465 return 0;
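/* Illustrative use only: len must be a power of two and addr aligned to
   it.  With len == 4, len_mask == ~3, so
   cpu_watchpoint_insert(env, 0x1000, 4, BP_GDB, NULL) passes the sanity
   check, while addr == 0x1002 would be rejected with -EINVAL. */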
1468 /* Remove a specific watchpoint. */
1469 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1470 int flags)
1472 target_ulong len_mask = ~(len - 1);
1473 CPUWatchpoint *wp;
1475 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1476 if (addr == wp->vaddr && len_mask == wp->len_mask
1477 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1478 cpu_watchpoint_remove_by_ref(env, wp);
1479 return 0;
1482 return -ENOENT;
1485 /* Remove a specific watchpoint by reference. */
1486 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1488 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1490 tlb_flush_page(env, watchpoint->vaddr);
1492 g_free(watchpoint);
1495 /* Remove all matching watchpoints. */
1496 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1498 CPUWatchpoint *wp, *next;
1500 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1501 if (wp->flags & mask)
1502 cpu_watchpoint_remove_by_ref(env, wp);
1505 #endif
1507 /* Add a breakpoint. */
1508 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1509 CPUBreakpoint **breakpoint)
1511 #if defined(TARGET_HAS_ICE)
1512 CPUBreakpoint *bp;
1514 bp = g_malloc(sizeof(*bp));
1516 bp->pc = pc;
1517 bp->flags = flags;
1519 /* keep all GDB-injected breakpoints in front */
1520 if (flags & BP_GDB)
1521 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1522 else
1523 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1525 breakpoint_invalidate(env, pc);
1527 if (breakpoint)
1528 *breakpoint = bp;
1529 return 0;
1530 #else
1531 return -ENOSYS;
1532 #endif
1535 /* Remove a specific breakpoint. */
1536 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1538 #if defined(TARGET_HAS_ICE)
1539 CPUBreakpoint *bp;
1541 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1542 if (bp->pc == pc && bp->flags == flags) {
1543 cpu_breakpoint_remove_by_ref(env, bp);
1544 return 0;
1547 return -ENOENT;
1548 #else
1549 return -ENOSYS;
1550 #endif
1553 /* Remove a specific breakpoint by reference. */
1554 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1556 #if defined(TARGET_HAS_ICE)
1557 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1559 breakpoint_invalidate(env, breakpoint->pc);
1561 g_free(breakpoint);
1562 #endif
1565 /* Remove all matching breakpoints. */
1566 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1568 #if defined(TARGET_HAS_ICE)
1569 CPUBreakpoint *bp, *next;
1571 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1572 if (bp->flags & mask)
1573 cpu_breakpoint_remove_by_ref(env, bp);
1575 #endif
1578 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1579 CPU loop after each instruction */
1580 void cpu_single_step(CPUState *env, int enabled)
1582 #if defined(TARGET_HAS_ICE)
1583 if (env->singlestep_enabled != enabled) {
1584 env->singlestep_enabled = enabled;
1585 if (kvm_enabled())
1586 kvm_update_guest_debug(env, 0);
1587 else {
1588 /* must flush all the translated code to avoid inconsistencies */
1589 /* XXX: only flush what is necessary */
1590 tb_flush(env);
1593 #endif
1596 /* enable or disable low levels log */
1597 void cpu_set_log(int log_flags)
1599 loglevel = log_flags;
1600 if (loglevel && !logfile) {
1601 logfile = fopen(logfilename, log_append ? "a" : "w");
1602 if (!logfile) {
1603 perror(logfilename);
1604 _exit(1);
1606 #if !defined(CONFIG_SOFTMMU)
1607 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1609 static char logfile_buf[4096];
1610 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1612 #elif !defined(_WIN32)
1613 /* Win32 doesn't support line-buffering and requires size >= 2 */
1614 setvbuf(logfile, NULL, _IOLBF, 0);
1615 #endif
1616 log_append = 1;
1618 if (!loglevel && logfile) {
1619 fclose(logfile);
1620 logfile = NULL;
1624 void cpu_set_log_filename(const char *filename)
1626 logfilename = strdup(filename);
1627 if (logfile) {
1628 fclose(logfile);
1629 logfile = NULL;
1631 cpu_set_log(loglevel);
1634 static void cpu_unlink_tb(CPUState *env)
1636 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1637 problem and hope the cpu will stop of its own accord. For userspace
1638 emulation this often isn't actually as bad as it sounds. Often
1639 signals are used primarily to interrupt blocking syscalls. */
1640 TranslationBlock *tb;
1641 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1643 spin_lock(&interrupt_lock);
1644 tb = env->current_tb;
1645 /* if the cpu is currently executing code, we must unlink it and
1646 all the potentially executing TB */
1647 if (tb) {
1648 env->current_tb = NULL;
1649 tb_reset_jump_recursive(tb);
1651 spin_unlock(&interrupt_lock);
1654 #ifndef CONFIG_USER_ONLY
1655 /* mask must never be zero, except for A20 change call */
1656 static void tcg_handle_interrupt(CPUState *env, int mask)
1658 int old_mask;
1660 old_mask = env->interrupt_request;
1661 env->interrupt_request |= mask;
1664 * If called from iothread context, wake the target cpu in
1665      * case it's halted.
1667 if (!qemu_cpu_is_self(env)) {
1668 qemu_cpu_kick(env);
1669 return;
1672 if (use_icount) {
1673 env->icount_decr.u16.high = 0xffff;
1674 if (!can_do_io(env)
1675 && (mask & ~old_mask) != 0) {
1676 cpu_abort(env, "Raised interrupt while not in I/O function");
1678 } else {
1679 cpu_unlink_tb(env);
1683 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1685 #else /* CONFIG_USER_ONLY */
1687 void cpu_interrupt(CPUState *env, int mask)
1689 env->interrupt_request |= mask;
1690 cpu_unlink_tb(env);
1692 #endif /* CONFIG_USER_ONLY */
1694 void cpu_reset_interrupt(CPUState *env, int mask)
1696 env->interrupt_request &= ~mask;
1699 void cpu_exit(CPUState *env)
1701 env->exit_request = 1;
1702 cpu_unlink_tb(env);
1705 const CPULogItem cpu_log_items[] = {
1706 { CPU_LOG_TB_OUT_ASM, "out_asm",
1707 "show generated host assembly code for each compiled TB" },
1708 { CPU_LOG_TB_IN_ASM, "in_asm",
1709 "show target assembly code for each compiled TB" },
1710 { CPU_LOG_TB_OP, "op",
1711 "show micro ops for each compiled TB" },
1712 { CPU_LOG_TB_OP_OPT, "op_opt",
1713 "show micro ops "
1714 #ifdef TARGET_I386
1715 "before eflags optimization and "
1716 #endif
1717 "after liveness analysis" },
1718 { CPU_LOG_INT, "int",
1719 "show interrupts/exceptions in short format" },
1720 { CPU_LOG_EXEC, "exec",
1721 "show trace before each executed TB (lots of logs)" },
1722 { CPU_LOG_TB_CPU, "cpu",
1723 "show CPU state before block translation" },
1724 #ifdef TARGET_I386
1725 { CPU_LOG_PCALL, "pcall",
1726 "show protected mode far calls/returns/exceptions" },
1727 { CPU_LOG_RESET, "cpu_reset",
1728 "show CPU state before CPU resets" },
1729 #endif
1730 #ifdef DEBUG_IOPORT
1731 { CPU_LOG_IOPORT, "ioport",
1732 "show all i/o ports accesses" },
1733 #endif
1734 { 0, NULL, NULL },
1737 #ifndef CONFIG_USER_ONLY
1738 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1739 = QLIST_HEAD_INITIALIZER(memory_client_list);
1741 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1742 ram_addr_t size,
1743 ram_addr_t phys_offset,
1744 bool log_dirty)
1746 CPUPhysMemoryClient *client;
1747 QLIST_FOREACH(client, &memory_client_list, list) {
1748 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1752 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1753 target_phys_addr_t end)
1755 CPUPhysMemoryClient *client;
1756 QLIST_FOREACH(client, &memory_client_list, list) {
1757 int r = client->sync_dirty_bitmap(client, start, end);
1758 if (r < 0)
1759 return r;
1761 return 0;
1764 static int cpu_notify_migration_log(int enable)
1766 CPUPhysMemoryClient *client;
1767 QLIST_FOREACH(client, &memory_client_list, list) {
1768 int r = client->migration_log(client, enable);
1769 if (r < 0)
1770 return r;
1772 return 0;
1775 struct last_map {
1776 target_phys_addr_t start_addr;
1777 ram_addr_t size;
1778 ram_addr_t phys_offset;
1781 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1782 * address. Each intermediate table provides the next L2_BITs of guest
1783 * physical address space. The number of levels vary based on host and
1784 * guest configuration, making it efficient to build the final guest
1785 * physical address by seeding the L1 offset and shifting and adding in
1786 * each L2 offset as we recurse through them. */
1787 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1788 void **lp, target_phys_addr_t addr,
1789 struct last_map *map)
1791 int i;
1793 if (*lp == NULL) {
1794 return;
1796 if (level == 0) {
1797 PhysPageDesc *pd = *lp;
1798 addr <<= L2_BITS + TARGET_PAGE_BITS;
1799 for (i = 0; i < L2_SIZE; ++i) {
1800 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1801 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1803 if (map->size &&
1804 start_addr == map->start_addr + map->size &&
1805 pd[i].phys_offset == map->phys_offset + map->size) {
1807 map->size += TARGET_PAGE_SIZE;
1808 continue;
1809 } else if (map->size) {
1810 client->set_memory(client, map->start_addr,
1811 map->size, map->phys_offset, false);
1814 map->start_addr = start_addr;
1815 map->size = TARGET_PAGE_SIZE;
1816 map->phys_offset = pd[i].phys_offset;
1819 } else {
1820 void **pp = *lp;
1821 for (i = 0; i < L2_SIZE; ++i) {
1822 phys_page_for_each_1(client, level - 1, pp + i,
1823 (addr << L2_BITS) | i, map);
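/* Concrete case of the address reconstruction described above, assuming
   one intermediate level, L2_BITS == 10 and TARGET_PAGE_BITS == 12: an L1
   slot i1, intermediate slot i2 and leaf slot i0 describe guest physical
   address ((((i1 << 10) | i2) << 10 | i0) << 12). */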
1828 static void phys_page_for_each(CPUPhysMemoryClient *client)
1830 int i;
1831 struct last_map map = { };
1833 for (i = 0; i < P_L1_SIZE; ++i) {
1834 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1835 l1_phys_map + i, i, &map);
1837 if (map.size) {
1838 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1839 false);
1843 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1845 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1846 phys_page_for_each(client);
1849 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1851 QLIST_REMOVE(client, list);
1853 #endif
1855 static int cmp1(const char *s1, int n, const char *s2)
1857 if (strlen(s2) != n)
1858 return 0;
1859 return memcmp(s1, s2, n) == 0;
1862 /* takes a comma separated list of log masks. Return 0 if error. */
1863 int cpu_str_to_log_mask(const char *str)
1865 const CPULogItem *item;
1866 int mask;
1867 const char *p, *p1;
1869 p = str;
1870 mask = 0;
1871 for(;;) {
1872 p1 = strchr(p, ',');
1873 if (!p1)
1874 p1 = p + strlen(p);
1875 if(cmp1(p,p1-p,"all")) {
1876 for(item = cpu_log_items; item->mask != 0; item++) {
1877 mask |= item->mask;
1879 } else {
1880 for(item = cpu_log_items; item->mask != 0; item++) {
1881 if (cmp1(p, p1 - p, item->name))
1882 goto found;
1884 return 0;
1886 found:
1887 mask |= item->mask;
1888 if (*p1 != ',')
1889 break;
1890 p = p1 + 1;
1892 return mask;
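/* For example, cpu_str_to_log_mask("in_asm,cpu") yields
   CPU_LOG_TB_IN_ASM | CPU_LOG_TB_CPU, and "all" ORs together every entry
   of cpu_log_items[]. */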
1895 void cpu_abort(CPUState *env, const char *fmt, ...)
1897 va_list ap;
1898 va_list ap2;
1900 va_start(ap, fmt);
1901 va_copy(ap2, ap);
1902 fprintf(stderr, "qemu: fatal: ");
1903 vfprintf(stderr, fmt, ap);
1904 fprintf(stderr, "\n");
1905 #ifdef TARGET_I386
1906 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1907 #else
1908 cpu_dump_state(env, stderr, fprintf, 0);
1909 #endif
1910 if (qemu_log_enabled()) {
1911 qemu_log("qemu: fatal: ");
1912 qemu_log_vprintf(fmt, ap2);
1913 qemu_log("\n");
1914 #ifdef TARGET_I386
1915 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1916 #else
1917 log_cpu_state(env, 0);
1918 #endif
1919 qemu_log_flush();
1920 qemu_log_close();
1922 va_end(ap2);
1923 va_end(ap);
1924 #if defined(CONFIG_USER_ONLY)
1926 struct sigaction act;
1927 sigfillset(&act.sa_mask);
1928 act.sa_handler = SIG_DFL;
1929 sigaction(SIGABRT, &act, NULL);
1931 #endif
1932 abort();
1935 CPUState *cpu_copy(CPUState *env)
1937 CPUState *new_env = cpu_init(env->cpu_model_str);
1938 CPUState *next_cpu = new_env->next_cpu;
1939 int cpu_index = new_env->cpu_index;
1940 #if defined(TARGET_HAS_ICE)
1941 CPUBreakpoint *bp;
1942 CPUWatchpoint *wp;
1943 #endif
1945 memcpy(new_env, env, sizeof(CPUState));
1947 /* Preserve chaining and index. */
1948 new_env->next_cpu = next_cpu;
1949 new_env->cpu_index = cpu_index;
1951 /* Clone all break/watchpoints.
1952 Note: Once we support ptrace with hw-debug register access, make sure
1953 BP_CPU break/watchpoints are handled correctly on clone. */
1954 QTAILQ_INIT(&env->breakpoints);
1955 QTAILQ_INIT(&env->watchpoints);
1956 #if defined(TARGET_HAS_ICE)
1957 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1958 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1960 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1961 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1962 wp->flags, NULL);
1964 #endif
1966 return new_env;
1969 #if !defined(CONFIG_USER_ONLY)
1971 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1973 unsigned int i;
1975 /* Discard jump cache entries for any tb which might potentially
1976 overlap the flushed page. */
1977 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1978 memset (&env->tb_jmp_cache[i], 0,
1979 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1981 i = tb_jmp_cache_hash_page(addr);
1982 memset (&env->tb_jmp_cache[i], 0,
1983 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
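/* Two hash ranges are cleared because a TB spans at most two pages: a
   block whose pc lies on the preceding page may still contain code on the
   page being flushed. */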
1986 static CPUTLBEntry s_cputlb_empty_entry = {
1987 .addr_read = -1,
1988 .addr_write = -1,
1989 .addr_code = -1,
1990 .addend = -1,
1993 /* NOTE: if flush_global is true, also flush global entries (not
1994 implemented yet) */
1995 void tlb_flush(CPUState *env, int flush_global)
1997 int i;
1999 #if defined(DEBUG_TLB)
2000 printf("tlb_flush:\n");
2001 #endif
2002 /* must reset current TB so that interrupts cannot modify the
2003 links while we are modifying them */
2004 env->current_tb = NULL;
2006 for(i = 0; i < CPU_TLB_SIZE; i++) {
2007 int mmu_idx;
2008 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2009 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2013 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2015 env->tlb_flush_addr = -1;
2016 env->tlb_flush_mask = 0;
2017 tlb_flush_count++;
2020 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2022 if (addr == (tlb_entry->addr_read &
2023 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2024 addr == (tlb_entry->addr_write &
2025 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2026 addr == (tlb_entry->addr_code &
2027 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2028 *tlb_entry = s_cputlb_empty_entry;
2032 void tlb_flush_page(CPUState *env, target_ulong addr)
2034 int i;
2035 int mmu_idx;
2037 #if defined(DEBUG_TLB)
2038 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2039 #endif
2040 /* Check if we need to flush due to large pages. */
2041 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2042 #if defined(DEBUG_TLB)
2043 printf("tlb_flush_page: forced full flush ("
2044 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2045 env->tlb_flush_addr, env->tlb_flush_mask);
2046 #endif
2047 tlb_flush(env, 1);
2048 return;
2050 /* must reset current TB so that interrupts cannot modify the
2051 links while we are modifying them */
2052 env->current_tb = NULL;
2054 addr &= TARGET_PAGE_MASK;
2055 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2056 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2057 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2059 tlb_flush_jmp_cache(env, addr);
2062 /* update the TLBs so that writes to code in the virtual page 'addr'
2063 can be detected */
2064 static void tlb_protect_code(ram_addr_t ram_addr)
2066 cpu_physical_memory_reset_dirty(ram_addr,
2067 ram_addr + TARGET_PAGE_SIZE,
2068 CODE_DIRTY_FLAG);
2071 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2072 tested for self modifying code */
2073 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2074 target_ulong vaddr)
2076 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2079 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2080 unsigned long start, unsigned long length)
2082 unsigned long addr;
2083 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2084 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2085 if ((addr - start) < length) {
2086 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
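/* With TLB_NOTDIRTY set, the generated fast-path store no longer matches
   the TLB entry, so the next write to this page goes through the slow
   path, where the dirty bitmap is updated and tlb_set_dirty() (below) can
   restore a plain RAM entry. */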
2091 /* Note: start and end must be within the same ram block. */
2092 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2093 int dirty_flags)
2095 CPUState *env;
2096 unsigned long length, start1;
2097 int i;
2099 start &= TARGET_PAGE_MASK;
2100 end = TARGET_PAGE_ALIGN(end);
2102 length = end - start;
2103 if (length == 0)
2104 return;
2105 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2107 /* we modify the TLB cache so that the dirty bit will be set again
2108 when accessing the range */
2109 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2110 /* Check that we don't span multiple blocks - this breaks the
2111 address comparisons below. */
2112 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2113 != (end - 1) - start) {
2114 abort();
2117 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2118 int mmu_idx;
2119 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2120 for(i = 0; i < CPU_TLB_SIZE; i++)
2121 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2122 start1, length);
2127 int cpu_physical_memory_set_dirty_tracking(int enable)
2129 int ret = 0;
2130 in_migration = enable;
2131 ret = cpu_notify_migration_log(!!enable);
2132 return ret;
2135 int cpu_physical_memory_get_dirty_tracking(void)
2137 return in_migration;
2140 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2141 target_phys_addr_t end_addr)
2143 int ret;
2145 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2146 return ret;
2149 int cpu_physical_log_start(target_phys_addr_t start_addr,
2150 ram_addr_t size)
2152 CPUPhysMemoryClient *client;
2153 QLIST_FOREACH(client, &memory_client_list, list) {
2154 if (client->log_start) {
2155 int r = client->log_start(client, start_addr, size);
2156 if (r < 0) {
2157 return r;
2161 return 0;
2164 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2165 ram_addr_t size)
2167 CPUPhysMemoryClient *client;
2168 QLIST_FOREACH(client, &memory_client_list, list) {
2169 if (client->log_stop) {
2170 int r = client->log_stop(client, start_addr, size);
2171 if (r < 0) {
2172 return r;
2176 return 0;
2179 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2181 ram_addr_t ram_addr;
2182 void *p;
2184 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2185 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2186 + tlb_entry->addend);
2187 ram_addr = qemu_ram_addr_from_host_nofail(p);
2188 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2189 tlb_entry->addr_write |= TLB_NOTDIRTY;
2194 /* update the TLB according to the current state of the dirty bits */
2195 void cpu_tlb_update_dirty(CPUState *env)
2197 int i;
2198 int mmu_idx;
2199 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2200 for(i = 0; i < CPU_TLB_SIZE; i++)
2201 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2205 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2207 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2208 tlb_entry->addr_write = vaddr;
2211 /* update the TLB corresponding to virtual page vaddr
2212 so that it is no longer dirty */
2213 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2215 int i;
2216 int mmu_idx;
2218 vaddr &= TARGET_PAGE_MASK;
2219 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2220 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2221 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2224 /* Our TLB does not support large pages, so remember the area covered by
2225 large pages and trigger a full TLB flush if these are invalidated. */
2226 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2227 target_ulong size)
2229 target_ulong mask = ~(size - 1);
2231 if (env->tlb_flush_addr == (target_ulong)-1) {
2232 env->tlb_flush_addr = vaddr & mask;
2233 env->tlb_flush_mask = mask;
2234 return;
2236 /* Extend the existing region to include the new page.
2237 This is a compromise between unnecessary flushes and the cost
2238 of maintaining a full variable size TLB. */
2239 mask &= env->tlb_flush_mask;
2240 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2241 mask <<= 1;
2243 env->tlb_flush_addr &= mask;
2244 env->tlb_flush_mask = mask;
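/* Worked example (editorial note, not in the original source; assumes a
 * 32-bit target_ulong): with 2MB pages, a first call for vaddr 0x00400000
 * records tlb_flush_addr=0x00400000, tlb_flush_mask=0xffe00000.  A second
 * call for vaddr 0x00c00000 widens the mask until both addresses agree:
 * 0xffe00000 -> 0xffc00000 -> 0xff800000 -> 0xff000000, giving a tracked
 * region of [0x00000000, 0x01000000).  Any tlb_flush_page() inside that
 * region then forces a full TLB flush. */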
2247 /* Add a new TLB entry. At most one entry for a given virtual address
2248 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2249 supplied size is only used by tlb_flush_page. */
2250 void tlb_set_page(CPUState *env, target_ulong vaddr,
2251 target_phys_addr_t paddr, int prot,
2252 int mmu_idx, target_ulong size)
2254 PhysPageDesc *p;
2255 unsigned long pd;
2256 unsigned int index;
2257 target_ulong address;
2258 target_ulong code_address;
2259 unsigned long addend;
2260 CPUTLBEntry *te;
2261 CPUWatchpoint *wp;
2262 target_phys_addr_t iotlb;
2264 assert(size >= TARGET_PAGE_SIZE);
2265 if (size != TARGET_PAGE_SIZE) {
2266 tlb_add_large_page(env, vaddr, size);
2268 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2269 if (!p) {
2270 pd = IO_MEM_UNASSIGNED;
2271 } else {
2272 pd = p->phys_offset;
2274 #if defined(DEBUG_TLB)
2275 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2276 " prot=%x idx=%d pd=0x%08lx\n",
2277 vaddr, paddr, prot, mmu_idx, pd);
2278 #endif
2280 address = vaddr;
2281 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2282 /* IO memory case (romd handled later) */
2283 address |= TLB_MMIO;
2285 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2286 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2287 /* Normal RAM. */
2288 iotlb = pd & TARGET_PAGE_MASK;
2289 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2290 iotlb |= IO_MEM_NOTDIRTY;
2291 else
2292 iotlb |= IO_MEM_ROM;
2293 } else {
2294 /* IO handlers are currently passed a physical address.
2295 It would be nice to pass an offset from the base address
2296 of that region. This would avoid having to special case RAM,
2297 and avoid full address decoding in every device.
2298 We can't use the high bits of pd for this because
2299 IO_MEM_ROMD uses these as a ram address. */
2300 iotlb = (pd & ~TARGET_PAGE_MASK);
2301 if (p) {
2302 iotlb += p->region_offset;
2303 } else {
2304 iotlb += paddr;
2308 code_address = address;
2309 /* Make accesses to pages with watchpoints go via the
2310 watchpoint trap routines. */
2311 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2312 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2313 /* Avoid trapping reads of pages with a write breakpoint. */
2314 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2315 iotlb = io_mem_watch + paddr;
2316 address |= TLB_MMIO;
2317 break;
2322 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2323 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2324 te = &env->tlb_table[mmu_idx][index];
2325 te->addend = addend - vaddr;
2326 if (prot & PAGE_READ) {
2327 te->addr_read = address;
2328 } else {
2329 te->addr_read = -1;
2332 if (prot & PAGE_EXEC) {
2333 te->addr_code = code_address;
2334 } else {
2335 te->addr_code = -1;
2337 if (prot & PAGE_WRITE) {
2338 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2339 (pd & IO_MEM_ROMD)) {
2340 /* Write access calls the I/O callback. */
2341 te->addr_write = address | TLB_MMIO;
2342 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2343 !cpu_physical_memory_is_dirty(pd)) {
2344 te->addr_write = address | TLB_NOTDIRTY;
2345 } else {
2346 te->addr_write = address;
2348 } else {
2349 te->addr_write = -1;
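/* Illustrative caller (editorial sketch, not part of this file): a target's
 * MMU fault handler would typically finish its page-table walk with
 *
 *     tlb_set_page(env, vaddr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
 *                  PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
 *                  TARGET_PAGE_SIZE);
 *
 * where vaddr, paddr, mmu_idx and the prot bits come from the guest page
 * tables; only the size argument differs for large pages. */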
2353 #else
2355 void tlb_flush(CPUState *env, int flush_global)
2359 void tlb_flush_page(CPUState *env, target_ulong addr)
2364 * Walks guest process memory "regions" one by one
2365 * and calls callback function 'fn' for each region.
2368 struct walk_memory_regions_data
2370 walk_memory_regions_fn fn;
2371 void *priv;
2372 unsigned long start;
2373 int prot;
2376 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2377 abi_ulong end, int new_prot)
2379 if (data->start != -1ul) {
2380 int rc = data->fn(data->priv, data->start, end, data->prot);
2381 if (rc != 0) {
2382 return rc;
2386 data->start = (new_prot ? end : -1ul);
2387 data->prot = new_prot;
2389 return 0;
2392 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2393 abi_ulong base, int level, void **lp)
2395 abi_ulong pa;
2396 int i, rc;
2398 if (*lp == NULL) {
2399 return walk_memory_regions_end(data, base, 0);
2402 if (level == 0) {
2403 PageDesc *pd = *lp;
2404 for (i = 0; i < L2_SIZE; ++i) {
2405 int prot = pd[i].flags;
2407 pa = base | (i << TARGET_PAGE_BITS);
2408 if (prot != data->prot) {
2409 rc = walk_memory_regions_end(data, pa, prot);
2410 if (rc != 0) {
2411 return rc;
2415 } else {
2416 void **pp = *lp;
2417 for (i = 0; i < L2_SIZE; ++i) {
2418 pa = base | ((abi_ulong)i <<
2419 (TARGET_PAGE_BITS + L2_BITS * level));
2420 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2421 if (rc != 0) {
2422 return rc;
2427 return 0;
2430 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2432 struct walk_memory_regions_data data;
2433 unsigned long i;
2435 data.fn = fn;
2436 data.priv = priv;
2437 data.start = -1ul;
2438 data.prot = 0;
2440 for (i = 0; i < V_L1_SIZE; i++) {
2441 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2442 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2443 if (rc != 0) {
2444 return rc;
2448 return walk_memory_regions_end(&data, 0, 0);
2451 static int dump_region(void *priv, abi_ulong start,
2452 abi_ulong end, unsigned long prot)
2454 FILE *f = (FILE *)priv;
2456 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2457 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2458 start, end, end - start,
2459 ((prot & PAGE_READ) ? 'r' : '-'),
2460 ((prot & PAGE_WRITE) ? 'w' : '-'),
2461 ((prot & PAGE_EXEC) ? 'x' : '-'));
2463 return (0);
2466 /* dump memory mappings */
2467 void page_dump(FILE *f)
2469 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2470 "start", "end", "size", "prot");
2471 walk_memory_regions(f, dump_region);
2474 int page_get_flags(target_ulong address)
2476 PageDesc *p;
2478 p = page_find(address >> TARGET_PAGE_BITS);
2479 if (!p)
2480 return 0;
2481 return p->flags;
2484 /* Modify the flags of a page and invalidate the code if necessary.
2485 The flag PAGE_WRITE_ORG is positioned automatically depending
2486 on PAGE_WRITE. The mmap_lock should already be held. */
2487 void page_set_flags(target_ulong start, target_ulong end, int flags)
2489 target_ulong addr, len;
2491 /* This function should never be called with addresses outside the
2492 guest address space. If this assert fires, it probably indicates
2493 a missing call to h2g_valid. */
2494 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2495 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2496 #endif
2497 assert(start < end);
2499 start = start & TARGET_PAGE_MASK;
2500 end = TARGET_PAGE_ALIGN(end);
2502 if (flags & PAGE_WRITE) {
2503 flags |= PAGE_WRITE_ORG;
2506 for (addr = start, len = end - start;
2507 len != 0;
2508 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2509 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2511 /* If the write protection bit is set, then we invalidate
2512 the code inside. */
2513 if (!(p->flags & PAGE_WRITE) &&
2514 (flags & PAGE_WRITE) &&
2515 p->first_tb) {
2516 tb_invalidate_phys_page(addr, 0, NULL);
2518 p->flags = flags;
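/* Illustrative use (editorial; the values are hypothetical): user-mode mmap
 * emulation marks a freshly mapped, writable region with
 *
 *     page_set_flags(start, start + len,
 *                    PAGE_VALID | PAGE_READ | PAGE_WRITE);
 *
 * PAGE_WRITE_ORG is then added automatically by this function. */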
2522 int page_check_range(target_ulong start, target_ulong len, int flags)
2524 PageDesc *p;
2525 target_ulong end;
2526 target_ulong addr;
2528 /* This function should never be called with addresses outside the
2529 guest address space. If this assert fires, it probably indicates
2530 a missing call to h2g_valid. */
2531 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2532 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2533 #endif
2535 if (len == 0) {
2536 return 0;
2538 if (start + len - 1 < start) {
2539 /* We've wrapped around. */
2540 return -1;
2543 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2544 start = start & TARGET_PAGE_MASK;
2546 for (addr = start, len = end - start;
2547 len != 0;
2548 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2549 p = page_find(addr >> TARGET_PAGE_BITS);
2550 if (!p)
2551 return -1;
2552 if (!(p->flags & PAGE_VALID))
2553 return -1;
2555 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2556 return -1;
2557 if (flags & PAGE_WRITE) {
2558 if (!(p->flags & PAGE_WRITE_ORG))
2559 return -1;
2560 /* unprotect the page if it was put read-only because it
2561 contains translated code */
2562 if (!(p->flags & PAGE_WRITE)) {
2563 if (!page_unprotect(addr, 0, NULL))
2564 return -1;
2566 return 0;
2569 return 0;
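/* Illustrative use (editorial sketch): before copying a guest buffer, a
 * syscall handler can verify access with
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ) < 0) {
 *         return -TARGET_EFAULT;   // hypothetical error path
 *     }
 *
 * A zero return means every page in the range is valid and readable. */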
2572 /* called from signal handler: invalidate the code and unprotect the
2573 page. Return TRUE if the fault was successfully handled. */
2574 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2576 unsigned int prot;
2577 PageDesc *p;
2578 target_ulong host_start, host_end, addr;
2580 /* Technically this isn't safe inside a signal handler. However, we
2581 know this only ever happens in a synchronous SEGV handler, so in
2582 practice it seems to be ok. */
2583 mmap_lock();
2585 p = page_find(address >> TARGET_PAGE_BITS);
2586 if (!p) {
2587 mmap_unlock();
2588 return 0;
2591 /* if the page was really writable, then we change its
2592 protection back to writable */
2593 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2594 host_start = address & qemu_host_page_mask;
2595 host_end = host_start + qemu_host_page_size;
2597 prot = 0;
2598 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2599 p = page_find(addr >> TARGET_PAGE_BITS);
2600 p->flags |= PAGE_WRITE;
2601 prot |= p->flags;
2603 /* and since the content will be modified, we must invalidate
2604 the corresponding translated code. */
2605 tb_invalidate_phys_page(addr, pc, puc);
2606 #ifdef DEBUG_TB_CHECK
2607 tb_invalidate_check(addr);
2608 #endif
2610 mprotect((void *)g2h(host_start), qemu_host_page_size,
2611 prot & PAGE_BITS);
2613 mmap_unlock();
2614 return 1;
2616 mmap_unlock();
2617 return 0;
2620 static inline void tlb_set_dirty(CPUState *env,
2621 unsigned long addr, target_ulong vaddr)
2624 #endif /* defined(CONFIG_USER_ONLY) */
2626 #if !defined(CONFIG_USER_ONLY)
2628 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2629 typedef struct subpage_t {
2630 target_phys_addr_t base;
2631 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2632 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2633 } subpage_t;
2635 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2636 ram_addr_t memory, ram_addr_t region_offset);
2637 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2638 ram_addr_t orig_memory,
2639 ram_addr_t region_offset);
2640 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2641 need_subpage) \
2642 do { \
2643 if (addr > start_addr) \
2644 start_addr2 = 0; \
2645 else { \
2646 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2647 if (start_addr2 > 0) \
2648 need_subpage = 1; \
2651 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2652 end_addr2 = TARGET_PAGE_SIZE - 1; \
2653 else { \
2654 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2655 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2656 need_subpage = 1; \
2658 } while (0)
2660 /* register physical memory.
2661 For RAM, 'size' must be a multiple of the target page size.
2662 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2663 io memory page. The address used when calling the IO function is
2664 the offset from the start of the region, plus region_offset. Both
2665 start_addr and region_offset are rounded down to a page boundary
2666 before calculating this offset. This should not be a problem unless
2667 the low bits of start_addr and region_offset differ. */
2668 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2669 ram_addr_t size,
2670 ram_addr_t phys_offset,
2671 ram_addr_t region_offset,
2672 bool log_dirty)
2674 target_phys_addr_t addr, end_addr;
2675 PhysPageDesc *p;
2676 CPUState *env;
2677 ram_addr_t orig_size = size;
2678 subpage_t *subpage;
2680 assert(size);
2681 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2683 if (phys_offset == IO_MEM_UNASSIGNED) {
2684 region_offset = start_addr;
2686 region_offset &= TARGET_PAGE_MASK;
2687 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2688 end_addr = start_addr + (target_phys_addr_t)size;
2690 addr = start_addr;
2691 do {
2692 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2693 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2694 ram_addr_t orig_memory = p->phys_offset;
2695 target_phys_addr_t start_addr2, end_addr2;
2696 int need_subpage = 0;
2698 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2699 need_subpage);
2700 if (need_subpage) {
2701 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2702 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2703 &p->phys_offset, orig_memory,
2704 p->region_offset);
2705 } else {
2706 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2707 >> IO_MEM_SHIFT];
2709 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2710 region_offset);
2711 p->region_offset = 0;
2712 } else {
2713 p->phys_offset = phys_offset;
2714 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2715 (phys_offset & IO_MEM_ROMD))
2716 phys_offset += TARGET_PAGE_SIZE;
2718 } else {
2719 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2720 p->phys_offset = phys_offset;
2721 p->region_offset = region_offset;
2722 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2723 (phys_offset & IO_MEM_ROMD)) {
2724 phys_offset += TARGET_PAGE_SIZE;
2725 } else {
2726 target_phys_addr_t start_addr2, end_addr2;
2727 int need_subpage = 0;
2729 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2730 end_addr2, need_subpage);
2732 if (need_subpage) {
2733 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2734 &p->phys_offset, IO_MEM_UNASSIGNED,
2735 addr & TARGET_PAGE_MASK);
2736 subpage_register(subpage, start_addr2, end_addr2,
2737 phys_offset, region_offset);
2738 p->region_offset = 0;
2742 region_offset += TARGET_PAGE_SIZE;
2743 addr += TARGET_PAGE_SIZE;
2744 } while (addr != end_addr);
2746 /* since each CPU stores ram addresses in its TLB cache, we must
2747 reset the modified entries */
2748 /* XXX: slow ! */
2749 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2750 tlb_flush(env, 1);
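/* Illustrative use (editorial sketch; the addresses and names are made up):
 * a board model maps RAM returned by qemu_ram_alloc() and an MMIO region
 * returned by cpu_register_io_memory() roughly like this:
 *
 *     cpu_register_physical_memory_log(0x00000000, ram_size,
 *                                      ram_offset | IO_MEM_RAM, 0, false);
 *     cpu_register_physical_memory_log(0x10000000, TARGET_PAGE_SIZE,
 *                                      mmio_index, 0, false);
 *
 * The low bits of phys_offset select plain RAM versus an io_mem table
 * entry. */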
2754 /* XXX: temporary until new memory mapping API */
2755 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2757 PhysPageDesc *p;
2759 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2760 if (!p)
2761 return IO_MEM_UNASSIGNED;
2762 return p->phys_offset;
2765 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2767 if (kvm_enabled())
2768 kvm_coalesce_mmio_region(addr, size);
2771 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2773 if (kvm_enabled())
2774 kvm_uncoalesce_mmio_region(addr, size);
2777 void qemu_flush_coalesced_mmio_buffer(void)
2779 if (kvm_enabled())
2780 kvm_flush_coalesced_mmio_buffer();
2783 #if defined(__linux__) && !defined(TARGET_S390X)
2785 #include <sys/vfs.h>
2787 #define HUGETLBFS_MAGIC 0x958458f6
2789 static long gethugepagesize(const char *path)
2791 struct statfs fs;
2792 int ret;
2794 do {
2795 ret = statfs(path, &fs);
2796 } while (ret != 0 && errno == EINTR);
2798 if (ret != 0) {
2799 perror(path);
2800 return 0;
2803 if (fs.f_type != HUGETLBFS_MAGIC)
2804 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2806 return fs.f_bsize;
2809 static void *file_ram_alloc(RAMBlock *block,
2810 ram_addr_t memory,
2811 const char *path)
2813 char *filename;
2814 void *area;
2815 int fd;
2816 #ifdef MAP_POPULATE
2817 int flags;
2818 #endif
2819 unsigned long hpagesize;
2821 hpagesize = gethugepagesize(path);
2822 if (!hpagesize) {
2823 return NULL;
2826 if (memory < hpagesize) {
2827 return NULL;
2830 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2831 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2832 return NULL;
2835 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2836 return NULL;
2839 fd = mkstemp(filename);
2840 if (fd < 0) {
2841 perror("unable to create backing store for hugepages");
2842 free(filename);
2843 return NULL;
2845 unlink(filename);
2846 free(filename);
2848 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2851 * ftruncate is not supported by hugetlbfs in older
2852 * hosts, so don't bother bailing out on errors.
2853 * If anything goes wrong with it under other filesystems,
2854 * mmap will fail.
2856 if (ftruncate(fd, memory))
2857 perror("ftruncate");
2859 #ifdef MAP_POPULATE
2860 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2861 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2862 * to sidestep this quirk.
2864 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2865 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2866 #else
2867 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2868 #endif
2869 if (area == MAP_FAILED) {
2870 perror("file_ram_alloc: can't mmap RAM pages");
2871 close(fd);
2872 return (NULL);
2874 block->fd = fd;
2875 return area;
2877 #endif
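/* Editorial note: file_ram_alloc() backs guest RAM with files created under
 * the directory given by the -mem-path command line option (typically a
 * hugetlbfs mount), e.g. an invocation along the lines of
 *
 *     qemu-system-x86_64 -m 4096 -mem-path /dev/hugepages -mem-prealloc
 *
 * If the hugetlbfs allocation fails, the caller below falls back to
 * qemu_vmalloc(). */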
2879 static ram_addr_t find_ram_offset(ram_addr_t size)
2881 RAMBlock *block, *next_block;
2882 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2884 if (QLIST_EMPTY(&ram_list.blocks))
2885 return 0;
2887 QLIST_FOREACH(block, &ram_list.blocks, next) {
2888 ram_addr_t end, next = RAM_ADDR_MAX;
2890 end = block->offset + block->length;
2892 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2893 if (next_block->offset >= end) {
2894 next = MIN(next, next_block->offset);
2897 if (next - end >= size && next - end < mingap) {
2898 offset = end;
2899 mingap = next - end;
2902 return offset;
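/* Worked example (editorial note): with blocks at [0x0, 0x01000000) and
 * [0x03000000, 0x04000000), a request for 0x00800000 bytes picks the
 * 0x02000000-byte gap after the first block (the smallest gap that fits),
 * so the new block is placed at offset 0x01000000. */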
2905 static ram_addr_t last_ram_offset(void)
2907 RAMBlock *block;
2908 ram_addr_t last = 0;
2910 QLIST_FOREACH(block, &ram_list.blocks, next)
2911 last = MAX(last, block->offset + block->length);
2913 return last;
2916 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2917 ram_addr_t size, void *host)
2919 RAMBlock *new_block, *block;
2921 size = TARGET_PAGE_ALIGN(size);
2922 new_block = g_malloc0(sizeof(*new_block));
2924 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2925 char *id = dev->parent_bus->info->get_dev_path(dev);
2926 if (id) {
2927 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2928 g_free(id);
2931 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2933 QLIST_FOREACH(block, &ram_list.blocks, next) {
2934 if (!strcmp(block->idstr, new_block->idstr)) {
2935 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2936 new_block->idstr);
2937 abort();
2941 new_block->offset = find_ram_offset(size);
2942 if (host) {
2943 new_block->host = host;
2944 new_block->flags |= RAM_PREALLOC_MASK;
2945 } else {
2946 if (mem_path) {
2947 #if defined (__linux__) && !defined(TARGET_S390X)
2948 new_block->host = file_ram_alloc(new_block, size, mem_path);
2949 if (!new_block->host) {
2950 new_block->host = qemu_vmalloc(size);
2951 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2953 #else
2954 fprintf(stderr, "-mem-path option unsupported\n");
2955 exit(1);
2956 #endif
2957 } else {
2958 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2959 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2960 a system-defined value, which is at least 256GB. Larger systems
2961 have larger values. We put the guest between the end of data
2962 segment (system break) and this value. We use 32GB as a base to
2963 have enough room for the system break to grow. */
2964 new_block->host = mmap((void*)0x800000000, size,
2965 PROT_EXEC|PROT_READ|PROT_WRITE,
2966 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2967 if (new_block->host == MAP_FAILED) {
2968 fprintf(stderr, "Allocating RAM failed\n");
2969 abort();
2971 #else
2972 if (xen_enabled()) {
2973 xen_ram_alloc(new_block->offset, size);
2974 } else {
2975 new_block->host = qemu_vmalloc(size);
2977 #endif
2978 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2981 new_block->length = size;
2983 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2985 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2986 last_ram_offset() >> TARGET_PAGE_BITS);
2987 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2988 0xff, size >> TARGET_PAGE_BITS);
2990 if (kvm_enabled())
2991 kvm_setup_guest_memory(new_block->host, size);
2993 return new_block->offset;
2996 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2998 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
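/* Illustrative use (editorial; the device and field names are hypothetical):
 * a display adapter allocating its video memory would do something like
 *
 *     s->vram_offset = qemu_ram_alloc(&dev->qdev, "mydev.vram", vram_size);
 *     s->vram_ptr = qemu_get_ram_ptr(s->vram_offset);
 *
 * and later expose it to the guest via cpu_register_physical_memory_log(). */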
3001 void qemu_ram_free_from_ptr(ram_addr_t addr)
3003 RAMBlock *block;
3005 QLIST_FOREACH(block, &ram_list.blocks, next) {
3006 if (addr == block->offset) {
3007 QLIST_REMOVE(block, next);
3008 g_free(block);
3009 return;
3014 void qemu_ram_free(ram_addr_t addr)
3016 RAMBlock *block;
3018 QLIST_FOREACH(block, &ram_list.blocks, next) {
3019 if (addr == block->offset) {
3020 QLIST_REMOVE(block, next);
3021 if (block->flags & RAM_PREALLOC_MASK) {
3023 } else if (mem_path) {
3024 #if defined (__linux__) && !defined(TARGET_S390X)
3025 if (block->fd) {
3026 munmap(block->host, block->length);
3027 close(block->fd);
3028 } else {
3029 qemu_vfree(block->host);
3031 #else
3032 abort();
3033 #endif
3034 } else {
3035 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3036 munmap(block->host, block->length);
3037 #else
3038 if (xen_enabled()) {
3039 xen_invalidate_map_cache_entry(block->host);
3040 } else {
3041 qemu_vfree(block->host);
3043 #endif
3045 g_free(block);
3046 return;
3052 #ifndef _WIN32
3053 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3055 RAMBlock *block;
3056 ram_addr_t offset;
3057 int flags;
3058 void *area, *vaddr;
3060 QLIST_FOREACH(block, &ram_list.blocks, next) {
3061 offset = addr - block->offset;
3062 if (offset < block->length) {
3063 vaddr = block->host + offset;
3064 if (block->flags & RAM_PREALLOC_MASK) {
3066 } else {
3067 flags = MAP_FIXED;
3068 munmap(vaddr, length);
3069 if (mem_path) {
3070 #if defined(__linux__) && !defined(TARGET_S390X)
3071 if (block->fd) {
3072 #ifdef MAP_POPULATE
3073 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3074 MAP_PRIVATE;
3075 #else
3076 flags |= MAP_PRIVATE;
3077 #endif
3078 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3079 flags, block->fd, offset);
3080 } else {
3081 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3082 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3083 flags, -1, 0);
3085 #else
3086 abort();
3087 #endif
3088 } else {
3089 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3090 flags |= MAP_SHARED | MAP_ANONYMOUS;
3091 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3092 flags, -1, 0);
3093 #else
3094 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3095 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3096 flags, -1, 0);
3097 #endif
3099 if (area != vaddr) {
3100 fprintf(stderr, "Could not remap addr: "
3101 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3102 length, addr);
3103 exit(1);
3105 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3107 return;
3111 #endif /* !_WIN32 */
3113 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3114 With the exception of the softmmu code in this file, this should
3115 only be used for local memory (e.g. video ram) that the device owns,
3116 and knows it isn't going to access beyond the end of the block.
3118 It should not be used for general purpose DMA.
3119 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3121 void *qemu_get_ram_ptr(ram_addr_t addr)
3123 RAMBlock *block;
3125 QLIST_FOREACH(block, &ram_list.blocks, next) {
3126 if (addr - block->offset < block->length) {
3127 /* Move this entry to the start of the list. */
3128 if (block != QLIST_FIRST(&ram_list.blocks)) {
3129 QLIST_REMOVE(block, next);
3130 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3132 if (xen_enabled()) {
3133 /* We need to check if the requested address is in the RAM
3134 * because we don't want to map the entire memory in QEMU.
3135 * In that case just map until the end of the page.
3137 if (block->offset == 0) {
3138 return xen_map_cache(addr, 0, 0);
3139 } else if (block->host == NULL) {
3140 block->host =
3141 xen_map_cache(block->offset, block->length, 1);
3144 return block->host + (addr - block->offset);
3148 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3149 abort();
3151 return NULL;
3154 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3155 * Same as qemu_get_ram_ptr but avoids reordering the RAMBlock list.
3157 void *qemu_safe_ram_ptr(ram_addr_t addr)
3159 RAMBlock *block;
3161 QLIST_FOREACH(block, &ram_list.blocks, next) {
3162 if (addr - block->offset < block->length) {
3163 if (xen_enabled()) {
3164 /* We need to check if the requested address is in the RAM
3165 * because we don't want to map the entire memory in QEMU.
3166 * In that case just map until the end of the page.
3168 if (block->offset == 0) {
3169 return xen_map_cache(addr, 0, 0);
3170 } else if (block->host == NULL) {
3171 block->host =
3172 xen_map_cache(block->offset, block->length, 1);
3175 return block->host + (addr - block->offset);
3179 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3180 abort();
3182 return NULL;
3185 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3186 * but takes a size argument */
3187 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3189 if (*size == 0) {
3190 return NULL;
3192 if (xen_enabled()) {
3193 return xen_map_cache(addr, *size, 1);
3194 } else {
3195 RAMBlock *block;
3197 QLIST_FOREACH(block, &ram_list.blocks, next) {
3198 if (addr - block->offset < block->length) {
3199 if (addr - block->offset + *size > block->length)
3200 *size = block->length - addr + block->offset;
3201 return block->host + (addr - block->offset);
3205 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3206 abort();
3210 void qemu_put_ram_ptr(void *addr)
3212 trace_qemu_put_ram_ptr(addr);
3215 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3217 RAMBlock *block;
3218 uint8_t *host = ptr;
3220 if (xen_enabled()) {
3221 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3222 return 0;
3225 QLIST_FOREACH(block, &ram_list.blocks, next) {
3226 /* This case can happen when the block is not mapped. */
3227 if (block->host == NULL) {
3228 continue;
3230 if (host - block->host < block->length) {
3231 *ram_addr = block->offset + (host - block->host);
3232 return 0;
3236 return -1;
3239 /* Some of the softmmu routines need to translate from a host pointer
3240 (typically a TLB entry) back to a ram offset. */
3241 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3243 ram_addr_t ram_addr;
3245 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3246 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3247 abort();
3249 return ram_addr;
3252 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3254 #ifdef DEBUG_UNASSIGNED
3255 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3256 #endif
3257 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3258 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3259 #endif
3260 return 0;
3263 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3265 #ifdef DEBUG_UNASSIGNED
3266 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3267 #endif
3268 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3269 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3270 #endif
3271 return 0;
3274 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3276 #ifdef DEBUG_UNASSIGNED
3277 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3278 #endif
3279 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3280 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3281 #endif
3282 return 0;
3285 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3287 #ifdef DEBUG_UNASSIGNED
3288 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3289 #endif
3290 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3291 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3292 #endif
3295 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3297 #ifdef DEBUG_UNASSIGNED
3298 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3299 #endif
3300 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3301 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3302 #endif
3305 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3307 #ifdef DEBUG_UNASSIGNED
3308 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3309 #endif
3310 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3311 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3312 #endif
3315 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3316 unassigned_mem_readb,
3317 unassigned_mem_readw,
3318 unassigned_mem_readl,
3321 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3322 unassigned_mem_writeb,
3323 unassigned_mem_writew,
3324 unassigned_mem_writel,
3327 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3328 uint32_t val)
3330 int dirty_flags;
3331 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3332 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3333 #if !defined(CONFIG_USER_ONLY)
3334 tb_invalidate_phys_page_fast(ram_addr, 1);
3335 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3336 #endif
3338 stb_p(qemu_get_ram_ptr(ram_addr), val);
3339 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3340 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3341 /* we remove the notdirty callback only if the code has been
3342 flushed */
3343 if (dirty_flags == 0xff)
3344 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3347 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3348 uint32_t val)
3350 int dirty_flags;
3351 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3352 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3353 #if !defined(CONFIG_USER_ONLY)
3354 tb_invalidate_phys_page_fast(ram_addr, 2);
3355 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3356 #endif
3358 stw_p(qemu_get_ram_ptr(ram_addr), val);
3359 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3360 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3361 /* we remove the notdirty callback only if the code has been
3362 flushed */
3363 if (dirty_flags == 0xff)
3364 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3367 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3368 uint32_t val)
3370 int dirty_flags;
3371 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3372 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3373 #if !defined(CONFIG_USER_ONLY)
3374 tb_invalidate_phys_page_fast(ram_addr, 4);
3375 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3376 #endif
3378 stl_p(qemu_get_ram_ptr(ram_addr), val);
3379 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3380 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3381 /* we remove the notdirty callback only if the code has been
3382 flushed */
3383 if (dirty_flags == 0xff)
3384 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3387 static CPUReadMemoryFunc * const error_mem_read[3] = {
3388 NULL, /* never used */
3389 NULL, /* never used */
3390 NULL, /* never used */
3393 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3394 notdirty_mem_writeb,
3395 notdirty_mem_writew,
3396 notdirty_mem_writel,
3399 /* Generate a debug exception if a watchpoint has been hit. */
3400 static void check_watchpoint(int offset, int len_mask, int flags)
3402 CPUState *env = cpu_single_env;
3403 target_ulong pc, cs_base;
3404 TranslationBlock *tb;
3405 target_ulong vaddr;
3406 CPUWatchpoint *wp;
3407 int cpu_flags;
3409 if (env->watchpoint_hit) {
3410 /* We re-entered the check after replacing the TB. Now raise
3411 * the debug interrupt so that it will trigger after the
3412 * current instruction. */
3413 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3414 return;
3416 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3417 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3418 if ((vaddr == (wp->vaddr & len_mask) ||
3419 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3420 wp->flags |= BP_WATCHPOINT_HIT;
3421 if (!env->watchpoint_hit) {
3422 env->watchpoint_hit = wp;
3423 tb = tb_find_pc(env->mem_io_pc);
3424 if (!tb) {
3425 cpu_abort(env, "check_watchpoint: could not find TB for "
3426 "pc=%p", (void *)env->mem_io_pc);
3428 cpu_restore_state(tb, env, env->mem_io_pc);
3429 tb_phys_invalidate(tb, -1);
3430 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3431 env->exception_index = EXCP_DEBUG;
3432 } else {
3433 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3434 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3436 cpu_resume_from_signal(env, NULL);
3438 } else {
3439 wp->flags &= ~BP_WATCHPOINT_HIT;
3444 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3445 so these check for a hit then pass through to the normal out-of-line
3446 phys routines. */
3447 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3449 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3450 return ldub_phys(addr);
3453 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3455 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3456 return lduw_phys(addr);
3459 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3461 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3462 return ldl_phys(addr);
3465 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3466 uint32_t val)
3468 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3469 stb_phys(addr, val);
3472 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3473 uint32_t val)
3475 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3476 stw_phys(addr, val);
3479 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3480 uint32_t val)
3482 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3483 stl_phys(addr, val);
3486 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3487 watch_mem_readb,
3488 watch_mem_readw,
3489 watch_mem_readl,
3492 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3493 watch_mem_writeb,
3494 watch_mem_writew,
3495 watch_mem_writel,
3498 static inline uint32_t subpage_readlen (subpage_t *mmio,
3499 target_phys_addr_t addr,
3500 unsigned int len)
3502 unsigned int idx = SUBPAGE_IDX(addr);
3503 #if defined(DEBUG_SUBPAGE)
3504 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3505 mmio, len, addr, idx);
3506 #endif
3508 addr += mmio->region_offset[idx];
3509 idx = mmio->sub_io_index[idx];
3510 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3513 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3514 uint32_t value, unsigned int len)
3516 unsigned int idx = SUBPAGE_IDX(addr);
3517 #if defined(DEBUG_SUBPAGE)
3518 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3519 __func__, mmio, len, addr, idx, value);
3520 #endif
3522 addr += mmio->region_offset[idx];
3523 idx = mmio->sub_io_index[idx];
3524 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3527 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3529 return subpage_readlen(opaque, addr, 0);
3532 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3533 uint32_t value)
3535 subpage_writelen(opaque, addr, value, 0);
3538 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3540 return subpage_readlen(opaque, addr, 1);
3543 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3544 uint32_t value)
3546 subpage_writelen(opaque, addr, value, 1);
3549 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3551 return subpage_readlen(opaque, addr, 2);
3554 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3555 uint32_t value)
3557 subpage_writelen(opaque, addr, value, 2);
3560 static CPUReadMemoryFunc * const subpage_read[] = {
3561 &subpage_readb,
3562 &subpage_readw,
3563 &subpage_readl,
3566 static CPUWriteMemoryFunc * const subpage_write[] = {
3567 &subpage_writeb,
3568 &subpage_writew,
3569 &subpage_writel,
3572 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3573 ram_addr_t memory, ram_addr_t region_offset)
3575 int idx, eidx;
3577 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3578 return -1;
3579 idx = SUBPAGE_IDX(start);
3580 eidx = SUBPAGE_IDX(end);
3581 #if defined(DEBUG_SUBPAGE)
3582 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3583 mmio, start, end, idx, eidx, memory);
3584 #endif
3585 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3586 memory = IO_MEM_UNASSIGNED;
3587 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3588 for (; idx <= eidx; idx++) {
3589 mmio->sub_io_index[idx] = memory;
3590 mmio->region_offset[idx] = region_offset;
3593 return 0;
3596 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3597 ram_addr_t orig_memory,
3598 ram_addr_t region_offset)
3600 subpage_t *mmio;
3601 int subpage_memory;
3603 mmio = g_malloc0(sizeof(subpage_t));
3605 mmio->base = base;
3606 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3607 DEVICE_NATIVE_ENDIAN);
3608 #if defined(DEBUG_SUBPAGE)
3609 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3610 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3611 #endif
3612 *phys = subpage_memory | IO_MEM_SUBPAGE;
3613 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3615 return mmio;
3618 static int get_free_io_mem_idx(void)
3620 int i;
3622 for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3623 if (!io_mem_used[i]) {
3624 io_mem_used[i] = 1;
3625 return i;
3627 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3628 return -1;
3632 * Usually, devices operate in little-endian mode. There are devices out
3633 * there that operate in big-endian mode too. Each device gets byte-swapped
3634 * MMIO if plugged onto a CPU that does the other endianness.
3636 * CPU Device swap?
3638 * little little no
3639 * little big yes
3640 * big little yes
3641 * big big no
3644 typedef struct SwapEndianContainer {
3645 CPUReadMemoryFunc *read[3];
3646 CPUWriteMemoryFunc *write[3];
3647 void *opaque;
3648 } SwapEndianContainer;
3650 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3652 uint32_t val;
3653 SwapEndianContainer *c = opaque;
3654 val = c->read[0](c->opaque, addr);
3655 return val;
3658 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3660 uint32_t val;
3661 SwapEndianContainer *c = opaque;
3662 val = bswap16(c->read[1](c->opaque, addr));
3663 return val;
3666 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3668 uint32_t val;
3669 SwapEndianContainer *c = opaque;
3670 val = bswap32(c->read[2](c->opaque, addr));
3671 return val;
3674 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3675 swapendian_mem_readb,
3676 swapendian_mem_readw,
3677 swapendian_mem_readl
3680 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3681 uint32_t val)
3683 SwapEndianContainer *c = opaque;
3684 c->write[0](c->opaque, addr, val);
3687 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3688 uint32_t val)
3690 SwapEndianContainer *c = opaque;
3691 c->write[1](c->opaque, addr, bswap16(val));
3694 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3695 uint32_t val)
3697 SwapEndianContainer *c = opaque;
3698 c->write[2](c->opaque, addr, bswap32(val));
3701 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3702 swapendian_mem_writeb,
3703 swapendian_mem_writew,
3704 swapendian_mem_writel
3707 static void swapendian_init(int io_index)
3709 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3710 int i;
3712 /* Swap mmio for big endian targets */
3713 c->opaque = io_mem_opaque[io_index];
3714 for (i = 0; i < 3; i++) {
3715 c->read[i] = io_mem_read[io_index][i];
3716 c->write[i] = io_mem_write[io_index][i];
3718 io_mem_read[io_index][i] = swapendian_readfn[i];
3719 io_mem_write[io_index][i] = swapendian_writefn[i];
3721 io_mem_opaque[io_index] = c;
3724 static void swapendian_del(int io_index)
3726 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3727 g_free(io_mem_opaque[io_index]);
3731 /* mem_read and mem_write are arrays of functions containing the
3732 function to access byte (index 0), word (index 1) and dword (index
3733 2). Functions can be omitted with a NULL function pointer.
3734 If io_index is non-zero, the corresponding io zone is
3735 modified. If it is zero, a new io zone is allocated. The return
3736 value can be used with cpu_register_physical_memory(). (-1) is
3737 returned on error. */
3738 static int cpu_register_io_memory_fixed(int io_index,
3739 CPUReadMemoryFunc * const *mem_read,
3740 CPUWriteMemoryFunc * const *mem_write,
3741 void *opaque, enum device_endian endian)
3743 int i;
3745 if (io_index <= 0) {
3746 io_index = get_free_io_mem_idx();
3747 if (io_index == -1)
3748 return io_index;
3749 } else {
3750 io_index >>= IO_MEM_SHIFT;
3751 if (io_index >= IO_MEM_NB_ENTRIES)
3752 return -1;
3755 for (i = 0; i < 3; ++i) {
3756 io_mem_read[io_index][i]
3757 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3759 for (i = 0; i < 3; ++i) {
3760 io_mem_write[io_index][i]
3761 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3763 io_mem_opaque[io_index] = opaque;
3765 switch (endian) {
3766 case DEVICE_BIG_ENDIAN:
3767 #ifndef TARGET_WORDS_BIGENDIAN
3768 swapendian_init(io_index);
3769 #endif
3770 break;
3771 case DEVICE_LITTLE_ENDIAN:
3772 #ifdef TARGET_WORDS_BIGENDIAN
3773 swapendian_init(io_index);
3774 #endif
3775 break;
3776 case DEVICE_NATIVE_ENDIAN:
3777 default:
3778 break;
3781 return (io_index << IO_MEM_SHIFT);
3784 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3785 CPUWriteMemoryFunc * const *mem_write,
3786 void *opaque, enum device_endian endian)
3788 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
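/* Illustrative registration (editorial sketch; 'mydev' and its handlers are
 * hypothetical):
 *
 *     static CPUReadMemoryFunc * const mydev_read[3] = {
 *         mydev_readb, mydev_readw, mydev_readl,
 *     };
 *     static CPUWriteMemoryFunc * const mydev_write[3] = {
 *         mydev_writeb, mydev_writew, mydev_writel,
 *     };
 *     int index = cpu_register_io_memory(mydev_read, mydev_write, s,
 *                                        DEVICE_LITTLE_ENDIAN);
 *
 * The returned value is then used as the phys_offset when registering the
 * region; declaring DEVICE_BIG_ENDIAN or DEVICE_LITTLE_ENDIAN lets
 * swapendian_init() insert byte swapping when the target's endianness
 * differs from the device's. */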
3791 void cpu_unregister_io_memory(int io_table_address)
3793 int i;
3794 int io_index = io_table_address >> IO_MEM_SHIFT;
3796 swapendian_del(io_index);
3798 for (i = 0; i < 3; i++) {
3799 io_mem_read[io_index][i] = unassigned_mem_read[i];
3800 io_mem_write[io_index][i] = unassigned_mem_write[i];
3802 io_mem_opaque[io_index] = NULL;
3803 io_mem_used[io_index] = 0;
3806 static void io_mem_init(void)
3808 int i;
3810 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3811 unassigned_mem_write, NULL,
3812 DEVICE_NATIVE_ENDIAN);
3813 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3814 unassigned_mem_write, NULL,
3815 DEVICE_NATIVE_ENDIAN);
3816 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3817 notdirty_mem_write, NULL,
3818 DEVICE_NATIVE_ENDIAN);
3819 for (i = 0; i < 5; i++)
3820 io_mem_used[i] = 1;
3822 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3823 watch_mem_write, NULL,
3824 DEVICE_NATIVE_ENDIAN);
3827 static void memory_map_init(void)
3829 system_memory = g_malloc(sizeof(*system_memory));
3830 memory_region_init(system_memory, "system", INT64_MAX);
3831 set_system_memory_map(system_memory);
3833 system_io = g_malloc(sizeof(*system_io));
3834 memory_region_init(system_io, "io", 65536);
3835 set_system_io_map(system_io);
3838 MemoryRegion *get_system_memory(void)
3840 return system_memory;
3843 MemoryRegion *get_system_io(void)
3845 return system_io;
3848 #endif /* !defined(CONFIG_USER_ONLY) */
3850 /* physical memory access (slow version, mainly for debug) */
3851 #if defined(CONFIG_USER_ONLY)
3852 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3853 uint8_t *buf, int len, int is_write)
3855 int l, flags;
3856 target_ulong page;
3857 void * p;
3859 while (len > 0) {
3860 page = addr & TARGET_PAGE_MASK;
3861 l = (page + TARGET_PAGE_SIZE) - addr;
3862 if (l > len)
3863 l = len;
3864 flags = page_get_flags(page);
3865 if (!(flags & PAGE_VALID))
3866 return -1;
3867 if (is_write) {
3868 if (!(flags & PAGE_WRITE))
3869 return -1;
3870 /* XXX: this code should not depend on lock_user */
3871 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3872 return -1;
3873 memcpy(p, buf, l);
3874 unlock_user(p, addr, l);
3875 } else {
3876 if (!(flags & PAGE_READ))
3877 return -1;
3878 /* XXX: this code should not depend on lock_user */
3879 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3880 return -1;
3881 memcpy(buf, p, l);
3882 unlock_user(p, addr, 0);
3884 len -= l;
3885 buf += l;
3886 addr += l;
3888 return 0;
3891 #else
3892 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3893 int len, int is_write)
3895 int l, io_index;
3896 uint8_t *ptr;
3897 uint32_t val;
3898 target_phys_addr_t page;
3899 ram_addr_t pd;
3900 PhysPageDesc *p;
3902 while (len > 0) {
3903 page = addr & TARGET_PAGE_MASK;
3904 l = (page + TARGET_PAGE_SIZE) - addr;
3905 if (l > len)
3906 l = len;
3907 p = phys_page_find(page >> TARGET_PAGE_BITS);
3908 if (!p) {
3909 pd = IO_MEM_UNASSIGNED;
3910 } else {
3911 pd = p->phys_offset;
3914 if (is_write) {
3915 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3916 target_phys_addr_t addr1 = addr;
3917 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3918 if (p)
3919 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3920 /* XXX: could force cpu_single_env to NULL to avoid
3921 potential bugs */
3922 if (l >= 4 && ((addr1 & 3) == 0)) {
3923 /* 32 bit write access */
3924 val = ldl_p(buf);
3925 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3926 l = 4;
3927 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3928 /* 16 bit write access */
3929 val = lduw_p(buf);
3930 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3931 l = 2;
3932 } else {
3933 /* 8 bit write access */
3934 val = ldub_p(buf);
3935 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3936 l = 1;
3938 } else {
3939 ram_addr_t addr1;
3940 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3941 /* RAM case */
3942 ptr = qemu_get_ram_ptr(addr1);
3943 memcpy(ptr, buf, l);
3944 if (!cpu_physical_memory_is_dirty(addr1)) {
3945 /* invalidate code */
3946 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3947 /* set dirty bit */
3948 cpu_physical_memory_set_dirty_flags(
3949 addr1, (0xff & ~CODE_DIRTY_FLAG));
3951 /* qemu doesn't execute guest code directly, but kvm does,
3952 so flush the instruction caches */
3953 if (kvm_enabled())
3954 flush_icache_range((unsigned long)ptr,
3955 ((unsigned long)ptr)+l);
3956 qemu_put_ram_ptr(ptr);
3958 } else {
3959 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3960 !(pd & IO_MEM_ROMD)) {
3961 target_phys_addr_t addr1 = addr;
3962 /* I/O case */
3963 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3964 if (p)
3965 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3966 if (l >= 4 && ((addr1 & 3) == 0)) {
3967 /* 32 bit read access */
3968 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3969 stl_p(buf, val);
3970 l = 4;
3971 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3972 /* 16 bit read access */
3973 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3974 stw_p(buf, val);
3975 l = 2;
3976 } else {
3977 /* 8 bit read access */
3978 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3979 stb_p(buf, val);
3980 l = 1;
3982 } else {
3983 /* RAM case */
3984 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3985 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3986 qemu_put_ram_ptr(ptr);
3989 len -= l;
3990 buf += l;
3991 addr += l;
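/* Illustrative use (editorial; 'desc_addr' is a hypothetical guest physical
 * address): DMA-style access from a device model normally goes through the
 * cpu_physical_memory_read()/cpu_physical_memory_write() wrappers, e.g.
 *
 *     uint8_t buf[512];
 *     cpu_physical_memory_read(desc_addr, buf, sizeof(buf));
 *     cpu_physical_memory_write(desc_addr, buf, sizeof(buf));
 *
 * which end up here and handle RAM, ROM and MMIO pages transparently. */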
3995 /* used for ROM loading : can write in RAM and ROM */
3996 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3997 const uint8_t *buf, int len)
3999 int l;
4000 uint8_t *ptr;
4001 target_phys_addr_t page;
4002 unsigned long pd;
4003 PhysPageDesc *p;
4005 while (len > 0) {
4006 page = addr & TARGET_PAGE_MASK;
4007 l = (page + TARGET_PAGE_SIZE) - addr;
4008 if (l > len)
4009 l = len;
4010 p = phys_page_find(page >> TARGET_PAGE_BITS);
4011 if (!p) {
4012 pd = IO_MEM_UNASSIGNED;
4013 } else {
4014 pd = p->phys_offset;
4017 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4018 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4019 !(pd & IO_MEM_ROMD)) {
4020 /* do nothing */
4021 } else {
4022 unsigned long addr1;
4023 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4024 /* ROM/RAM case */
4025 ptr = qemu_get_ram_ptr(addr1);
4026 memcpy(ptr, buf, l);
4027 qemu_put_ram_ptr(ptr);
4029 len -= l;
4030 buf += l;
4031 addr += l;
4035 typedef struct {
4036 void *buffer;
4037 target_phys_addr_t addr;
4038 target_phys_addr_t len;
4039 } BounceBuffer;
4041 static BounceBuffer bounce;
4043 typedef struct MapClient {
4044 void *opaque;
4045 void (*callback)(void *opaque);
4046 QLIST_ENTRY(MapClient) link;
4047 } MapClient;
4049 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4050 = QLIST_HEAD_INITIALIZER(map_client_list);
4052 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4054 MapClient *client = g_malloc(sizeof(*client));
4056 client->opaque = opaque;
4057 client->callback = callback;
4058 QLIST_INSERT_HEAD(&map_client_list, client, link);
4059 return client;
4062 void cpu_unregister_map_client(void *_client)
4064 MapClient *client = (MapClient *)_client;
4066 QLIST_REMOVE(client, link);
4067 g_free(client);
4070 static void cpu_notify_map_clients(void)
4072 MapClient *client;
4074 while (!QLIST_EMPTY(&map_client_list)) {
4075 client = QLIST_FIRST(&map_client_list);
4076 client->callback(client->opaque);
4077 cpu_unregister_map_client(client);
4081 /* Map a physical memory region into a host virtual address.
4082 * May map a subset of the requested range, given by and returned in *plen.
4083 * May return NULL if resources needed to perform the mapping are exhausted.
4084 * Use only for reads OR writes - not for read-modify-write operations.
4085 * Use cpu_register_map_client() to know when retrying the map operation is
4086 * likely to succeed.
4088 void *cpu_physical_memory_map(target_phys_addr_t addr,
4089 target_phys_addr_t *plen,
4090 int is_write)
4092 target_phys_addr_t len = *plen;
4093 target_phys_addr_t todo = 0;
4094 int l;
4095 target_phys_addr_t page;
4096 unsigned long pd;
4097 PhysPageDesc *p;
4098 ram_addr_t raddr = RAM_ADDR_MAX;
4099 ram_addr_t rlen;
4100 void *ret;
4102 while (len > 0) {
4103 page = addr & TARGET_PAGE_MASK;
4104 l = (page + TARGET_PAGE_SIZE) - addr;
4105 if (l > len)
4106 l = len;
4107 p = phys_page_find(page >> TARGET_PAGE_BITS);
4108 if (!p) {
4109 pd = IO_MEM_UNASSIGNED;
4110 } else {
4111 pd = p->phys_offset;
4114 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4115 if (todo || bounce.buffer) {
4116 break;
4118 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4119 bounce.addr = addr;
4120 bounce.len = l;
4121 if (!is_write) {
4122 cpu_physical_memory_read(addr, bounce.buffer, l);
4125 *plen = l;
4126 return bounce.buffer;
4128 if (!todo) {
4129 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4132 len -= l;
4133 addr += l;
4134 todo += l;
4136 rlen = todo;
4137 ret = qemu_ram_ptr_length(raddr, &rlen);
4138 *plen = rlen;
4139 return ret;
4142 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4143 * Will also mark the memory as dirty if is_write == 1. access_len gives
4144 * the amount of memory that was actually read or written by the caller.
4146 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4147 int is_write, target_phys_addr_t access_len)
4149 if (buffer != bounce.buffer) {
4150 if (is_write) {
4151 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4152 while (access_len) {
4153 unsigned l;
4154 l = TARGET_PAGE_SIZE;
4155 if (l > access_len)
4156 l = access_len;
4157 if (!cpu_physical_memory_is_dirty(addr1)) {
4158 /* invalidate code */
4159 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4160 /* set dirty bit */
4161 cpu_physical_memory_set_dirty_flags(
4162 addr1, (0xff & ~CODE_DIRTY_FLAG));
4164 addr1 += l;
4165 access_len -= l;
4168 if (xen_enabled()) {
4169 xen_invalidate_map_cache_entry(buffer);
4171 return;
4173 if (is_write) {
4174 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4176 qemu_vfree(bounce.buffer);
4177 bounce.buffer = NULL;
4178 cpu_notify_map_clients();
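/* Illustrative zero-copy pattern (editorial sketch; 'gpa', 'want', 's' and
 * 'mydev_retry_map' are hypothetical):
 *
 *     target_phys_addr_t plen = want;
 *     void *host = cpu_physical_memory_map(gpa, &plen, 1);
 *     if (host) {
 *         // fill up to 'plen' bytes at 'host', then release the mapping
 *         cpu_physical_memory_unmap(host, plen, 1, plen);
 *     } else {
 *         // resources exhausted: register a map client and retry later
 *         cpu_register_map_client(s, mydev_retry_map);
 *     }
 */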
4181 /* warning: addr must be aligned */
4182 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4183 enum device_endian endian)
4185 int io_index;
4186 uint8_t *ptr;
4187 uint32_t val;
4188 unsigned long pd;
4189 PhysPageDesc *p;
4191 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4192 if (!p) {
4193 pd = IO_MEM_UNASSIGNED;
4194 } else {
4195 pd = p->phys_offset;
4198 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4199 !(pd & IO_MEM_ROMD)) {
4200 /* I/O case */
4201 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4202 if (p)
4203 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4204 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4205 #if defined(TARGET_WORDS_BIGENDIAN)
4206 if (endian == DEVICE_LITTLE_ENDIAN) {
4207 val = bswap32(val);
4209 #else
4210 if (endian == DEVICE_BIG_ENDIAN) {
4211 val = bswap32(val);
4213 #endif
4214 } else {
4215 /* RAM case */
4216 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4217 (addr & ~TARGET_PAGE_MASK);
4218 switch (endian) {
4219 case DEVICE_LITTLE_ENDIAN:
4220 val = ldl_le_p(ptr);
4221 break;
4222 case DEVICE_BIG_ENDIAN:
4223 val = ldl_be_p(ptr);
4224 break;
4225 default:
4226 val = ldl_p(ptr);
4227 break;
4230 return val;
4233 uint32_t ldl_phys(target_phys_addr_t addr)
4235 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4238 uint32_t ldl_le_phys(target_phys_addr_t addr)
4240 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4243 uint32_t ldl_be_phys(target_phys_addr_t addr)
4245 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4248 /* warning: addr must be aligned */
4249 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4250 enum device_endian endian)
4252 int io_index;
4253 uint8_t *ptr;
4254 uint64_t val;
4255 unsigned long pd;
4256 PhysPageDesc *p;
4258 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4259 if (!p) {
4260 pd = IO_MEM_UNASSIGNED;
4261 } else {
4262 pd = p->phys_offset;
4265 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4266 !(pd & IO_MEM_ROMD)) {
4267 /* I/O case */
4268 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4269 if (p)
4270 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4272 /* XXX This is broken when device endian != cpu endian.
4273 Fix and add "endian" variable check */
4274 #ifdef TARGET_WORDS_BIGENDIAN
4275 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4276 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4277 #else
4278 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4279 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4280 #endif
4281 } else {
4282 /* RAM case */
4283 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4284 (addr & ~TARGET_PAGE_MASK);
4285 switch (endian) {
4286 case DEVICE_LITTLE_ENDIAN:
4287 val = ldq_le_p(ptr);
4288 break;
4289 case DEVICE_BIG_ENDIAN:
4290 val = ldq_be_p(ptr);
4291 break;
4292 default:
4293 val = ldq_p(ptr);
4294 break;
4297 return val;
4300 uint64_t ldq_phys(target_phys_addr_t addr)
4302 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4305 uint64_t ldq_le_phys(target_phys_addr_t addr)
4307 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4310 uint64_t ldq_be_phys(target_phys_addr_t addr)
4312 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4315 /* XXX: optimize */
4316 uint32_t ldub_phys(target_phys_addr_t addr)
4318 uint8_t val;
4319 cpu_physical_memory_read(addr, &val, 1);
4320 return val;
4323 /* warning: addr must be aligned */
4324 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4325 enum device_endian endian)
4327 int io_index;
4328 uint8_t *ptr;
4329 uint64_t val;
4330 unsigned long pd;
4331 PhysPageDesc *p;
4333 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4334 if (!p) {
4335 pd = IO_MEM_UNASSIGNED;
4336 } else {
4337 pd = p->phys_offset;
4340 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4341 !(pd & IO_MEM_ROMD)) {
4342 /* I/O case */
4343 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4344 if (p)
4345 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4346 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4347 #if defined(TARGET_WORDS_BIGENDIAN)
4348 if (endian == DEVICE_LITTLE_ENDIAN) {
4349 val = bswap16(val);
4351 #else
4352 if (endian == DEVICE_BIG_ENDIAN) {
4353 val = bswap16(val);
4355 #endif
4356 } else {
4357 /* RAM case */
4358 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4359 (addr & ~TARGET_PAGE_MASK);
4360 switch (endian) {
4361 case DEVICE_LITTLE_ENDIAN:
4362 val = lduw_le_p(ptr);
4363 break;
4364 case DEVICE_BIG_ENDIAN:
4365 val = lduw_be_p(ptr);
4366 break;
4367 default:
4368 val = lduw_p(ptr);
4369 break;
4372 return val;
4375 uint32_t lduw_phys(target_phys_addr_t addr)
4377 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4380 uint32_t lduw_le_phys(target_phys_addr_t addr)
4382 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4385 uint32_t lduw_be_phys(target_phys_addr_t addr)
4387 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4390 /* warning: addr must be aligned. The ram page is not masked as dirty
4391 and the code inside is not invalidated. It is useful if the dirty
4392 bits are used to track modified PTEs */
4393 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4395 int io_index;
4396 uint8_t *ptr;
4397 unsigned long pd;
4398 PhysPageDesc *p;
4400 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4401 if (!p) {
4402 pd = IO_MEM_UNASSIGNED;
4403 } else {
4404 pd = p->phys_offset;
4407 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4408 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4409 if (p)
4410 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4411 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4412 } else {
4413 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4414 ptr = qemu_get_ram_ptr(addr1);
4415 stl_p(ptr, val);
4417 if (unlikely(in_migration)) {
4418 if (!cpu_physical_memory_is_dirty(addr1)) {
4419 /* invalidate code */
4420 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4421 /* set dirty bit */
4422 cpu_physical_memory_set_dirty_flags(
4423 addr1, (0xff & ~CODE_DIRTY_FLAG));
4429 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4431 int io_index;
4432 uint8_t *ptr;
4433 unsigned long pd;
4434 PhysPageDesc *p;
4436 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4437 if (!p) {
4438 pd = IO_MEM_UNASSIGNED;
4439 } else {
4440 pd = p->phys_offset;
4443 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4444 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4445 if (p)
4446 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4447 #ifdef TARGET_WORDS_BIGENDIAN
4448 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4449 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4450 #else
4451 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4452 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4453 #endif
4454 } else {
4455 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4456 (addr & ~TARGET_PAGE_MASK);
4457 stq_p(ptr, val);
4461 /* warning: addr must be aligned */
4462 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4463 enum device_endian endian)
4465 int io_index;
4466 uint8_t *ptr;
4467 unsigned long pd;
4468 PhysPageDesc *p;
4470 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4471 if (!p) {
4472 pd = IO_MEM_UNASSIGNED;
4473 } else {
4474 pd = p->phys_offset;
4477 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4478 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4479 if (p)
4480 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4481 #if defined(TARGET_WORDS_BIGENDIAN)
4482 if (endian == DEVICE_LITTLE_ENDIAN) {
4483 val = bswap32(val);
4485 #else
4486 if (endian == DEVICE_BIG_ENDIAN) {
4487 val = bswap32(val);
4489 #endif
4490 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4491 } else {
4492 unsigned long addr1;
4493 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4494 /* RAM case */
4495 ptr = qemu_get_ram_ptr(addr1);
4496 switch (endian) {
4497 case DEVICE_LITTLE_ENDIAN:
4498 stl_le_p(ptr, val);
4499 break;
4500 case DEVICE_BIG_ENDIAN:
4501 stl_be_p(ptr, val);
4502 break;
4503 default:
4504 stl_p(ptr, val);
4505 break;
4507 if (!cpu_physical_memory_is_dirty(addr1)) {
4508 /* invalidate code */
4509 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4510 /* set dirty bit */
4511 cpu_physical_memory_set_dirty_flags(addr1,
4512 (0xff & ~CODE_DIRTY_FLAG));
4517 void stl_phys(target_phys_addr_t addr, uint32_t val)
4519 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4522 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4524 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4527 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4529 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4532 /* XXX: optimize */
4533 void stb_phys(target_phys_addr_t addr, uint32_t val)
4535 uint8_t v = val;
4536 cpu_physical_memory_write(addr, &v, 1);
4539 /* warning: addr must be aligned */
4540 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4541 enum device_endian endian)
4543 int io_index;
4544 uint8_t *ptr;
4545 unsigned long pd;
4546 PhysPageDesc *p;
4548 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4549 if (!p) {
4550 pd = IO_MEM_UNASSIGNED;
4551 } else {
4552 pd = p->phys_offset;
4555 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4556 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4557 if (p)
4558 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4559 #if defined(TARGET_WORDS_BIGENDIAN)
4560 if (endian == DEVICE_LITTLE_ENDIAN) {
4561 val = bswap16(val);
4563 #else
4564 if (endian == DEVICE_BIG_ENDIAN) {
4565 val = bswap16(val);
4567 #endif
4568 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4569 } else {
4570 unsigned long addr1;
4571 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4572 /* RAM case */
4573 ptr = qemu_get_ram_ptr(addr1);
4574 switch (endian) {
4575 case DEVICE_LITTLE_ENDIAN:
4576 stw_le_p(ptr, val);
4577 break;
4578 case DEVICE_BIG_ENDIAN:
4579 stw_be_p(ptr, val);
4580 break;
4581 default:
4582 stw_p(ptr, val);
4583 break;
4585 if (!cpu_physical_memory_is_dirty(addr1)) {
4586 /* invalidate code */
4587 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4588 /* set dirty bit */
4589 cpu_physical_memory_set_dirty_flags(addr1,
4590 (0xff & ~CODE_DIRTY_FLAG));
4595 void stw_phys(target_phys_addr_t addr, uint32_t val)
4597 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4600 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4602 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4605 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4607 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4610 /* XXX: optimize */
4611 void stq_phys(target_phys_addr_t addr, uint64_t val)
4613 val = tswap64(val);
4614 cpu_physical_memory_write(addr, &val, 8);
4617 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4619 val = cpu_to_le64(val);
4620 cpu_physical_memory_write(addr, &val, 8);
4623 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4625 val = cpu_to_be64(val);
4626 cpu_physical_memory_write(addr, &val, 8);
4629 /* virtual memory access for debug (includes writing to ROM) */
4630 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4631 uint8_t *buf, int len, int is_write)
4633 int l;
4634 target_phys_addr_t phys_addr;
4635 target_ulong page;
4637 while (len > 0) {
4638 page = addr & TARGET_PAGE_MASK;
4639 phys_addr = cpu_get_phys_page_debug(env, page);
4640 /* if no physical page mapped, return an error */
4641 if (phys_addr == -1)
4642 return -1;
4643 l = (page + TARGET_PAGE_SIZE) - addr;
4644 if (l > len)
4645 l = len;
4646 phys_addr += (addr & ~TARGET_PAGE_MASK);
4647 if (is_write)
4648 cpu_physical_memory_write_rom(phys_addr, buf, l);
4649 else
4650 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4651 len -= l;
4652 buf += l;
4653 addr += l;
4655 return 0;
4657 #endif
4659 /* in deterministic execution mode, instructions doing device I/Os
4660 must be at the end of the TB */
4661 void cpu_io_recompile(CPUState *env, void *retaddr)
4663 TranslationBlock *tb;
4664 uint32_t n, cflags;
4665 target_ulong pc, cs_base;
4666 uint64_t flags;
4668 tb = tb_find_pc((unsigned long)retaddr);
4669 if (!tb) {
4670 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4671 retaddr);
4673 n = env->icount_decr.u16.low + tb->icount;
4674 cpu_restore_state(tb, env, (unsigned long)retaddr);
4675 /* Calculate how many instructions had been executed before the fault
4676 occurred. */
4677 n = n - env->icount_decr.u16.low;
4678 /* Generate a new TB ending on the I/O insn. */
4679 n++;
4680 /* On MIPS and SH, delay slot instructions can only be restarted if
4681 they were already the first instruction in the TB. If this is not
4682 the first instruction in a TB then re-execute the preceding
4683 branch. */
4684 #if defined(TARGET_MIPS)
4685 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4686 env->active_tc.PC -= 4;
4687 env->icount_decr.u16.low++;
4688 env->hflags &= ~MIPS_HFLAG_BMASK;
4690 #elif defined(TARGET_SH4)
4691 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4692 && n > 1) {
4693 env->pc -= 2;
4694 env->icount_decr.u16.low++;
4695 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4697 #endif
4698 /* This should never happen. */
4699 if (n > CF_COUNT_MASK)
4700 cpu_abort(env, "TB too big during recompile");
4702 cflags = n | CF_LAST_IO;
4703 pc = tb->pc;
4704 cs_base = tb->cs_base;
4705 flags = tb->flags;
4706 tb_phys_invalidate(tb, -1);
4707 /* FIXME: In theory this could raise an exception. In practice
4708 we have already translated the block once so it's probably ok. */
4709 tb_gen_code(env, pc, cs_base, flags, cflags);
4710 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4711 the first in the TB) then we end up generating a whole new TB and
4712 repeating the fault, which is horribly inefficient.
4713 Better would be to execute just this insn uncached, or generate a
4714 second new TB. */
4715 cpu_resume_from_signal(env, NULL);
4718 #if !defined(CONFIG_USER_ONLY)
4720 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4722 int i, target_code_size, max_target_code_size;
4723 int direct_jmp_count, direct_jmp2_count, cross_page;
4724 TranslationBlock *tb;
4726 target_code_size = 0;
4727 max_target_code_size = 0;
4728 cross_page = 0;
4729 direct_jmp_count = 0;
4730 direct_jmp2_count = 0;
4731 for(i = 0; i < nb_tbs; i++) {
4732 tb = &tbs[i];
4733 target_code_size += tb->size;
4734 if (tb->size > max_target_code_size)
4735 max_target_code_size = tb->size;
4736 if (tb->page_addr[1] != -1)
4737 cross_page++;
4738 if (tb->tb_next_offset[0] != 0xffff) {
4739 direct_jmp_count++;
4740 if (tb->tb_next_offset[1] != 0xffff) {
4741 direct_jmp2_count++;
4745 /* XXX: avoid using doubles ? */
4746 cpu_fprintf(f, "Translation buffer state:\n");
4747 cpu_fprintf(f, "gen code size %td/%ld\n",
4748 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4749 cpu_fprintf(f, "TB count %d/%d\n",
4750 nb_tbs, code_gen_max_blocks);
4751 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4752 nb_tbs ? target_code_size / nb_tbs : 0,
4753 max_target_code_size);
4754 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4755 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4756 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4757 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4758 cross_page,
4759 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4760 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4761 direct_jmp_count,
4762 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4763 direct_jmp2_count,
4764 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4765 cpu_fprintf(f, "\nStatistics:\n");
4766 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4767 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4768 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4769 #ifdef CONFIG_PROFILER
4770 tcg_dump_info(f, cpu_fprintf);
4771 #endif
4774 #define MMUSUFFIX _cmmu
4775 #define GETPC() NULL
4776 #define env cpu_single_env
4777 #define SOFTMMU_CODE_ACCESS
4779 #define SHIFT 0
4780 #include "softmmu_template.h"
4782 #define SHIFT 1
4783 #include "softmmu_template.h"
4785 #define SHIFT 2
4786 #include "softmmu_template.h"
4788 #define SHIFT 3
4789 #include "softmmu_template.h"
4791 #undef env
4793 #endif