kvm: switch kvm slots to use host virtual address instead of ram_addr_t
[qemu.git] / exec.c
blob36b61c91ac38a8279e6b012bc181fb3c266fe320
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 #endif
123 CPUState *first_cpu;
124 /* current CPU in the current thread. It is only valid inside
125 cpu_exec() */
126 DEFINE_TLS(CPUState *,cpu_single_env);
127 /* 0 = Do not count executed instructions.
128 1 = Precise instruction counting.
129 2 = Adaptive rate instruction counting. */
130 int use_icount = 0;
132 typedef struct PageDesc {
133 /* list of TBs intersecting this ram page */
134 TranslationBlock *first_tb;
135 /* in order to optimize self modifying code, we count the number
136 of lookups we do to a given page to use a bitmap */
137 unsigned int code_write_count;
138 uint8_t *code_bitmap;
139 #if defined(CONFIG_USER_ONLY)
140 unsigned long flags;
141 #endif
142 } PageDesc;
144 /* In system mode we want L1_MAP to be based on ram offsets,
145 while in user mode we want it to be based on virtual addresses. */
146 #if !defined(CONFIG_USER_ONLY)
147 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
148 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
151 #endif
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
154 #endif
156 /* Size of the L2 (and L3, etc) page tables. */
157 #define L2_BITS 10
158 #define L2_SIZE (1 << L2_BITS)
160 /* The bits remaining after N lower levels of page tables. */
161 #define P_L1_BITS_REM \
162 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 #define V_L1_BITS_REM \
164 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
166 /* Size of the L1 page table. Avoid silly small sizes. */
167 #if P_L1_BITS_REM < 4
168 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
169 #else
170 #define P_L1_BITS P_L1_BITS_REM
171 #endif
173 #if V_L1_BITS_REM < 4
174 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
175 #else
176 #define V_L1_BITS V_L1_BITS_REM
177 #endif
179 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
180 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
182 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
183 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
185 unsigned long qemu_real_host_page_size;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
232 #ifdef _WIN32
233 static void map_exec(void *addr, long size)
235 DWORD old_protect;
236 VirtualProtect(addr, size,
237 PAGE_EXECUTE_READWRITE, &old_protect);
240 #else
/*
 * POSIX variant: make the host pages covering [addr, addr + size)
 * readable, writable and executable.  The start is rounded down and the
 * end rounded up to the host page size.  The mprotect() result is not
 * checked.
 */
static void map_exec(void *addr, long size)
{
    const unsigned long page_size = getpagesize();
    const unsigned long page_mask = ~(page_size - 1);
    const unsigned long first = (unsigned long)addr & page_mask;
    const unsigned long limit =
        ((unsigned long)addr + size + page_size - 1) & page_mask;

    mprotect((void *)first, limit - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
256 #endif
258 static void page_init(void)
260 /* NOTE: we can always suppose that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 #ifdef HAVE_KINFO_GETVMMAP
281 struct kinfo_vmentry *freep;
282 int i, cnt;
284 freep = kinfo_getvmmap(getpid(), &cnt);
285 if (freep) {
286 mmap_lock();
287 for (i = 0; i < cnt; i++) {
288 unsigned long startaddr, endaddr;
290 startaddr = freep[i].kve_start;
291 endaddr = freep[i].kve_end;
292 if (h2g_valid(startaddr)) {
293 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295 if (h2g_valid(endaddr)) {
296 endaddr = h2g(endaddr);
297 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
298 } else {
299 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
300 endaddr = ~0ul;
301 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
302 #endif
306 free(freep);
307 mmap_unlock();
309 #else
310 FILE *f;
312 last_brk = (unsigned long)sbrk(0);
314 f = fopen("/compat/linux/proc/self/maps", "r");
315 if (f) {
316 mmap_lock();
318 do {
319 unsigned long startaddr, endaddr;
320 int n;
322 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324 if (n == 2 && h2g_valid(startaddr)) {
325 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327 if (h2g_valid(endaddr)) {
328 endaddr = h2g(endaddr);
329 } else {
330 endaddr = ~0ul;
332 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 } while (!feof(f));
336 fclose(f);
337 mmap_unlock();
339 #endif
341 #endif
344 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 PageDesc *pd;
347 void **lp;
348 int i;
350 #if defined(CONFIG_USER_ONLY)
351 /* We can't use g_malloc because it may recurse into a locked mutex. */
352 # define ALLOC(P, SIZE) \
353 do { \
354 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
355 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
356 } while (0)
357 #else
358 # define ALLOC(P, SIZE) \
359 do { P = g_malloc0(SIZE); } while (0)
360 #endif
362 /* Level 1. Always allocated. */
363 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365 /* Level 2..N-1. */
366 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
367 void **p = *lp;
369 if (p == NULL) {
370 if (!alloc) {
371 return NULL;
373 ALLOC(p, sizeof(void *) * L2_SIZE);
374 *lp = p;
377 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
380 pd = *lp;
381 if (pd == NULL) {
382 if (!alloc) {
383 return NULL;
385 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
386 *lp = pd;
389 #undef ALLOC
391 return pd + (index & (L2_SIZE - 1));
394 static inline PageDesc *page_find(tb_page_addr_t index)
396 return page_find_alloc(index, 0);
399 #if !defined(CONFIG_USER_ONLY)
400 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
402 PhysPageDesc *pd;
403 void **lp;
404 int i;
406 /* Level 1. Always allocated. */
407 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
409 /* Level 2..N-1. */
410 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
411 void **p = *lp;
412 if (p == NULL) {
413 if (!alloc) {
414 return NULL;
416 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
418 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
421 pd = *lp;
422 if (pd == NULL) {
423 int i;
424 int first_index = index & ~(L2_SIZE - 1);
426 if (!alloc) {
427 return NULL;
430 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
432 for (i = 0; i < L2_SIZE; i++) {
433 pd[i].phys_offset = IO_MEM_UNASSIGNED;
434 pd[i].region_offset = (first_index + i) << TARGET_PAGE_BITS;
438 return pd + (index & (L2_SIZE - 1));
441 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
443 return phys_page_find_alloc(index, 0);
446 static void tlb_protect_code(ram_addr_t ram_addr);
447 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
448 target_ulong vaddr);
449 #define mmap_lock() do { } while(0)
450 #define mmap_unlock() do { } while(0)
451 #endif
453 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
455 #if defined(CONFIG_USER_ONLY)
456 /* Currently it is not recommended to allocate big chunks of data in
457 user mode. It will change when a dedicated libc will be used */
458 #define USE_STATIC_CODE_GEN_BUFFER
459 #endif
461 #ifdef USE_STATIC_CODE_GEN_BUFFER
462 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
463 __attribute__((aligned (CODE_GEN_ALIGN)));
464 #endif
466 static void code_gen_alloc(unsigned long tb_size)
468 #ifdef USE_STATIC_CODE_GEN_BUFFER
469 code_gen_buffer = static_code_gen_buffer;
470 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
471 map_exec(code_gen_buffer, code_gen_buffer_size);
472 #else
473 code_gen_buffer_size = tb_size;
474 if (code_gen_buffer_size == 0) {
475 #if defined(CONFIG_USER_ONLY)
476 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
477 #else
478 /* XXX: needs adjustments */
479 code_gen_buffer_size = (unsigned long)(ram_size / 4);
480 #endif
482 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
483 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
484 /* The code gen buffer location may have constraints depending on
485 the host cpu and OS */
486 #if defined(__linux__)
488 int flags;
489 void *start = NULL;
491 flags = MAP_PRIVATE | MAP_ANONYMOUS;
492 #if defined(__x86_64__)
493 flags |= MAP_32BIT;
494 /* Cannot map more than that */
495 if (code_gen_buffer_size > (800 * 1024 * 1024))
496 code_gen_buffer_size = (800 * 1024 * 1024);
497 #elif defined(__sparc_v9__)
498 // Map the buffer below 2G, so we can use direct calls and branches
499 flags |= MAP_FIXED;
500 start = (void *) 0x60000000UL;
501 if (code_gen_buffer_size > (512 * 1024 * 1024))
502 code_gen_buffer_size = (512 * 1024 * 1024);
503 #elif defined(__arm__)
504 /* Keep the buffer no bigger than 16GB to branch between blocks */
505 if (code_gen_buffer_size > 16 * 1024 * 1024)
506 code_gen_buffer_size = 16 * 1024 * 1024;
507 #elif defined(__s390x__)
508 /* Map the buffer so that we can use direct calls and branches. */
509 /* We have a +- 4GB range on the branches; leave some slop. */
510 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
511 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
513 start = (void *)0x90000000UL;
514 #endif
515 code_gen_buffer = mmap(start, code_gen_buffer_size,
516 PROT_WRITE | PROT_READ | PROT_EXEC,
517 flags, -1, 0);
518 if (code_gen_buffer == MAP_FAILED) {
519 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
520 exit(1);
523 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
524 || defined(__DragonFly__) || defined(__OpenBSD__) \
525 || defined(__NetBSD__)
527 int flags;
528 void *addr = NULL;
529 flags = MAP_PRIVATE | MAP_ANONYMOUS;
530 #if defined(__x86_64__)
531 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
532 * 0x40000000 is free */
533 flags |= MAP_FIXED;
534 addr = (void *)0x40000000;
535 /* Cannot map more than that */
536 if (code_gen_buffer_size > (800 * 1024 * 1024))
537 code_gen_buffer_size = (800 * 1024 * 1024);
538 #elif defined(__sparc_v9__)
539 // Map the buffer below 2G, so we can use direct calls and branches
540 flags |= MAP_FIXED;
541 addr = (void *) 0x60000000UL;
542 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
543 code_gen_buffer_size = (512 * 1024 * 1024);
545 #endif
546 code_gen_buffer = mmap(addr, code_gen_buffer_size,
547 PROT_WRITE | PROT_READ | PROT_EXEC,
548 flags, -1, 0);
549 if (code_gen_buffer == MAP_FAILED) {
550 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
551 exit(1);
554 #else
555 code_gen_buffer = g_malloc(code_gen_buffer_size);
556 map_exec(code_gen_buffer, code_gen_buffer_size);
557 #endif
558 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
559 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
560 code_gen_buffer_max_size = code_gen_buffer_size -
561 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
562 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
563 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
566 /* Must be called before using the QEMU cpus. 'tb_size' is the size
567 (in bytes) allocated to the translation buffer. Zero means default
568 size. */
569 void tcg_exec_init(unsigned long tb_size)
571 cpu_gen_init();
572 code_gen_alloc(tb_size);
573 code_gen_ptr = code_gen_buffer;
574 page_init();
575 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
576 /* There's no guest base to take into account, so go ahead and
577 initialize the prologue now. */
578 tcg_prologue_init(&tcg_ctx);
579 #endif
582 bool tcg_enabled(void)
584 return code_gen_buffer != NULL;
/* Global setup for system emulation: initialise the memory map first and
   the io memory tables second.  Does nothing in user-only builds. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
595 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
597 static int cpu_common_post_load(void *opaque, int version_id)
599 CPUState *env = opaque;
601 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
602 version_id is increased. */
603 env->interrupt_request &= ~0x01;
604 tlb_flush(env, 1);
606 return 0;
609 static const VMStateDescription vmstate_cpu_common = {
610 .name = "cpu_common",
611 .version_id = 1,
612 .minimum_version_id = 1,
613 .minimum_version_id_old = 1,
614 .post_load = cpu_common_post_load,
615 .fields = (VMStateField []) {
616 VMSTATE_UINT32(halted, CPUState),
617 VMSTATE_UINT32(interrupt_request, CPUState),
618 VMSTATE_END_OF_LIST()
621 #endif
623 CPUState *qemu_get_cpu(int cpu)
625 CPUState *env = first_cpu;
627 while (env) {
628 if (env->cpu_index == cpu)
629 break;
630 env = env->next_cpu;
633 return env;
636 void cpu_exec_init(CPUState *env)
638 CPUState **penv;
639 int cpu_index;
641 #if defined(CONFIG_USER_ONLY)
642 cpu_list_lock();
643 #endif
644 env->next_cpu = NULL;
645 penv = &first_cpu;
646 cpu_index = 0;
647 while (*penv != NULL) {
648 penv = &(*penv)->next_cpu;
649 cpu_index++;
651 env->cpu_index = cpu_index;
652 env->numa_node = 0;
653 QTAILQ_INIT(&env->breakpoints);
654 QTAILQ_INIT(&env->watchpoints);
655 #ifndef CONFIG_USER_ONLY
656 env->thread_id = qemu_get_thread_id();
657 #endif
658 *penv = env;
659 #if defined(CONFIG_USER_ONLY)
660 cpu_list_unlock();
661 #endif
662 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
663 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
664 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
665 cpu_save, cpu_load, env);
666 #endif
669 /* Allocate a new translation block. Flush the translation buffer if
670 too many translation blocks or too much generated code. */
671 static TranslationBlock *tb_alloc(target_ulong pc)
673 TranslationBlock *tb;
675 if (nb_tbs >= code_gen_max_blocks ||
676 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
677 return NULL;
678 tb = &tbs[nb_tbs++];
679 tb->pc = pc;
680 tb->cflags = 0;
681 return tb;
684 void tb_free(TranslationBlock *tb)
686 /* In practice this is mostly used for single use temporary TB
687 Ignore the hard cases and just back up if this TB happens to
688 be the last one generated. */
689 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
690 code_gen_ptr = tb->tc_ptr;
691 nb_tbs--;
695 static inline void invalidate_page_bitmap(PageDesc *p)
697 if (p->code_bitmap) {
698 g_free(p->code_bitmap);
699 p->code_bitmap = NULL;
701 p->code_write_count = 0;
704 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
706 static void page_flush_tb_1 (int level, void **lp)
708 int i;
710 if (*lp == NULL) {
711 return;
713 if (level == 0) {
714 PageDesc *pd = *lp;
715 for (i = 0; i < L2_SIZE; ++i) {
716 pd[i].first_tb = NULL;
717 invalidate_page_bitmap(pd + i);
719 } else {
720 void **pp = *lp;
721 for (i = 0; i < L2_SIZE; ++i) {
722 page_flush_tb_1 (level - 1, pp + i);
727 static void page_flush_tb(void)
729 int i;
730 for (i = 0; i < V_L1_SIZE; i++) {
731 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
735 /* flush all the translation blocks */
736 /* XXX: tb_flush is currently not thread safe */
737 void tb_flush(CPUState *env1)
739 CPUState *env;
740 #if defined(DEBUG_FLUSH)
741 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
742 (unsigned long)(code_gen_ptr - code_gen_buffer),
743 nb_tbs, nb_tbs > 0 ?
744 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
745 #endif
746 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
747 cpu_abort(env1, "Internal error: code buffer overflow\n");
749 nb_tbs = 0;
751 for(env = first_cpu; env != NULL; env = env->next_cpu) {
752 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
755 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
756 page_flush_tb();
758 code_gen_ptr = code_gen_buffer;
759 /* XXX: flush processor icache at this point if cache flush is
760 expensive */
761 tb_flush_count++;
764 #ifdef DEBUG_TB_CHECK
766 static void tb_invalidate_check(target_ulong address)
768 TranslationBlock *tb;
769 int i;
770 address &= TARGET_PAGE_MASK;
771 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
772 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
773 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
774 address >= tb->pc + tb->size)) {
775 printf("ERROR invalidate: address=" TARGET_FMT_lx
776 " PC=%08lx size=%04x\n",
777 address, (long)tb->pc, tb->size);
783 /* verify that all the pages have correct rights for code */
784 static void tb_page_check(void)
786 TranslationBlock *tb;
787 int i, flags1, flags2;
789 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
790 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
791 flags1 = page_get_flags(tb->pc);
792 flags2 = page_get_flags(tb->pc + tb->size - 1);
793 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
794 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
795 (long)tb->pc, tb->size, flags1, flags2);
801 #endif
803 /* invalidate one TB */
804 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
805 int next_offset)
807 TranslationBlock *tb1;
808 for(;;) {
809 tb1 = *ptb;
810 if (tb1 == tb) {
811 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
812 break;
814 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
818 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
820 TranslationBlock *tb1;
821 unsigned int n1;
823 for(;;) {
824 tb1 = *ptb;
825 n1 = (long)tb1 & 3;
826 tb1 = (TranslationBlock *)((long)tb1 & ~3);
827 if (tb1 == tb) {
828 *ptb = tb1->page_next[n1];
829 break;
831 ptb = &tb1->page_next[n1];
835 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
837 TranslationBlock *tb1, **ptb;
838 unsigned int n1;
840 ptb = &tb->jmp_next[n];
841 tb1 = *ptb;
842 if (tb1) {
843 /* find tb(n) in circular list */
844 for(;;) {
845 tb1 = *ptb;
846 n1 = (long)tb1 & 3;
847 tb1 = (TranslationBlock *)((long)tb1 & ~3);
848 if (n1 == n && tb1 == tb)
849 break;
850 if (n1 == 2) {
851 ptb = &tb1->jmp_first;
852 } else {
853 ptb = &tb1->jmp_next[n1];
856 /* now we can suppress tb(n) from the list */
857 *ptb = tb->jmp_next[n];
859 tb->jmp_next[n] = NULL;
863 /* reset the jump entry 'n' of a TB so that it is not chained to
864 another TB */
865 static inline void tb_reset_jump(TranslationBlock *tb, int n)
867 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
870 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
872 CPUState *env;
873 PageDesc *p;
874 unsigned int h, n1;
875 tb_page_addr_t phys_pc;
876 TranslationBlock *tb1, *tb2;
878 /* remove the TB from the hash list */
879 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
880 h = tb_phys_hash_func(phys_pc);
881 tb_remove(&tb_phys_hash[h], tb,
882 offsetof(TranslationBlock, phys_hash_next));
884 /* remove the TB from the page list */
885 if (tb->page_addr[0] != page_addr) {
886 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
887 tb_page_remove(&p->first_tb, tb);
888 invalidate_page_bitmap(p);
890 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
891 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
892 tb_page_remove(&p->first_tb, tb);
893 invalidate_page_bitmap(p);
896 tb_invalidated_flag = 1;
898 /* remove the TB from the hash list */
899 h = tb_jmp_cache_hash_func(tb->pc);
900 for(env = first_cpu; env != NULL; env = env->next_cpu) {
901 if (env->tb_jmp_cache[h] == tb)
902 env->tb_jmp_cache[h] = NULL;
905 /* suppress this TB from the two jump lists */
906 tb_jmp_remove(tb, 0);
907 tb_jmp_remove(tb, 1);
909 /* suppress any remaining jumps to this TB */
910 tb1 = tb->jmp_first;
911 for(;;) {
912 n1 = (long)tb1 & 3;
913 if (n1 == 2)
914 break;
915 tb1 = (TranslationBlock *)((long)tb1 & ~3);
916 tb2 = tb1->jmp_next[n1];
917 tb_reset_jump(tb1, n1);
918 tb1->jmp_next[n1] = NULL;
919 tb1 = tb2;
921 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
923 tb_phys_invalidate_count++;
/*
 * Set 'len' consecutive bits in the bitmap 'tab', starting at bit index
 * 'start' (bit 0 is the least significant bit of tab[0]).  Bits outside
 * the range are left unchanged.  A 'len' of zero or less sets nothing.
 */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, mask, end1;

    /* An empty or negative range must not touch the bitmap. */
    if (len <= 0) {
        return;
    }

    end = start + len;
    tab += start >> 3;
    /* Bits from 'start' upwards within the first byte. */
    mask = 0xff << (start & 7);
    if ((start & ~7) == (end & ~7)) {
        /* Range begins and ends inside the same byte. */
        mask &= ~(0xff << (end & 7));
        *tab |= mask;
    } else {
        *tab++ |= mask;
        start = (start + 8) & ~7;
        end1 = end & ~7;
        /* Whole bytes in the middle of the range. */
        while (start < end1) {
            *tab++ = 0xff;
            start += 8;
        }
        /* Remaining low bits of the last byte. */
        if (start < end) {
            mask = ~(0xff << (end & 7));
            *tab |= mask;
        }
    }
}
953 static void build_page_bitmap(PageDesc *p)
955 int n, tb_start, tb_end;
956 TranslationBlock *tb;
958 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
960 tb = p->first_tb;
961 while (tb != NULL) {
962 n = (long)tb & 3;
963 tb = (TranslationBlock *)((long)tb & ~3);
964 /* NOTE: this is subtle as a TB may span two physical pages */
965 if (n == 0) {
966 /* NOTE: tb_end may be after the end of the page, but
967 it is not a problem */
968 tb_start = tb->pc & ~TARGET_PAGE_MASK;
969 tb_end = tb_start + tb->size;
970 if (tb_end > TARGET_PAGE_SIZE)
971 tb_end = TARGET_PAGE_SIZE;
972 } else {
973 tb_start = 0;
974 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
976 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
977 tb = tb->page_next[n];
981 TranslationBlock *tb_gen_code(CPUState *env,
982 target_ulong pc, target_ulong cs_base,
983 int flags, int cflags)
985 TranslationBlock *tb;
986 uint8_t *tc_ptr;
987 tb_page_addr_t phys_pc, phys_page2;
988 target_ulong virt_page2;
989 int code_gen_size;
991 phys_pc = get_page_addr_code(env, pc);
992 tb = tb_alloc(pc);
993 if (!tb) {
994 /* flush must be done */
995 tb_flush(env);
996 /* cannot fail at this point */
997 tb = tb_alloc(pc);
998 /* Don't forget to invalidate previous TB info. */
999 tb_invalidated_flag = 1;
1001 tc_ptr = code_gen_ptr;
1002 tb->tc_ptr = tc_ptr;
1003 tb->cs_base = cs_base;
1004 tb->flags = flags;
1005 tb->cflags = cflags;
1006 cpu_gen_code(env, tb, &code_gen_size);
1007 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1009 /* check next page if needed */
1010 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1011 phys_page2 = -1;
1012 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1013 phys_page2 = get_page_addr_code(env, virt_page2);
1015 tb_link_page(tb, phys_pc, phys_page2);
1016 return tb;
1019 /* invalidate all TBs which intersect with the target physical page
1020 starting in range [start;end[. NOTE: start and end must refer to
1021 the same physical page. 'is_cpu_write_access' should be true if called
1022 from a real cpu write access: the virtual CPU will exit the current
1023 TB if code is modified inside this TB. */
1024 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1025 int is_cpu_write_access)
1027 TranslationBlock *tb, *tb_next, *saved_tb;
1028 CPUState *env = cpu_single_env;
1029 tb_page_addr_t tb_start, tb_end;
1030 PageDesc *p;
1031 int n;
1032 #ifdef TARGET_HAS_PRECISE_SMC
1033 int current_tb_not_found = is_cpu_write_access;
1034 TranslationBlock *current_tb = NULL;
1035 int current_tb_modified = 0;
1036 target_ulong current_pc = 0;
1037 target_ulong current_cs_base = 0;
1038 int current_flags = 0;
1039 #endif /* TARGET_HAS_PRECISE_SMC */
1041 p = page_find(start >> TARGET_PAGE_BITS);
1042 if (!p)
1043 return;
1044 if (!p->code_bitmap &&
1045 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1046 is_cpu_write_access) {
1047 /* build code bitmap */
1048 build_page_bitmap(p);
1051 /* we remove all the TBs in the range [start, end[ */
1052 /* XXX: see if in some cases it could be faster to invalidate all the code */
1053 tb = p->first_tb;
1054 while (tb != NULL) {
1055 n = (long)tb & 3;
1056 tb = (TranslationBlock *)((long)tb & ~3);
1057 tb_next = tb->page_next[n];
1058 /* NOTE: this is subtle as a TB may span two physical pages */
1059 if (n == 0) {
1060 /* NOTE: tb_end may be after the end of the page, but
1061 it is not a problem */
1062 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1063 tb_end = tb_start + tb->size;
1064 } else {
1065 tb_start = tb->page_addr[1];
1066 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1068 if (!(tb_end <= start || tb_start >= end)) {
1069 #ifdef TARGET_HAS_PRECISE_SMC
1070 if (current_tb_not_found) {
1071 current_tb_not_found = 0;
1072 current_tb = NULL;
1073 if (env->mem_io_pc) {
1074 /* now we have a real cpu fault */
1075 current_tb = tb_find_pc(env->mem_io_pc);
1078 if (current_tb == tb &&
1079 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1080 /* If we are modifying the current TB, we must stop
1081 its execution. We could be more precise by checking
1082 that the modification is after the current PC, but it
1083 would require a specialized function to partially
1084 restore the CPU state */
1086 current_tb_modified = 1;
1087 cpu_restore_state(current_tb, env, env->mem_io_pc);
1088 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1089 &current_flags);
1091 #endif /* TARGET_HAS_PRECISE_SMC */
1092 /* we need to do that to handle the case where a signal
1093 occurs while doing tb_phys_invalidate() */
1094 saved_tb = NULL;
1095 if (env) {
1096 saved_tb = env->current_tb;
1097 env->current_tb = NULL;
1099 tb_phys_invalidate(tb, -1);
1100 if (env) {
1101 env->current_tb = saved_tb;
1102 if (env->interrupt_request && env->current_tb)
1103 cpu_interrupt(env, env->interrupt_request);
1106 tb = tb_next;
1108 #if !defined(CONFIG_USER_ONLY)
1109 /* if no code remaining, no need to continue to use slow writes */
1110 if (!p->first_tb) {
1111 invalidate_page_bitmap(p);
1112 if (is_cpu_write_access) {
1113 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1116 #endif
1117 #ifdef TARGET_HAS_PRECISE_SMC
1118 if (current_tb_modified) {
1119 /* we generate a block containing just the instruction
1120 modifying the memory. It will ensure that it cannot modify
1121 itself */
1122 env->current_tb = NULL;
1123 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1124 cpu_resume_from_signal(env, NULL);
1126 #endif
1129 /* len must be <= 8 and start must be a multiple of len */
1130 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1132 PageDesc *p;
1133 int offset, b;
1134 #if 0
1135 if (1) {
1136 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1137 cpu_single_env->mem_io_vaddr, len,
1138 cpu_single_env->eip,
1139 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1141 #endif
1142 p = page_find(start >> TARGET_PAGE_BITS);
1143 if (!p)
1144 return;
1145 if (p->code_bitmap) {
1146 offset = start & ~TARGET_PAGE_MASK;
1147 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1148 if (b & ((1 << len) - 1))
1149 goto do_invalidate;
1150 } else {
1151 do_invalidate:
1152 tb_invalidate_phys_page_range(start, start + len, 1);
1156 #if !defined(CONFIG_SOFTMMU)
1157 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1158 unsigned long pc, void *puc)
1160 TranslationBlock *tb;
1161 PageDesc *p;
1162 int n;
1163 #ifdef TARGET_HAS_PRECISE_SMC
1164 TranslationBlock *current_tb = NULL;
1165 CPUState *env = cpu_single_env;
1166 int current_tb_modified = 0;
1167 target_ulong current_pc = 0;
1168 target_ulong current_cs_base = 0;
1169 int current_flags = 0;
1170 #endif
1172 addr &= TARGET_PAGE_MASK;
1173 p = page_find(addr >> TARGET_PAGE_BITS);
1174 if (!p)
1175 return;
1176 tb = p->first_tb;
1177 #ifdef TARGET_HAS_PRECISE_SMC
1178 if (tb && pc != 0) {
1179 current_tb = tb_find_pc(pc);
1181 #endif
1182 while (tb != NULL) {
1183 n = (long)tb & 3;
1184 tb = (TranslationBlock *)((long)tb & ~3);
1185 #ifdef TARGET_HAS_PRECISE_SMC
1186 if (current_tb == tb &&
1187 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1188 /* If we are modifying the current TB, we must stop
1189 its execution. We could be more precise by checking
1190 that the modification is after the current PC, but it
1191 would require a specialized function to partially
1192 restore the CPU state */
1194 current_tb_modified = 1;
1195 cpu_restore_state(current_tb, env, pc);
1196 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1197 &current_flags);
1199 #endif /* TARGET_HAS_PRECISE_SMC */
1200 tb_phys_invalidate(tb, addr);
1201 tb = tb->page_next[n];
1203 p->first_tb = NULL;
1204 #ifdef TARGET_HAS_PRECISE_SMC
1205 if (current_tb_modified) {
1206 /* we generate a block containing just the instruction
1207 modifying the memory. It will ensure that it cannot modify
1208 itself */
1209 env->current_tb = NULL;
1210 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1211 cpu_resume_from_signal(env, puc);
1213 #endif
1215 #endif
1217 /* add the tb in the target page and protect it if necessary */
1218 static inline void tb_alloc_page(TranslationBlock *tb,
1219 unsigned int n, tb_page_addr_t page_addr)
1221 PageDesc *p;
1222 #ifndef CONFIG_USER_ONLY
1223 bool page_already_protected;
1224 #endif
1226 tb->page_addr[n] = page_addr;
1227 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1228 tb->page_next[n] = p->first_tb;
1229 #ifndef CONFIG_USER_ONLY
1230 page_already_protected = p->first_tb != NULL;
1231 #endif
1232 p->first_tb = (TranslationBlock *)((long)tb | n);
1233 invalidate_page_bitmap(p);
1235 #if defined(TARGET_HAS_SMC) || 1
1237 #if defined(CONFIG_USER_ONLY)
1238 if (p->flags & PAGE_WRITE) {
1239 target_ulong addr;
1240 PageDesc *p2;
1241 int prot;
1243 /* force the host page as non writable (writes will have a
1244 page fault + mprotect overhead) */
1245 page_addr &= qemu_host_page_mask;
1246 prot = 0;
1247 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1248 addr += TARGET_PAGE_SIZE) {
1250 p2 = page_find (addr >> TARGET_PAGE_BITS);
1251 if (!p2)
1252 continue;
1253 prot |= p2->flags;
1254 p2->flags &= ~PAGE_WRITE;
1256 mprotect(g2h(page_addr), qemu_host_page_size,
1257 (prot & PAGE_BITS) & ~PAGE_WRITE);
1258 #ifdef DEBUG_TB_INVALIDATE
1259 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1260 page_addr);
1261 #endif
1263 #else
1264 /* if some code is already present, then the pages are already
1265 protected. So we handle the case where only the first TB is
1266 allocated in a physical page */
1267 if (!page_already_protected) {
1268 tlb_protect_code(page_addr);
1270 #endif
1272 #endif /* TARGET_HAS_SMC */
1275 /* add a new TB and link it to the physical page tables. phys_page2 is
1276 (-1) to indicate that only one page contains the TB. */
1277 void tb_link_page(TranslationBlock *tb,
1278 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1280 unsigned int h;
1281 TranslationBlock **ptb;
1283 /* Grab the mmap lock to stop another thread invalidating this TB
1284 before we are done. */
1285 mmap_lock();
1286 /* add in the physical hash table */
1287 h = tb_phys_hash_func(phys_pc);
1288 ptb = &tb_phys_hash[h];
1289 tb->phys_hash_next = *ptb;
1290 *ptb = tb;
1292 /* add in the page list */
1293 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1294 if (phys_page2 != -1)
1295 tb_alloc_page(tb, 1, phys_page2);
1296 else
1297 tb->page_addr[1] = -1;
1299 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1300 tb->jmp_next[0] = NULL;
1301 tb->jmp_next[1] = NULL;
1303 /* init original jump addresses */
1304 if (tb->tb_next_offset[0] != 0xffff)
1305 tb_reset_jump(tb, 0);
1306 if (tb->tb_next_offset[1] != 0xffff)
1307 tb_reset_jump(tb, 1);
1309 #ifdef DEBUG_TB_CHECK
1310 tb_page_check();
1311 #endif
1312 mmap_unlock();
1315 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1316 tb[1].tc_ptr. Return NULL if not found */
1317 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1319 int m_min, m_max, m;
1320 unsigned long v;
1321 TranslationBlock *tb;
1323 if (nb_tbs <= 0)
1324 return NULL;
1325 if (tc_ptr < (unsigned long)code_gen_buffer ||
1326 tc_ptr >= (unsigned long)code_gen_ptr)
1327 return NULL;
1328 /* binary search (cf Knuth) */
1329 m_min = 0;
1330 m_max = nb_tbs - 1;
1331 while (m_min <= m_max) {
1332 m = (m_min + m_max) >> 1;
1333 tb = &tbs[m];
1334 v = (unsigned long)tb->tc_ptr;
1335 if (v == tc_ptr)
1336 return tb;
1337 else if (tc_ptr < v) {
1338 m_max = m - 1;
1339 } else {
1340 m_min = m + 1;
1343 return &tbs[m_max];
1346 static void tb_reset_jump_recursive(TranslationBlock *tb);
1348 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1350 TranslationBlock *tb1, *tb_next, **ptb;
1351 unsigned int n1;
1353 tb1 = tb->jmp_next[n];
1354 if (tb1 != NULL) {
1355 /* find head of list */
1356 for(;;) {
1357 n1 = (long)tb1 & 3;
1358 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1359 if (n1 == 2)
1360 break;
1361 tb1 = tb1->jmp_next[n1];
1363 /* we are now sure now that tb jumps to tb1 */
1364 tb_next = tb1;
1366 /* remove tb from the jmp_first list */
1367 ptb = &tb_next->jmp_first;
1368 for(;;) {
1369 tb1 = *ptb;
1370 n1 = (long)tb1 & 3;
1371 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1372 if (n1 == n && tb1 == tb)
1373 break;
1374 ptb = &tb1->jmp_next[n1];
1376 *ptb = tb->jmp_next[n];
1377 tb->jmp_next[n] = NULL;
1379 /* suppress the jump to next tb in generated code */
1380 tb_reset_jump(tb, n);
1382 /* suppress jumps in the tb on which we could have jumped */
1383 tb_reset_jump_recursive(tb_next);
1387 static void tb_reset_jump_recursive(TranslationBlock *tb)
1389 tb_reset_jump_recursive2(tb, 0);
1390 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/* Invalidate the translated code covering the single byte at 'pc':
   the address is resolved with cpu_get_phys_page_debug() and the
   physical page descriptor before calling
   tb_invalidate_phys_page_range(). */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys;
    target_ulong pd;
    ram_addr_t ram_addr;
    PhysPageDesc *desc;

    phys = cpu_get_phys_page_debug(env, pc);
    desc = phys_page_find(phys >> TARGET_PAGE_BITS);
    if (desc != NULL) {
        pd = desc->phys_offset;
    } else {
        pd = IO_MEM_UNASSIGNED;
    }
    /* page part from the descriptor, in-page offset from pc */
    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1420 #if defined(CONFIG_USER_ONLY)
1421 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1426 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1427 int flags, CPUWatchpoint **watchpoint)
1429 return -ENOSYS;
1431 #else
1432 /* Add a watchpoint. */
1433 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1434 int flags, CPUWatchpoint **watchpoint)
1436 target_ulong len_mask = ~(len - 1);
1437 CPUWatchpoint *wp;
1439 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1440 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1441 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1442 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1443 return -EINVAL;
1445 wp = g_malloc(sizeof(*wp));
1447 wp->vaddr = addr;
1448 wp->len_mask = len_mask;
1449 wp->flags = flags;
1451 /* keep all GDB-injected watchpoints in front */
1452 if (flags & BP_GDB)
1453 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1454 else
1455 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1457 tlb_flush_page(env, addr);
1459 if (watchpoint)
1460 *watchpoint = wp;
1461 return 0;
1464 /* Remove a specific watchpoint. */
1465 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1466 int flags)
1468 target_ulong len_mask = ~(len - 1);
1469 CPUWatchpoint *wp;
1471 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1472 if (addr == wp->vaddr && len_mask == wp->len_mask
1473 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1474 cpu_watchpoint_remove_by_ref(env, wp);
1475 return 0;
1478 return -ENOENT;
1481 /* Remove a specific watchpoint by reference. */
1482 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1484 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1486 tlb_flush_page(env, watchpoint->vaddr);
1488 g_free(watchpoint);
1491 /* Remove all matching watchpoints. */
1492 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1494 CPUWatchpoint *wp, *next;
1496 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1497 if (wp->flags & mask)
1498 cpu_watchpoint_remove_by_ref(env, wp);
1501 #endif
1503 /* Add a breakpoint. */
1504 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1505 CPUBreakpoint **breakpoint)
1507 #if defined(TARGET_HAS_ICE)
1508 CPUBreakpoint *bp;
1510 bp = g_malloc(sizeof(*bp));
1512 bp->pc = pc;
1513 bp->flags = flags;
1515 /* keep all GDB-injected breakpoints in front */
1516 if (flags & BP_GDB)
1517 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1518 else
1519 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1521 breakpoint_invalidate(env, pc);
1523 if (breakpoint)
1524 *breakpoint = bp;
1525 return 0;
1526 #else
1527 return -ENOSYS;
1528 #endif
1531 /* Remove a specific breakpoint. */
1532 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1534 #if defined(TARGET_HAS_ICE)
1535 CPUBreakpoint *bp;
1537 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1538 if (bp->pc == pc && bp->flags == flags) {
1539 cpu_breakpoint_remove_by_ref(env, bp);
1540 return 0;
1543 return -ENOENT;
1544 #else
1545 return -ENOSYS;
1546 #endif
1549 /* Remove a specific breakpoint by reference. */
1550 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1552 #if defined(TARGET_HAS_ICE)
1553 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1555 breakpoint_invalidate(env, breakpoint->pc);
1557 g_free(breakpoint);
1558 #endif
1561 /* Remove all matching breakpoints. */
1562 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1564 #if defined(TARGET_HAS_ICE)
1565 CPUBreakpoint *bp, *next;
1567 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1568 if (bp->flags & mask)
1569 cpu_breakpoint_remove_by_ref(env, bp);
1571 #endif
1574 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1575 CPU loop after each instruction */
1576 void cpu_single_step(CPUState *env, int enabled)
1578 #if defined(TARGET_HAS_ICE)
1579 if (env->singlestep_enabled != enabled) {
1580 env->singlestep_enabled = enabled;
1581 if (kvm_enabled())
1582 kvm_update_guest_debug(env, 0);
1583 else {
1584 /* must flush all the translated code to avoid inconsistencies */
1585 /* XXX: only flush what is necessary */
1586 tb_flush(env);
1589 #endif
1592 /* enable or disable low levels log */
1593 void cpu_set_log(int log_flags)
1595 loglevel = log_flags;
1596 if (loglevel && !logfile) {
1597 logfile = fopen(logfilename, log_append ? "a" : "w");
1598 if (!logfile) {
1599 perror(logfilename);
1600 _exit(1);
1602 #if !defined(CONFIG_SOFTMMU)
1603 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1605 static char logfile_buf[4096];
1606 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1608 #elif defined(_WIN32)
1609 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1610 setvbuf(logfile, NULL, _IONBF, 0);
1611 #else
1612 setvbuf(logfile, NULL, _IOLBF, 0);
1613 #endif
1614 log_append = 1;
1616 if (!loglevel && logfile) {
1617 fclose(logfile);
1618 logfile = NULL;
1622 void cpu_set_log_filename(const char *filename)
1624 logfilename = strdup(filename);
1625 if (logfile) {
1626 fclose(logfile);
1627 logfile = NULL;
1629 cpu_set_log(loglevel);
1632 static void cpu_unlink_tb(CPUState *env)
1634 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1635 problem and hope the cpu will stop of its own accord. For userspace
1636 emulation this often isn't actually as bad as it sounds. Often
1637 signals are used primarily to interrupt blocking syscalls. */
1638 TranslationBlock *tb;
1639 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1641 spin_lock(&interrupt_lock);
1642 tb = env->current_tb;
1643 /* if the cpu is currently executing code, we must unlink it and
1644 all the potentially executing TB */
1645 if (tb) {
1646 env->current_tb = NULL;
1647 tb_reset_jump_recursive(tb);
1649 spin_unlock(&interrupt_lock);
1652 #ifndef CONFIG_USER_ONLY
1653 /* mask must never be zero, except for A20 change call */
1654 static void tcg_handle_interrupt(CPUState *env, int mask)
1656 int old_mask;
1658 old_mask = env->interrupt_request;
1659 env->interrupt_request |= mask;
1662 * If called from iothread context, wake the target cpu in
1663 * case its halted.
1665 if (!qemu_cpu_is_self(env)) {
1666 qemu_cpu_kick(env);
1667 return;
1670 if (use_icount) {
1671 env->icount_decr.u16.high = 0xffff;
1672 if (!can_do_io(env)
1673 && (mask & ~old_mask) != 0) {
1674 cpu_abort(env, "Raised interrupt while not in I/O function");
1676 } else {
1677 cpu_unlink_tb(env);
1681 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1683 #else /* CONFIG_USER_ONLY */
1685 void cpu_interrupt(CPUState *env, int mask)
1687 env->interrupt_request |= mask;
1688 cpu_unlink_tb(env);
1690 #endif /* CONFIG_USER_ONLY */
1692 void cpu_reset_interrupt(CPUState *env, int mask)
1694 env->interrupt_request &= ~mask;
1697 void cpu_exit(CPUState *env)
1699 env->exit_request = 1;
1700 cpu_unlink_tb(env);
1703 const CPULogItem cpu_log_items[] = {
1704 { CPU_LOG_TB_OUT_ASM, "out_asm",
1705 "show generated host assembly code for each compiled TB" },
1706 { CPU_LOG_TB_IN_ASM, "in_asm",
1707 "show target assembly code for each compiled TB" },
1708 { CPU_LOG_TB_OP, "op",
1709 "show micro ops for each compiled TB" },
1710 { CPU_LOG_TB_OP_OPT, "op_opt",
1711 "show micro ops "
1712 #ifdef TARGET_I386
1713 "before eflags optimization and "
1714 #endif
1715 "after liveness analysis" },
1716 { CPU_LOG_INT, "int",
1717 "show interrupts/exceptions in short format" },
1718 { CPU_LOG_EXEC, "exec",
1719 "show trace before each executed TB (lots of logs)" },
1720 { CPU_LOG_TB_CPU, "cpu",
1721 "show CPU state before block translation" },
1722 #ifdef TARGET_I386
1723 { CPU_LOG_PCALL, "pcall",
1724 "show protected mode far calls/returns/exceptions" },
1725 { CPU_LOG_RESET, "cpu_reset",
1726 "show CPU state before CPU resets" },
1727 #endif
1728 #ifdef DEBUG_IOPORT
1729 { CPU_LOG_IOPORT, "ioport",
1730 "show all i/o ports accesses" },
1731 #endif
1732 { 0, NULL, NULL },
1735 #ifndef CONFIG_USER_ONLY
1736 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1737 = QLIST_HEAD_INITIALIZER(memory_client_list);
1739 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1740 ram_addr_t size,
1741 ram_addr_t phys_offset,
1742 bool log_dirty)
1744 CPUPhysMemoryClient *client;
1745 QLIST_FOREACH(client, &memory_client_list, list) {
1746 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1750 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1751 target_phys_addr_t end)
1753 CPUPhysMemoryClient *client;
1754 QLIST_FOREACH(client, &memory_client_list, list) {
1755 int r = client->sync_dirty_bitmap(client, start, end);
1756 if (r < 0)
1757 return r;
1759 return 0;
1762 static int cpu_notify_migration_log(int enable)
1764 CPUPhysMemoryClient *client;
1765 if (enable) {
1766 memory_global_dirty_log_start();
1767 } else {
1768 memory_global_dirty_log_stop();
1770 QLIST_FOREACH(client, &memory_client_list, list) {
1771 int r = client->migration_log(client, enable);
1772 if (r < 0)
1773 return r;
1775 return 0;
1778 struct last_map {
1779 target_phys_addr_t start_addr;
1780 ram_addr_t size;
1781 ram_addr_t phys_offset;
1784 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1785 * address. Each intermediate table provides the next L2_BITs of guest
1786 * physical address space. The number of levels vary based on host and
1787 * guest configuration, making it efficient to build the final guest
1788 * physical address by seeding the L1 offset and shifting and adding in
1789 * each L2 offset as we recurse through them. */
1790 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1791 void **lp, target_phys_addr_t addr,
1792 struct last_map *map)
1794 int i;
1796 if (*lp == NULL) {
1797 return;
1799 if (level == 0) {
1800 PhysPageDesc *pd = *lp;
1801 addr <<= L2_BITS + TARGET_PAGE_BITS;
1802 for (i = 0; i < L2_SIZE; ++i) {
1803 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1804 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1806 if (map->size &&
1807 start_addr == map->start_addr + map->size &&
1808 pd[i].phys_offset == map->phys_offset + map->size) {
1810 map->size += TARGET_PAGE_SIZE;
1811 continue;
1812 } else if (map->size) {
1813 client->set_memory(client, map->start_addr,
1814 map->size, map->phys_offset, false);
1817 map->start_addr = start_addr;
1818 map->size = TARGET_PAGE_SIZE;
1819 map->phys_offset = pd[i].phys_offset;
1822 } else {
1823 void **pp = *lp;
1824 for (i = 0; i < L2_SIZE; ++i) {
1825 phys_page_for_each_1(client, level - 1, pp + i,
1826 (addr << L2_BITS) | i, map);
1831 static void phys_page_for_each(CPUPhysMemoryClient *client)
1833 int i;
1834 struct last_map map = { };
1836 for (i = 0; i < P_L1_SIZE; ++i) {
1837 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1838 l1_phys_map + i, i, &map);
1840 if (map.size) {
1841 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1842 false);
1846 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1848 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1849 phys_page_for_each(client);
1852 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1854 QLIST_REMOVE(client, list);
1856 #endif
/* Return 1 when the first n bytes of s1 are exactly the string s2
   (s2 must be n bytes long), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    size_t want = strlen(s2);

    if (want != (size_t)n) {
        return 0;
    }
    return memcmp(s1, s2, n) == 0 ? 1 : 0;
}
1865 /* takes a comma separated list of log masks. Return 0 if error. */
1866 int cpu_str_to_log_mask(const char *str)
1868 const CPULogItem *item;
1869 int mask;
1870 const char *p, *p1;
1872 p = str;
1873 mask = 0;
1874 for(;;) {
1875 p1 = strchr(p, ',');
1876 if (!p1)
1877 p1 = p + strlen(p);
1878 if(cmp1(p,p1-p,"all")) {
1879 for(item = cpu_log_items; item->mask != 0; item++) {
1880 mask |= item->mask;
1882 } else {
1883 for(item = cpu_log_items; item->mask != 0; item++) {
1884 if (cmp1(p, p1 - p, item->name))
1885 goto found;
1887 return 0;
1889 found:
1890 mask |= item->mask;
1891 if (*p1 != ',')
1892 break;
1893 p = p1 + 1;
1895 return mask;
1898 void cpu_abort(CPUState *env, const char *fmt, ...)
1900 va_list ap;
1901 va_list ap2;
1903 va_start(ap, fmt);
1904 va_copy(ap2, ap);
1905 fprintf(stderr, "qemu: fatal: ");
1906 vfprintf(stderr, fmt, ap);
1907 fprintf(stderr, "\n");
1908 #ifdef TARGET_I386
1909 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1910 #else
1911 cpu_dump_state(env, stderr, fprintf, 0);
1912 #endif
1913 if (qemu_log_enabled()) {
1914 qemu_log("qemu: fatal: ");
1915 qemu_log_vprintf(fmt, ap2);
1916 qemu_log("\n");
1917 #ifdef TARGET_I386
1918 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1919 #else
1920 log_cpu_state(env, 0);
1921 #endif
1922 qemu_log_flush();
1923 qemu_log_close();
1925 va_end(ap2);
1926 va_end(ap);
1927 #if defined(CONFIG_USER_ONLY)
1929 struct sigaction act;
1930 sigfillset(&act.sa_mask);
1931 act.sa_handler = SIG_DFL;
1932 sigaction(SIGABRT, &act, NULL);
1934 #endif
1935 abort();
1938 CPUState *cpu_copy(CPUState *env)
1940 CPUState *new_env = cpu_init(env->cpu_model_str);
1941 CPUState *next_cpu = new_env->next_cpu;
1942 int cpu_index = new_env->cpu_index;
1943 #if defined(TARGET_HAS_ICE)
1944 CPUBreakpoint *bp;
1945 CPUWatchpoint *wp;
1946 #endif
1948 memcpy(new_env, env, sizeof(CPUState));
1950 /* Preserve chaining and index. */
1951 new_env->next_cpu = next_cpu;
1952 new_env->cpu_index = cpu_index;
1954 /* Clone all break/watchpoints.
1955 Note: Once we support ptrace with hw-debug register access, make sure
1956 BP_CPU break/watchpoints are handled correctly on clone. */
1957 QTAILQ_INIT(&env->breakpoints);
1958 QTAILQ_INIT(&env->watchpoints);
1959 #if defined(TARGET_HAS_ICE)
1960 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1961 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1963 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1964 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1965 wp->flags, NULL);
1967 #endif
1969 return new_env;
1972 #if !defined(CONFIG_USER_ONLY)
1974 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1976 unsigned int i;
1978 /* Discard jump cache entries for any tb which might potentially
1979 overlap the flushed page. */
1980 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1981 memset (&env->tb_jmp_cache[i], 0,
1982 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1984 i = tb_jmp_cache_hash_page(addr);
1985 memset (&env->tb_jmp_cache[i], 0,
1986 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1989 static CPUTLBEntry s_cputlb_empty_entry = {
1990 .addr_read = -1,
1991 .addr_write = -1,
1992 .addr_code = -1,
1993 .addend = -1,
1996 /* NOTE: if flush_global is true, also flush global entries (not
1997 implemented yet) */
1998 void tlb_flush(CPUState *env, int flush_global)
2000 int i;
2002 #if defined(DEBUG_TLB)
2003 printf("tlb_flush:\n");
2004 #endif
2005 /* must reset current TB so that interrupts cannot modify the
2006 links while we are modifying them */
2007 env->current_tb = NULL;
2009 for(i = 0; i < CPU_TLB_SIZE; i++) {
2010 int mmu_idx;
2011 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2012 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2016 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2018 env->tlb_flush_addr = -1;
2019 env->tlb_flush_mask = 0;
2020 tlb_flush_count++;
2023 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2025 if (addr == (tlb_entry->addr_read &
2026 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2027 addr == (tlb_entry->addr_write &
2028 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2029 addr == (tlb_entry->addr_code &
2030 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2031 *tlb_entry = s_cputlb_empty_entry;
2035 void tlb_flush_page(CPUState *env, target_ulong addr)
2037 int i;
2038 int mmu_idx;
2040 #if defined(DEBUG_TLB)
2041 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2042 #endif
2043 /* Check if we need to flush due to large pages. */
2044 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2045 #if defined(DEBUG_TLB)
2046 printf("tlb_flush_page: forced full flush ("
2047 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2048 env->tlb_flush_addr, env->tlb_flush_mask);
2049 #endif
2050 tlb_flush(env, 1);
2051 return;
2053 /* must reset current TB so that interrupts cannot modify the
2054 links while we are modifying them */
2055 env->current_tb = NULL;
2057 addr &= TARGET_PAGE_MASK;
2058 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2059 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2060 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2062 tlb_flush_jmp_cache(env, addr);
2065 /* update the TLBs so that writes to code in the virtual page 'addr'
2066 can be detected */
2067 static void tlb_protect_code(ram_addr_t ram_addr)
2069 cpu_physical_memory_reset_dirty(ram_addr,
2070 ram_addr + TARGET_PAGE_SIZE,
2071 CODE_DIRTY_FLAG);
2074 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2075 tested for self modifying code */
2076 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2077 target_ulong vaddr)
2079 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2082 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2083 unsigned long start, unsigned long length)
2085 unsigned long addr;
2086 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2087 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2088 if ((addr - start) < length) {
2089 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2094 /* Note: start and end must be within the same ram block. */
2095 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2096 int dirty_flags)
2098 CPUState *env;
2099 unsigned long length, start1;
2100 int i;
2102 start &= TARGET_PAGE_MASK;
2103 end = TARGET_PAGE_ALIGN(end);
2105 length = end - start;
2106 if (length == 0)
2107 return;
2108 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2110 /* we modify the TLB cache so that the dirty bit will be set again
2111 when accessing the range */
2112 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2113 /* Check that we don't span multiple blocks - this breaks the
2114 address comparisons below. */
2115 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2116 != (end - 1) - start) {
2117 abort();
2120 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2121 int mmu_idx;
2122 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2123 for(i = 0; i < CPU_TLB_SIZE; i++)
2124 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2125 start1, length);
2130 int cpu_physical_memory_set_dirty_tracking(int enable)
2132 int ret = 0;
2133 in_migration = enable;
2134 ret = cpu_notify_migration_log(!!enable);
2135 return ret;
2138 int cpu_physical_memory_get_dirty_tracking(void)
2140 return in_migration;
2143 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2144 target_phys_addr_t end_addr)
2146 int ret;
2148 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2149 return ret;
2152 int cpu_physical_log_start(target_phys_addr_t start_addr,
2153 ram_addr_t size)
2155 CPUPhysMemoryClient *client;
2156 QLIST_FOREACH(client, &memory_client_list, list) {
2157 if (client->log_start) {
2158 int r = client->log_start(client, start_addr, size);
2159 if (r < 0) {
2160 return r;
2164 return 0;
2167 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2168 ram_addr_t size)
2170 CPUPhysMemoryClient *client;
2171 QLIST_FOREACH(client, &memory_client_list, list) {
2172 if (client->log_stop) {
2173 int r = client->log_stop(client, start_addr, size);
2174 if (r < 0) {
2175 return r;
2179 return 0;
2182 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2184 ram_addr_t ram_addr;
2185 void *p;
2187 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2188 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2189 + tlb_entry->addend);
2190 ram_addr = qemu_ram_addr_from_host_nofail(p);
2191 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2192 tlb_entry->addr_write |= TLB_NOTDIRTY;
2197 /* update the TLB according to the current state of the dirty bits */
2198 void cpu_tlb_update_dirty(CPUState *env)
2200 int i;
2201 int mmu_idx;
2202 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2203 for(i = 0; i < CPU_TLB_SIZE; i++)
2204 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2208 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2210 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2211 tlb_entry->addr_write = vaddr;
2214 /* update the TLB corresponding to virtual page vaddr
2215 so that it is no longer dirty */
2216 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2218 int i;
2219 int mmu_idx;
2221 vaddr &= TARGET_PAGE_MASK;
2222 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2223 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2224 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2227 /* Our TLB does not support large pages, so remember the area covered by
2228 large pages and trigger a full TLB flush if these are invalidated. */
2229 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2230 target_ulong size)
2232 target_ulong mask = ~(size - 1);
2234 if (env->tlb_flush_addr == (target_ulong)-1) {
2235 env->tlb_flush_addr = vaddr & mask;
2236 env->tlb_flush_mask = mask;
2237 return;
2239 /* Extend the existing region to include the new page.
2240 This is a compromise between unnecessary flushes and the cost
2241 of maintaining a full variable size TLB. */
2242 mask &= env->tlb_flush_mask;
2243 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2244 mask <<= 1;
2246 env->tlb_flush_addr &= mask;
2247 env->tlb_flush_mask = mask;
2250 /* Add a new TLB entry. At most one entry for a given virtual address
2251 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2252 supplied size is only used by tlb_flush_page. */
2253 void tlb_set_page(CPUState *env, target_ulong vaddr,
2254 target_phys_addr_t paddr, int prot,
2255 int mmu_idx, target_ulong size)
2257 PhysPageDesc *p;
2258 unsigned long pd;
2259 unsigned int index;
2260 target_ulong address;
2261 target_ulong code_address;
2262 unsigned long addend;
2263 CPUTLBEntry *te;
2264 CPUWatchpoint *wp;
2265 target_phys_addr_t iotlb;
2267 assert(size >= TARGET_PAGE_SIZE);
2268 if (size != TARGET_PAGE_SIZE) {
2269 tlb_add_large_page(env, vaddr, size);
2271 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2272 if (!p) {
2273 pd = IO_MEM_UNASSIGNED;
2274 } else {
2275 pd = p->phys_offset;
2277 #if defined(DEBUG_TLB)
2278 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2279 " prot=%x idx=%d pd=0x%08lx\n",
2280 vaddr, paddr, prot, mmu_idx, pd);
2281 #endif
2283 address = vaddr;
2284 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2285 /* IO memory case (romd handled later) */
2286 address |= TLB_MMIO;
2288 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2289 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2290 /* Normal RAM. */
2291 iotlb = pd & TARGET_PAGE_MASK;
2292 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2293 iotlb |= IO_MEM_NOTDIRTY;
2294 else
2295 iotlb |= IO_MEM_ROM;
2296 } else {
2297 /* IO handlers are currently passed a physical address.
2298 It would be nice to pass an offset from the base address
2299 of that region. This would avoid having to special case RAM,
2300 and avoid full address decoding in every device.
2301 We can't use the high bits of pd for this because
2302 IO_MEM_ROMD uses these as a ram address. */
2303 iotlb = (pd & ~TARGET_PAGE_MASK);
2304 if (p) {
2305 iotlb += p->region_offset;
2306 } else {
2307 iotlb += paddr;
2311 code_address = address;
2312 /* Make accesses to pages with watchpoints go via the
2313 watchpoint trap routines. */
2314 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2315 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2316 /* Avoid trapping reads of pages with a write breakpoint. */
2317 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2318 iotlb = io_mem_watch + paddr;
2319 address |= TLB_MMIO;
2320 break;
2325 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2326 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2327 te = &env->tlb_table[mmu_idx][index];
2328 te->addend = addend - vaddr;
2329 if (prot & PAGE_READ) {
2330 te->addr_read = address;
2331 } else {
2332 te->addr_read = -1;
2335 if (prot & PAGE_EXEC) {
2336 te->addr_code = code_address;
2337 } else {
2338 te->addr_code = -1;
2340 if (prot & PAGE_WRITE) {
2341 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2342 (pd & IO_MEM_ROMD)) {
2343 /* Write access calls the I/O callback. */
2344 te->addr_write = address | TLB_MMIO;
2345 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2346 !cpu_physical_memory_is_dirty(pd)) {
2347 te->addr_write = address | TLB_NOTDIRTY;
2348 } else {
2349 te->addr_write = address;
2351 } else {
2352 te->addr_write = -1;
2356 #else
2358 void tlb_flush(CPUState *env, int flush_global)
2362 void tlb_flush_page(CPUState *env, target_ulong addr)
/*
 * Walks guest process memory "regions" one by one
 * and calls the callback function 'fn' for each region.
 */
2371 struct walk_memory_regions_data
2373 walk_memory_regions_fn fn;
2374 void *priv;
2375 unsigned long start;
2376 int prot;
2379 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2380 abi_ulong end, int new_prot)
2382 if (data->start != -1ul) {
2383 int rc = data->fn(data->priv, data->start, end, data->prot);
2384 if (rc != 0) {
2385 return rc;
2389 data->start = (new_prot ? end : -1ul);
2390 data->prot = new_prot;
2392 return 0;
2395 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2396 abi_ulong base, int level, void **lp)
2398 abi_ulong pa;
2399 int i, rc;
2401 if (*lp == NULL) {
2402 return walk_memory_regions_end(data, base, 0);
2405 if (level == 0) {
2406 PageDesc *pd = *lp;
2407 for (i = 0; i < L2_SIZE; ++i) {
2408 int prot = pd[i].flags;
2410 pa = base | (i << TARGET_PAGE_BITS);
2411 if (prot != data->prot) {
2412 rc = walk_memory_regions_end(data, pa, prot);
2413 if (rc != 0) {
2414 return rc;
2418 } else {
2419 void **pp = *lp;
2420 for (i = 0; i < L2_SIZE; ++i) {
2421 pa = base | ((abi_ulong)i <<
2422 (TARGET_PAGE_BITS + L2_BITS * level));
2423 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2424 if (rc != 0) {
2425 return rc;
2430 return 0;
2433 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2435 struct walk_memory_regions_data data;
2436 unsigned long i;
2438 data.fn = fn;
2439 data.priv = priv;
2440 data.start = -1ul;
2441 data.prot = 0;
2443 for (i = 0; i < V_L1_SIZE; i++) {
2444 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2445 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2446 if (rc != 0) {
2447 return rc;
2451 return walk_memory_regions_end(&data, 0, 0);
2454 static int dump_region(void *priv, abi_ulong start,
2455 abi_ulong end, unsigned long prot)
2457 FILE *f = (FILE *)priv;
2459 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2460 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2461 start, end, end - start,
2462 ((prot & PAGE_READ) ? 'r' : '-'),
2463 ((prot & PAGE_WRITE) ? 'w' : '-'),
2464 ((prot & PAGE_EXEC) ? 'x' : '-'));
2466 return (0);
2469 /* dump memory mappings */
2470 void page_dump(FILE *f)
2472 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2473 "start", "end", "size", "prot");
2474 walk_memory_regions(f, dump_region);
2477 int page_get_flags(target_ulong address)
2479 PageDesc *p;
2481 p = page_find(address >> TARGET_PAGE_BITS);
2482 if (!p)
2483 return 0;
2484 return p->flags;
2487 /* Modify the flags of a page and invalidate the code if necessary.
2488 The flag PAGE_WRITE_ORG is positioned automatically depending
2489 on PAGE_WRITE. The mmap_lock should already be held. */
2490 void page_set_flags(target_ulong start, target_ulong end, int flags)
2492 target_ulong addr, len;
2494 /* This function should never be called with addresses outside the
2495 guest address space. If this assert fires, it probably indicates
2496 a missing call to h2g_valid. */
2497 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2498 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2499 #endif
2500 assert(start < end);
2502 start = start & TARGET_PAGE_MASK;
2503 end = TARGET_PAGE_ALIGN(end);
2505 if (flags & PAGE_WRITE) {
2506 flags |= PAGE_WRITE_ORG;
2509 for (addr = start, len = end - start;
2510 len != 0;
2511 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2512 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2514 /* If the write protection bit is set, then we invalidate
2515 the code inside. */
2516 if (!(p->flags & PAGE_WRITE) &&
2517 (flags & PAGE_WRITE) &&
2518 p->first_tb) {
2519 tb_invalidate_phys_page(addr, 0, NULL);
2521 p->flags = flags;
2525 int page_check_range(target_ulong start, target_ulong len, int flags)
2527 PageDesc *p;
2528 target_ulong end;
2529 target_ulong addr;
2531 /* This function should never be called with addresses outside the
2532 guest address space. If this assert fires, it probably indicates
2533 a missing call to h2g_valid. */
2534 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2535 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2536 #endif
2538 if (len == 0) {
2539 return 0;
2541 if (start + len - 1 < start) {
2542 /* We've wrapped around. */
2543 return -1;
2546 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2547 start = start & TARGET_PAGE_MASK;
2549 for (addr = start, len = end - start;
2550 len != 0;
2551 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2552 p = page_find(addr >> TARGET_PAGE_BITS);
2553 if( !p )
2554 return -1;
2555 if( !(p->flags & PAGE_VALID) )
2556 return -1;
2558 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2559 return -1;
2560 if (flags & PAGE_WRITE) {
2561 if (!(p->flags & PAGE_WRITE_ORG))
2562 return -1;
2563 /* unprotect the page if it was put read-only because it
2564 contains translated code */
2565 if (!(p->flags & PAGE_WRITE)) {
2566 if (!page_unprotect(addr, 0, NULL))
2567 return -1;
2569 return 0;
2572 return 0;
2575 /* called from signal handler: invalidate the code and unprotect the
2576 page. Return TRUE if the fault was successfully handled. */
2577 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2579 unsigned int prot;
2580 PageDesc *p;
2581 target_ulong host_start, host_end, addr;
2583 /* Technically this isn't safe inside a signal handler. However we
2584 know this only ever happens in a synchronous SEGV handler, so in
2585 practice it seems to be ok. */
2586 mmap_lock();
2588 p = page_find(address >> TARGET_PAGE_BITS);
2589 if (!p) {
2590 mmap_unlock();
2591 return 0;
2594 /* if the page was really writable, then we change its
2595 protection back to writable */
2596 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2597 host_start = address & qemu_host_page_mask;
2598 host_end = host_start + qemu_host_page_size;
2600 prot = 0;
2601 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2602 p = page_find(addr >> TARGET_PAGE_BITS);
2603 p->flags |= PAGE_WRITE;
2604 prot |= p->flags;
2606 /* and since the content will be modified, we must invalidate
2607 the corresponding translated code. */
2608 tb_invalidate_phys_page(addr, pc, puc);
2609 #ifdef DEBUG_TB_CHECK
2610 tb_invalidate_check(addr);
2611 #endif
2613 mprotect((void *)g2h(host_start), qemu_host_page_size,
2614 prot & PAGE_BITS);
2616 mmap_unlock();
2617 return 1;
2619 mmap_unlock();
2620 return 0;
2623 static inline void tlb_set_dirty(CPUState *env,
2624 unsigned long addr, target_ulong vaddr)
2627 #endif /* defined(CONFIG_USER_ONLY) */
2629 #if !defined(CONFIG_USER_ONLY)
2631 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2632 typedef struct subpage_t {
2633 target_phys_addr_t base;
2634 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2635 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2636 } subpage_t;
2638 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2639 ram_addr_t memory, ram_addr_t region_offset);
2640 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2641 ram_addr_t orig_memory,
2642 ram_addr_t region_offset);
/*
 * For the page at 'addr', compute the first (start_addr2) and last
 * (end_addr2) byte offsets covered by the registration that begins at
 * 'start_addr', and set need_subpage to 1 when the registration does not
 * cover the page from offset 0 through TARGET_PAGE_SIZE - 1.
 *
 * NOTE: relies on a variable named 'orig_size' being in scope at the
 * point of expansion; the 'end_addr' argument is not used.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if (addr > start_addr) {                                        \
            start_addr2 = 0;                                            \
        } else {                                                        \
            start_addr2 = start_addr & ~TARGET_PAGE_MASK;               \
            if (start_addr2 > 0) {                                      \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
                                                                        \
        if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) {      \
            end_addr2 = TARGET_PAGE_SIZE - 1;                           \
        } else {                                                        \
            end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
            if (end_addr2 < TARGET_PAGE_SIZE - 1) {                     \
                need_subpage = 1;                                       \
            }                                                           \
        }                                                               \
    } while (0)
2663 /* register physical memory.
2664 For RAM, 'size' must be a multiple of the target page size.
2665 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2666 io memory page. The address used when calling the IO function is
2667 the offset from the start of the region, plus region_offset. Both
2668 start_addr and region_offset are rounded down to a page boundary
2669 before calculating this offset. This should not be a problem unless
2670 the low bits of start_addr and region_offset differ. */
2671 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2672 ram_addr_t size,
2673 ram_addr_t phys_offset,
2674 ram_addr_t region_offset,
2675 bool log_dirty)
2677 target_phys_addr_t addr, end_addr;
2678 PhysPageDesc *p;
2679 CPUState *env;
2680 ram_addr_t orig_size = size;
2681 subpage_t *subpage;
2683 assert(size);
2684 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2686 if (phys_offset == IO_MEM_UNASSIGNED) {
2687 region_offset = start_addr;
2689 region_offset &= TARGET_PAGE_MASK;
2690 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2691 end_addr = start_addr + (target_phys_addr_t)size;
2693 addr = start_addr;
2694 do {
2695 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2696 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2697 ram_addr_t orig_memory = p->phys_offset;
2698 target_phys_addr_t start_addr2, end_addr2;
2699 int need_subpage = 0;
2701 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2702 need_subpage);
2703 if (need_subpage) {
2704 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2705 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2706 &p->phys_offset, orig_memory,
2707 p->region_offset);
2708 } else {
2709 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2710 >> IO_MEM_SHIFT];
2712 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2713 region_offset);
2714 p->region_offset = 0;
2715 } else {
2716 p->phys_offset = phys_offset;
2717 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2718 (phys_offset & IO_MEM_ROMD))
2719 phys_offset += TARGET_PAGE_SIZE;
2721 } else {
2722 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2723 p->phys_offset = phys_offset;
2724 p->region_offset = region_offset;
2725 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2726 (phys_offset & IO_MEM_ROMD)) {
2727 phys_offset += TARGET_PAGE_SIZE;
2728 } else {
2729 target_phys_addr_t start_addr2, end_addr2;
2730 int need_subpage = 0;
2732 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2733 end_addr2, need_subpage);
2735 if (need_subpage) {
2736 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2737 &p->phys_offset, IO_MEM_UNASSIGNED,
2738 addr & TARGET_PAGE_MASK);
2739 subpage_register(subpage, start_addr2, end_addr2,
2740 phys_offset, region_offset);
2741 p->region_offset = 0;
2745 region_offset += TARGET_PAGE_SIZE;
2746 addr += TARGET_PAGE_SIZE;
2747 } while (addr != end_addr);
2749 /* since each CPU stores ram addresses in its TLB cache, we must
2750 reset the modified entries */
2751 /* XXX: slow ! */
2752 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2753 tlb_flush(env, 1);
2757 /* XXX: temporary until new memory mapping API */
2758 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2760 PhysPageDesc *p;
2762 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2763 if (!p)
2764 return IO_MEM_UNASSIGNED;
2765 return p->phys_offset;
2768 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2770 if (kvm_enabled())
2771 kvm_coalesce_mmio_region(addr, size);
2774 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2776 if (kvm_enabled())
2777 kvm_uncoalesce_mmio_region(addr, size);
/* Forward the request to KVM; does nothing when KVM is not enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2786 #if defined(__linux__) && !defined(TARGET_S390X)
2788 #include <sys/vfs.h>
2790 #define HUGETLBFS_MAGIC 0x958458f6
2792 static long gethugepagesize(const char *path)
2794 struct statfs fs;
2795 int ret;
2797 do {
2798 ret = statfs(path, &fs);
2799 } while (ret != 0 && errno == EINTR);
2801 if (ret != 0) {
2802 perror(path);
2803 return 0;
2806 if (fs.f_type != HUGETLBFS_MAGIC)
2807 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2809 return fs.f_bsize;
2812 static void *file_ram_alloc(RAMBlock *block,
2813 ram_addr_t memory,
2814 const char *path)
2816 char *filename;
2817 void *area;
2818 int fd;
2819 #ifdef MAP_POPULATE
2820 int flags;
2821 #endif
2822 unsigned long hpagesize;
2824 hpagesize = gethugepagesize(path);
2825 if (!hpagesize) {
2826 return NULL;
2829 if (memory < hpagesize) {
2830 return NULL;
2833 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2834 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2835 return NULL;
2838 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2839 return NULL;
2842 fd = mkstemp(filename);
2843 if (fd < 0) {
2844 perror("unable to create backing store for hugepages");
2845 free(filename);
2846 return NULL;
2848 unlink(filename);
2849 free(filename);
2851 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2854 * ftruncate is not supported by hugetlbfs in older
2855 * hosts, so don't bother bailing out on errors.
2856 * If anything goes wrong with it under other filesystems,
2857 * mmap will fail.
2859 if (ftruncate(fd, memory))
2860 perror("ftruncate");
2862 #ifdef MAP_POPULATE
2863 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2864 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2865 * to sidestep this quirk.
2867 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2868 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2869 #else
2870 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2871 #endif
2872 if (area == MAP_FAILED) {
2873 perror("file_ram_alloc: can't mmap RAM pages");
2874 close(fd);
2875 return (NULL);
2877 block->fd = fd;
2878 return area;
2880 #endif
2882 static ram_addr_t find_ram_offset(ram_addr_t size)
2884 RAMBlock *block, *next_block;
2885 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2887 if (QLIST_EMPTY(&ram_list.blocks))
2888 return 0;
2890 QLIST_FOREACH(block, &ram_list.blocks, next) {
2891 ram_addr_t end, next = RAM_ADDR_MAX;
2893 end = block->offset + block->length;
2895 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2896 if (next_block->offset >= end) {
2897 next = MIN(next, next_block->offset);
2900 if (next - end >= size && next - end < mingap) {
2901 offset = end;
2902 mingap = next - end;
2906 if (offset == RAM_ADDR_MAX) {
2907 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2908 (uint64_t)size);
2909 abort();
2912 return offset;
2915 static ram_addr_t last_ram_offset(void)
2917 RAMBlock *block;
2918 ram_addr_t last = 0;
2920 QLIST_FOREACH(block, &ram_list.blocks, next)
2921 last = MAX(last, block->offset + block->length);
2923 return last;
2926 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2927 ram_addr_t size, void *host,
2928 MemoryRegion *mr)
2930 RAMBlock *new_block, *block;
2932 size = TARGET_PAGE_ALIGN(size);
2933 new_block = g_malloc0(sizeof(*new_block));
2935 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2936 char *id = dev->parent_bus->info->get_dev_path(dev);
2937 if (id) {
2938 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2939 g_free(id);
2942 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2944 QLIST_FOREACH(block, &ram_list.blocks, next) {
2945 if (!strcmp(block->idstr, new_block->idstr)) {
2946 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2947 new_block->idstr);
2948 abort();
2952 new_block->offset = find_ram_offset(size);
2953 if (host) {
2954 new_block->host = host;
2955 new_block->flags |= RAM_PREALLOC_MASK;
2956 } else {
2957 if (mem_path) {
2958 #if defined (__linux__) && !defined(TARGET_S390X)
2959 new_block->host = file_ram_alloc(new_block, size, mem_path);
2960 if (!new_block->host) {
2961 new_block->host = qemu_vmalloc(size);
2962 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2964 #else
2965 fprintf(stderr, "-mem-path option unsupported\n");
2966 exit(1);
2967 #endif
2968 } else {
2969 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2970 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2971 an system defined value, which is at least 256GB. Larger systems
2972 have larger values. We put the guest between the end of data
2973 segment (system break) and this value. We use 32GB as a base to
2974 have enough room for the system break to grow. */
2975 new_block->host = mmap((void*)0x800000000, size,
2976 PROT_EXEC|PROT_READ|PROT_WRITE,
2977 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2978 if (new_block->host == MAP_FAILED) {
2979 fprintf(stderr, "Allocating RAM failed\n");
2980 abort();
2982 #else
2983 if (xen_enabled()) {
2984 xen_ram_alloc(new_block->offset, size, mr);
2985 } else {
2986 new_block->host = qemu_vmalloc(size);
2988 #endif
2989 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2992 new_block->length = size;
2994 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2996 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2997 last_ram_offset() >> TARGET_PAGE_BITS);
2998 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2999 0xff, size >> TARGET_PAGE_BITS);
3001 if (kvm_enabled())
3002 kvm_setup_guest_memory(new_block->host, size);
3004 return new_block->offset;
3007 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size,
3008 MemoryRegion *mr)
3010 return qemu_ram_alloc_from_ptr(dev, name, size, NULL, mr);
3013 void qemu_ram_free_from_ptr(ram_addr_t addr)
3015 RAMBlock *block;
3017 QLIST_FOREACH(block, &ram_list.blocks, next) {
3018 if (addr == block->offset) {
3019 QLIST_REMOVE(block, next);
3020 g_free(block);
3021 return;
3026 void qemu_ram_free(ram_addr_t addr)
3028 RAMBlock *block;
3030 QLIST_FOREACH(block, &ram_list.blocks, next) {
3031 if (addr == block->offset) {
3032 QLIST_REMOVE(block, next);
3033 if (block->flags & RAM_PREALLOC_MASK) {
3035 } else if (mem_path) {
3036 #if defined (__linux__) && !defined(TARGET_S390X)
3037 if (block->fd) {
3038 munmap(block->host, block->length);
3039 close(block->fd);
3040 } else {
3041 qemu_vfree(block->host);
3043 #else
3044 abort();
3045 #endif
3046 } else {
3047 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3048 munmap(block->host, block->length);
3049 #else
3050 if (xen_enabled()) {
3051 xen_invalidate_map_cache_entry(block->host);
3052 } else {
3053 qemu_vfree(block->host);
3055 #endif
3057 g_free(block);
3058 return;
3064 #ifndef _WIN32
3065 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3067 RAMBlock *block;
3068 ram_addr_t offset;
3069 int flags;
3070 void *area, *vaddr;
3072 QLIST_FOREACH(block, &ram_list.blocks, next) {
3073 offset = addr - block->offset;
3074 if (offset < block->length) {
3075 vaddr = block->host + offset;
3076 if (block->flags & RAM_PREALLOC_MASK) {
3078 } else {
3079 flags = MAP_FIXED;
3080 munmap(vaddr, length);
3081 if (mem_path) {
3082 #if defined(__linux__) && !defined(TARGET_S390X)
3083 if (block->fd) {
3084 #ifdef MAP_POPULATE
3085 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3086 MAP_PRIVATE;
3087 #else
3088 flags |= MAP_PRIVATE;
3089 #endif
3090 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3091 flags, block->fd, offset);
3092 } else {
3093 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3094 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3095 flags, -1, 0);
3097 #else
3098 abort();
3099 #endif
3100 } else {
3101 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3102 flags |= MAP_SHARED | MAP_ANONYMOUS;
3103 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3104 flags, -1, 0);
3105 #else
3106 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3107 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3108 flags, -1, 0);
3109 #endif
3111 if (area != vaddr) {
3112 fprintf(stderr, "Could not remap addr: "
3113 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3114 length, addr);
3115 exit(1);
3117 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3119 return;
3123 #endif /* !_WIN32 */
3125 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3126 With the exception of the softmmu code in this file, this should
3127 only be used for local memory (e.g. video ram) that the device owns,
3128 and knows it isn't going to access beyond the end of the block.
3130 It should not be used for general purpose DMA.
3131 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3133 void *qemu_get_ram_ptr(ram_addr_t addr)
3135 RAMBlock *block;
3137 QLIST_FOREACH(block, &ram_list.blocks, next) {
3138 if (addr - block->offset < block->length) {
3139 /* Move this entry to to start of the list. */
3140 if (block != QLIST_FIRST(&ram_list.blocks)) {
3141 QLIST_REMOVE(block, next);
3142 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3144 if (xen_enabled()) {
3145 /* We need to check if the requested address is in the RAM
3146 * because we don't want to map the entire memory in QEMU.
3147 * In that case just map until the end of the page.
3149 if (block->offset == 0) {
3150 return xen_map_cache(addr, 0, 0);
3151 } else if (block->host == NULL) {
3152 block->host =
3153 xen_map_cache(block->offset, block->length, 1);
3156 return block->host + (addr - block->offset);
3160 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3161 abort();
3163 return NULL;
3166 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3167 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3169 void *qemu_safe_ram_ptr(ram_addr_t addr)
3171 RAMBlock *block;
3173 QLIST_FOREACH(block, &ram_list.blocks, next) {
3174 if (addr - block->offset < block->length) {
3175 if (xen_enabled()) {
3176 /* We need to check if the requested address is in the RAM
3177 * because we don't want to map the entire memory in QEMU.
3178 * In that case just map until the end of the page.
3180 if (block->offset == 0) {
3181 return xen_map_cache(addr, 0, 0);
3182 } else if (block->host == NULL) {
3183 block->host =
3184 xen_map_cache(block->offset, block->length, 1);
3187 return block->host + (addr - block->offset);
3191 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3192 abort();
3194 return NULL;
3197 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3198 * but takes a size argument */
3199 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3201 if (*size == 0) {
3202 return NULL;
3204 if (xen_enabled()) {
3205 return xen_map_cache(addr, *size, 1);
3206 } else {
3207 RAMBlock *block;
3209 QLIST_FOREACH(block, &ram_list.blocks, next) {
3210 if (addr - block->offset < block->length) {
3211 if (addr - block->offset + *size > block->length)
3212 *size = block->length - addr + block->offset;
3213 return block->host + (addr - block->offset);
3217 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3218 abort();
/* Only emits the qemu_put_ram_ptr trace event for 'addr'. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3227 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3229 RAMBlock *block;
3230 uint8_t *host = ptr;
3232 if (xen_enabled()) {
3233 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3234 return 0;
3237 QLIST_FOREACH(block, &ram_list.blocks, next) {
3238 /* This case append when the block is not mapped. */
3239 if (block->host == NULL) {
3240 continue;
3242 if (host - block->host < block->length) {
3243 *ram_addr = block->offset + (host - block->host);
3244 return 0;
3248 return -1;
3251 /* Some of the softmmu routines need to translate from a host pointer
3252 (typically a TLB entry) back to a ram offset. */
3253 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3255 ram_addr_t ram_addr;
3257 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3258 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3259 abort();
3261 return ram_addr;
3264 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3266 #ifdef DEBUG_UNASSIGNED
3267 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3268 #endif
3269 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3270 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3271 #endif
3272 return 0;
3275 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3277 #ifdef DEBUG_UNASSIGNED
3278 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3279 #endif
3280 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3281 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3282 #endif
3283 return 0;
3286 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3288 #ifdef DEBUG_UNASSIGNED
3289 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3290 #endif
3291 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3292 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3293 #endif
3294 return 0;
3297 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3299 #ifdef DEBUG_UNASSIGNED
3300 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3301 #endif
3302 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3303 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3304 #endif
3307 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3309 #ifdef DEBUG_UNASSIGNED
3310 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3311 #endif
3312 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3313 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3314 #endif
3317 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3319 #ifdef DEBUG_UNASSIGNED
3320 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3321 #endif
3322 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3323 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3324 #endif
3327 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3328 unassigned_mem_readb,
3329 unassigned_mem_readw,
3330 unassigned_mem_readl,
3333 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3334 unassigned_mem_writeb,
3335 unassigned_mem_writew,
3336 unassigned_mem_writel,
3339 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3340 uint32_t val)
3342 int dirty_flags;
3343 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3344 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3345 #if !defined(CONFIG_USER_ONLY)
3346 tb_invalidate_phys_page_fast(ram_addr, 1);
3347 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3348 #endif
3350 stb_p(qemu_get_ram_ptr(ram_addr), val);
3351 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3352 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3353 /* we remove the notdirty callback only if the code has been
3354 flushed */
3355 if (dirty_flags == 0xff)
3356 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3359 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3360 uint32_t val)
3362 int dirty_flags;
3363 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3364 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3365 #if !defined(CONFIG_USER_ONLY)
3366 tb_invalidate_phys_page_fast(ram_addr, 2);
3367 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3368 #endif
3370 stw_p(qemu_get_ram_ptr(ram_addr), val);
3371 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3372 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3373 /* we remove the notdirty callback only if the code has been
3374 flushed */
3375 if (dirty_flags == 0xff)
3376 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3379 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3380 uint32_t val)
3382 int dirty_flags;
3383 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3384 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3385 #if !defined(CONFIG_USER_ONLY)
3386 tb_invalidate_phys_page_fast(ram_addr, 4);
3387 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3388 #endif
3390 stl_p(qemu_get_ram_ptr(ram_addr), val);
3391 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3392 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3393 /* we remove the notdirty callback only if the code has been
3394 flushed */
3395 if (dirty_flags == 0xff)
3396 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3399 static CPUReadMemoryFunc * const error_mem_read[3] = {
3400 NULL, /* never used */
3401 NULL, /* never used */
3402 NULL, /* never used */
3405 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3406 notdirty_mem_writeb,
3407 notdirty_mem_writew,
3408 notdirty_mem_writel,
3411 /* Generate a debug exception if a watchpoint has been hit. */
3412 static void check_watchpoint(int offset, int len_mask, int flags)
3414 CPUState *env = cpu_single_env;
3415 target_ulong pc, cs_base;
3416 TranslationBlock *tb;
3417 target_ulong vaddr;
3418 CPUWatchpoint *wp;
3419 int cpu_flags;
3421 if (env->watchpoint_hit) {
3422 /* We re-entered the check after replacing the TB. Now raise
3423 * the debug interrupt so that is will trigger after the
3424 * current instruction. */
3425 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3426 return;
3428 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3429 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3430 if ((vaddr == (wp->vaddr & len_mask) ||
3431 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3432 wp->flags |= BP_WATCHPOINT_HIT;
3433 if (!env->watchpoint_hit) {
3434 env->watchpoint_hit = wp;
3435 tb = tb_find_pc(env->mem_io_pc);
3436 if (!tb) {
3437 cpu_abort(env, "check_watchpoint: could not find TB for "
3438 "pc=%p", (void *)env->mem_io_pc);
3440 cpu_restore_state(tb, env, env->mem_io_pc);
3441 tb_phys_invalidate(tb, -1);
3442 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3443 env->exception_index = EXCP_DEBUG;
3444 } else {
3445 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3446 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3448 cpu_resume_from_signal(env, NULL);
3450 } else {
3451 wp->flags &= ~BP_WATCHPOINT_HIT;
3456 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3457 so these check for a hit then pass through to the normal out-of-line
3458 phys routines. */
3459 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3461 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3462 return ldub_phys(addr);
3465 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3467 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3468 return lduw_phys(addr);
3471 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3473 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3474 return ldl_phys(addr);
3477 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3478 uint32_t val)
3480 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3481 stb_phys(addr, val);
3484 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3485 uint32_t val)
3487 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3488 stw_phys(addr, val);
3491 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3492 uint32_t val)
3494 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3495 stl_phys(addr, val);
3498 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3499 watch_mem_readb,
3500 watch_mem_readw,
3501 watch_mem_readl,
3504 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3505 watch_mem_writeb,
3506 watch_mem_writew,
3507 watch_mem_writel,
3510 static inline uint32_t subpage_readlen (subpage_t *mmio,
3511 target_phys_addr_t addr,
3512 unsigned int len)
3514 unsigned int idx = SUBPAGE_IDX(addr);
3515 #if defined(DEBUG_SUBPAGE)
3516 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3517 mmio, len, addr, idx);
3518 #endif
3520 addr += mmio->region_offset[idx];
3521 idx = mmio->sub_io_index[idx];
3522 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3525 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3526 uint32_t value, unsigned int len)
3528 unsigned int idx = SUBPAGE_IDX(addr);
3529 #if defined(DEBUG_SUBPAGE)
3530 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3531 __func__, mmio, len, addr, idx, value);
3532 #endif
3534 addr += mmio->region_offset[idx];
3535 idx = mmio->sub_io_index[idx];
3536 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3539 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3541 return subpage_readlen(opaque, addr, 0);
3544 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3545 uint32_t value)
3547 subpage_writelen(opaque, addr, value, 0);
3550 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3552 return subpage_readlen(opaque, addr, 1);
3555 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3556 uint32_t value)
3558 subpage_writelen(opaque, addr, value, 1);
3561 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3563 return subpage_readlen(opaque, addr, 2);
3566 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3567 uint32_t value)
3569 subpage_writelen(opaque, addr, value, 2);
3572 static CPUReadMemoryFunc * const subpage_read[] = {
3573 &subpage_readb,
3574 &subpage_readw,
3575 &subpage_readl,
3578 static CPUWriteMemoryFunc * const subpage_write[] = {
3579 &subpage_writeb,
3580 &subpage_writew,
3581 &subpage_writel,
3584 static uint32_t subpage_ram_readb(void *opaque, target_phys_addr_t addr)
3586 ram_addr_t raddr = addr;
3587 void *ptr = qemu_get_ram_ptr(raddr);
3588 return ldub_p(ptr);
3591 static void subpage_ram_writeb(void *opaque, target_phys_addr_t addr,
3592 uint32_t value)
3594 ram_addr_t raddr = addr;
3595 void *ptr = qemu_get_ram_ptr(raddr);
3596 stb_p(ptr, value);
3599 static uint32_t subpage_ram_readw(void *opaque, target_phys_addr_t addr)
3601 ram_addr_t raddr = addr;
3602 void *ptr = qemu_get_ram_ptr(raddr);
3603 return lduw_p(ptr);
3606 static void subpage_ram_writew(void *opaque, target_phys_addr_t addr,
3607 uint32_t value)
3609 ram_addr_t raddr = addr;
3610 void *ptr = qemu_get_ram_ptr(raddr);
3611 stw_p(ptr, value);
3614 static uint32_t subpage_ram_readl(void *opaque, target_phys_addr_t addr)
3616 ram_addr_t raddr = addr;
3617 void *ptr = qemu_get_ram_ptr(raddr);
3618 return ldl_p(ptr);
3621 static void subpage_ram_writel(void *opaque, target_phys_addr_t addr,
3622 uint32_t value)
3624 ram_addr_t raddr = addr;
3625 void *ptr = qemu_get_ram_ptr(raddr);
3626 stl_p(ptr, value);
3629 static CPUReadMemoryFunc * const subpage_ram_read[] = {
3630 &subpage_ram_readb,
3631 &subpage_ram_readw,
3632 &subpage_ram_readl,
3635 static CPUWriteMemoryFunc * const subpage_ram_write[] = {
3636 &subpage_ram_writeb,
3637 &subpage_ram_writew,
3638 &subpage_ram_writel,
3641 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3642 ram_addr_t memory, ram_addr_t region_offset)
3644 int idx, eidx;
3646 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3647 return -1;
3648 idx = SUBPAGE_IDX(start);
3649 eidx = SUBPAGE_IDX(end);
3650 #if defined(DEBUG_SUBPAGE)
3651 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3652 mmio, start, end, idx, eidx, memory);
3653 #endif
3654 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
3655 memory = IO_MEM_SUBPAGE_RAM;
3657 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3658 for (; idx <= eidx; idx++) {
3659 mmio->sub_io_index[idx] = memory;
3660 mmio->region_offset[idx] = region_offset;
3663 return 0;
3666 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3667 ram_addr_t orig_memory,
3668 ram_addr_t region_offset)
3670 subpage_t *mmio;
3671 int subpage_memory;
3673 mmio = g_malloc0(sizeof(subpage_t));
3675 mmio->base = base;
3676 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3677 DEVICE_NATIVE_ENDIAN);
3678 #if defined(DEBUG_SUBPAGE)
3679 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3680 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3681 #endif
3682 *phys = subpage_memory | IO_MEM_SUBPAGE;
3683 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3685 return mmio;
3688 static int get_free_io_mem_idx(void)
3690 int i;
3692 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3693 if (!io_mem_used[i]) {
3694 io_mem_used[i] = 1;
3695 return i;
3697 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3698 return -1;
3702 * Usually, devices operate in little endian mode. There are devices out
3703 * there that operate in big endian too. Each device gets byte swapped
3704 * mmio if plugged onto a CPU that does the other endianness.
3706 * CPU Device swap?
3708 * little little no
3709 * little big yes
3710 * big little yes
3711 * big big no
3714 typedef struct SwapEndianContainer {
3715 CPUReadMemoryFunc *read[3];
3716 CPUWriteMemoryFunc *write[3];
3717 void *opaque;
3718 } SwapEndianContainer;
3720 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3722 uint32_t val;
3723 SwapEndianContainer *c = opaque;
3724 val = c->read[0](c->opaque, addr);
3725 return val;
3728 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3730 uint32_t val;
3731 SwapEndianContainer *c = opaque;
3732 val = bswap16(c->read[1](c->opaque, addr));
3733 return val;
3736 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3738 uint32_t val;
3739 SwapEndianContainer *c = opaque;
3740 val = bswap32(c->read[2](c->opaque, addr));
3741 return val;
3744 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3745 swapendian_mem_readb,
3746 swapendian_mem_readw,
3747 swapendian_mem_readl
3750 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3751 uint32_t val)
3753 SwapEndianContainer *c = opaque;
3754 c->write[0](c->opaque, addr, val);
3757 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3758 uint32_t val)
3760 SwapEndianContainer *c = opaque;
3761 c->write[1](c->opaque, addr, bswap16(val));
3764 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3765 uint32_t val)
3767 SwapEndianContainer *c = opaque;
3768 c->write[2](c->opaque, addr, bswap32(val));
3771 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3772 swapendian_mem_writeb,
3773 swapendian_mem_writew,
3774 swapendian_mem_writel
3777 static void swapendian_init(int io_index)
3779 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3780 int i;
3782 /* Swap mmio for big endian targets */
3783 c->opaque = io_mem_opaque[io_index];
3784 for (i = 0; i < 3; i++) {
3785 c->read[i] = io_mem_read[io_index][i];
3786 c->write[i] = io_mem_write[io_index][i];
3788 io_mem_read[io_index][i] = swapendian_readfn[i];
3789 io_mem_write[io_index][i] = swapendian_writefn[i];
3791 io_mem_opaque[io_index] = c;
3794 static void swapendian_del(int io_index)
3796 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3797 g_free(io_mem_opaque[io_index]);
3801 /* mem_read and mem_write are arrays of functions containing the
3802 function to access byte (index 0), word (index 1) and dword (index
3803 2). Functions can be omitted with a NULL function pointer.
3804 If io_index is non zero, the corresponding io zone is
3805 modified. If it is zero, a new io zone is allocated. The return
3806 value can be used with cpu_register_physical_memory(). (-1) is
3807 returned if error. */
3808 static int cpu_register_io_memory_fixed(int io_index,
3809 CPUReadMemoryFunc * const *mem_read,
3810 CPUWriteMemoryFunc * const *mem_write,
3811 void *opaque, enum device_endian endian)
3813 int i;
3815 if (io_index <= 0) {
3816 io_index = get_free_io_mem_idx();
3817 if (io_index == -1)
3818 return io_index;
3819 } else {
3820 io_index >>= IO_MEM_SHIFT;
3821 if (io_index >= IO_MEM_NB_ENTRIES)
3822 return -1;
3825 for (i = 0; i < 3; ++i) {
3826 io_mem_read[io_index][i]
3827 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3829 for (i = 0; i < 3; ++i) {
3830 io_mem_write[io_index][i]
3831 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3833 io_mem_opaque[io_index] = opaque;
3835 switch (endian) {
3836 case DEVICE_BIG_ENDIAN:
3837 #ifndef TARGET_WORDS_BIGENDIAN
3838 swapendian_init(io_index);
3839 #endif
3840 break;
3841 case DEVICE_LITTLE_ENDIAN:
3842 #ifdef TARGET_WORDS_BIGENDIAN
3843 swapendian_init(io_index);
3844 #endif
3845 break;
3846 case DEVICE_NATIVE_ENDIAN:
3847 default:
3848 break;
3851 return (io_index << IO_MEM_SHIFT);
3854 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3855 CPUWriteMemoryFunc * const *mem_write,
3856 void *opaque, enum device_endian endian)
3858 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3861 void cpu_unregister_io_memory(int io_table_address)
3863 int i;
3864 int io_index = io_table_address >> IO_MEM_SHIFT;
3866 swapendian_del(io_index);
3868 for (i=0;i < 3; i++) {
3869 io_mem_read[io_index][i] = unassigned_mem_read[i];
3870 io_mem_write[io_index][i] = unassigned_mem_write[i];
3872 io_mem_opaque[io_index] = NULL;
3873 io_mem_used[io_index] = 0;
3876 static void io_mem_init(void)
3878 int i;
3880 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3881 unassigned_mem_write, NULL,
3882 DEVICE_NATIVE_ENDIAN);
3883 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3884 unassigned_mem_write, NULL,
3885 DEVICE_NATIVE_ENDIAN);
3886 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3887 notdirty_mem_write, NULL,
3888 DEVICE_NATIVE_ENDIAN);
3889 cpu_register_io_memory_fixed(IO_MEM_SUBPAGE_RAM, subpage_ram_read,
3890 subpage_ram_write, NULL,
3891 DEVICE_NATIVE_ENDIAN);
3892 for (i=0; i<5; i++)
3893 io_mem_used[i] = 1;
3895 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3896 watch_mem_write, NULL,
3897 DEVICE_NATIVE_ENDIAN);
3900 static void memory_map_init(void)
3902 system_memory = g_malloc(sizeof(*system_memory));
3903 memory_region_init(system_memory, "system", INT64_MAX);
3904 set_system_memory_map(system_memory);
3906 system_io = g_malloc(sizeof(*system_io));
3907 memory_region_init(system_io, "io", 65536);
3908 set_system_io_map(system_io);
3911 MemoryRegion *get_system_memory(void)
3913 return system_memory;
3916 MemoryRegion *get_system_io(void)
3918 return system_io;
3921 #endif /* !defined(CONFIG_USER_ONLY) */
3923 /* physical memory access (slow version, mainly for debug) */
3924 #if defined(CONFIG_USER_ONLY)
3925 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3926 uint8_t *buf, int len, int is_write)
3928 int l, flags;
3929 target_ulong page;
3930 void * p;
3932 while (len > 0) {
3933 page = addr & TARGET_PAGE_MASK;
3934 l = (page + TARGET_PAGE_SIZE) - addr;
3935 if (l > len)
3936 l = len;
3937 flags = page_get_flags(page);
3938 if (!(flags & PAGE_VALID))
3939 return -1;
3940 if (is_write) {
3941 if (!(flags & PAGE_WRITE))
3942 return -1;
3943 /* XXX: this code should not depend on lock_user */
3944 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3945 return -1;
3946 memcpy(p, buf, l);
3947 unlock_user(p, addr, l);
3948 } else {
3949 if (!(flags & PAGE_READ))
3950 return -1;
3951 /* XXX: this code should not depend on lock_user */
3952 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3953 return -1;
3954 memcpy(buf, p, l);
3955 unlock_user(p, addr, 0);
3957 len -= l;
3958 buf += l;
3959 addr += l;
3961 return 0;
3964 #else
3965 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3966 int len, int is_write)
3968 int l, io_index;
3969 uint8_t *ptr;
3970 uint32_t val;
3971 target_phys_addr_t page;
3972 ram_addr_t pd;
3973 PhysPageDesc *p;
3975 while (len > 0) {
3976 page = addr & TARGET_PAGE_MASK;
3977 l = (page + TARGET_PAGE_SIZE) - addr;
3978 if (l > len)
3979 l = len;
3980 p = phys_page_find(page >> TARGET_PAGE_BITS);
3981 if (!p) {
3982 pd = IO_MEM_UNASSIGNED;
3983 } else {
3984 pd = p->phys_offset;
3987 if (is_write) {
3988 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3989 target_phys_addr_t addr1 = addr;
3990 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3991 if (p)
3992 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3993 /* XXX: could force cpu_single_env to NULL to avoid
3994 potential bugs */
3995 if (l >= 4 && ((addr1 & 3) == 0)) {
3996 /* 32 bit write access */
3997 val = ldl_p(buf);
3998 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3999 l = 4;
4000 } else if (l >= 2 && ((addr1 & 1) == 0)) {
4001 /* 16 bit write access */
4002 val = lduw_p(buf);
4003 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
4004 l = 2;
4005 } else {
4006 /* 8 bit write access */
4007 val = ldub_p(buf);
4008 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
4009 l = 1;
4011 } else {
4012 ram_addr_t addr1;
4013 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4014 /* RAM case */
4015 ptr = qemu_get_ram_ptr(addr1);
4016 memcpy(ptr, buf, l);
4017 if (!cpu_physical_memory_is_dirty(addr1)) {
4018 /* invalidate code */
4019 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4020 /* set dirty bit */
4021 cpu_physical_memory_set_dirty_flags(
4022 addr1, (0xff & ~CODE_DIRTY_FLAG));
4024 qemu_put_ram_ptr(ptr);
4026 } else {
4027 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4028 !(pd & IO_MEM_ROMD)) {
4029 target_phys_addr_t addr1 = addr;
4030 /* I/O case */
4031 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4032 if (p)
4033 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4034 if (l >= 4 && ((addr1 & 3) == 0)) {
4035 /* 32 bit read access */
4036 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
4037 stl_p(buf, val);
4038 l = 4;
4039 } else if (l >= 2 && ((addr1 & 1) == 0)) {
4040 /* 16 bit read access */
4041 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
4042 stw_p(buf, val);
4043 l = 2;
4044 } else {
4045 /* 8 bit read access */
4046 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
4047 stb_p(buf, val);
4048 l = 1;
4050 } else {
4051 /* RAM case */
4052 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
4053 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
4054 qemu_put_ram_ptr(ptr);
4057 len -= l;
4058 buf += l;
4059 addr += l;
4063 /* used for ROM loading : can write in RAM and ROM */
4064 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
4065 const uint8_t *buf, int len)
4067 int l;
4068 uint8_t *ptr;
4069 target_phys_addr_t page;
4070 unsigned long pd;
4071 PhysPageDesc *p;
4073 while (len > 0) {
4074 page = addr & TARGET_PAGE_MASK;
4075 l = (page + TARGET_PAGE_SIZE) - addr;
4076 if (l > len)
4077 l = len;
4078 p = phys_page_find(page >> TARGET_PAGE_BITS);
4079 if (!p) {
4080 pd = IO_MEM_UNASSIGNED;
4081 } else {
4082 pd = p->phys_offset;
4085 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4086 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4087 !(pd & IO_MEM_ROMD)) {
4088 /* do nothing */
4089 } else {
4090 unsigned long addr1;
4091 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4092 /* ROM/RAM case */
4093 ptr = qemu_get_ram_ptr(addr1);
4094 memcpy(ptr, buf, l);
4095 qemu_put_ram_ptr(ptr);
4097 len -= l;
4098 buf += l;
4099 addr += l;
4103 typedef struct {
4104 void *buffer;
4105 target_phys_addr_t addr;
4106 target_phys_addr_t len;
4107 } BounceBuffer;
4109 static BounceBuffer bounce;
4111 typedef struct MapClient {
4112 void *opaque;
4113 void (*callback)(void *opaque);
4114 QLIST_ENTRY(MapClient) link;
4115 } MapClient;
4117 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4118 = QLIST_HEAD_INITIALIZER(map_client_list);
4120 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4122 MapClient *client = g_malloc(sizeof(*client));
4124 client->opaque = opaque;
4125 client->callback = callback;
4126 QLIST_INSERT_HEAD(&map_client_list, client, link);
4127 return client;
4130 void cpu_unregister_map_client(void *_client)
4132 MapClient *client = (MapClient *)_client;
4134 QLIST_REMOVE(client, link);
4135 g_free(client);
4138 static void cpu_notify_map_clients(void)
4140 MapClient *client;
4142 while (!QLIST_EMPTY(&map_client_list)) {
4143 client = QLIST_FIRST(&map_client_list);
4144 client->callback(client->opaque);
4145 cpu_unregister_map_client(client);
4149 /* Map a physical memory region into a host virtual address.
4150 * May map a subset of the requested range, given by and returned in *plen.
4151 * May return NULL if resources needed to perform the mapping are exhausted.
4152 * Use only for reads OR writes - not for read-modify-write operations.
4153 * Use cpu_register_map_client() to know when retrying the map operation is
4154 * likely to succeed.
4156 void *cpu_physical_memory_map(target_phys_addr_t addr,
4157 target_phys_addr_t *plen,
4158 int is_write)
4160 target_phys_addr_t len = *plen;
4161 target_phys_addr_t todo = 0;
4162 int l;
4163 target_phys_addr_t page;
4164 unsigned long pd;
4165 PhysPageDesc *p;
4166 ram_addr_t raddr = RAM_ADDR_MAX;
4167 ram_addr_t rlen;
4168 void *ret;
4170 while (len > 0) {
4171 page = addr & TARGET_PAGE_MASK;
4172 l = (page + TARGET_PAGE_SIZE) - addr;
4173 if (l > len)
4174 l = len;
4175 p = phys_page_find(page >> TARGET_PAGE_BITS);
4176 if (!p) {
4177 pd = IO_MEM_UNASSIGNED;
4178 } else {
4179 pd = p->phys_offset;
4182 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4183 if (todo || bounce.buffer) {
4184 break;
4186 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4187 bounce.addr = addr;
4188 bounce.len = l;
4189 if (!is_write) {
4190 cpu_physical_memory_read(addr, bounce.buffer, l);
4193 *plen = l;
4194 return bounce.buffer;
4196 if (!todo) {
4197 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4200 len -= l;
4201 addr += l;
4202 todo += l;
4204 rlen = todo;
4205 ret = qemu_ram_ptr_length(raddr, &rlen);
4206 *plen = rlen;
4207 return ret;
4210 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4211 * Will also mark the memory as dirty if is_write == 1. access_len gives
4212 * the amount of memory that was actually read or written by the caller.
4214 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4215 int is_write, target_phys_addr_t access_len)
4217 if (buffer != bounce.buffer) {
4218 if (is_write) {
4219 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4220 while (access_len) {
4221 unsigned l;
4222 l = TARGET_PAGE_SIZE;
4223 if (l > access_len)
4224 l = access_len;
4225 if (!cpu_physical_memory_is_dirty(addr1)) {
4226 /* invalidate code */
4227 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4228 /* set dirty bit */
4229 cpu_physical_memory_set_dirty_flags(
4230 addr1, (0xff & ~CODE_DIRTY_FLAG));
4232 addr1 += l;
4233 access_len -= l;
4236 if (xen_enabled()) {
4237 xen_invalidate_map_cache_entry(buffer);
4239 return;
4241 if (is_write) {
4242 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4244 qemu_vfree(bounce.buffer);
4245 bounce.buffer = NULL;
4246 cpu_notify_map_clients();
4249 /* warning: addr must be aligned */
4250 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4251 enum device_endian endian)
4253 int io_index;
4254 uint8_t *ptr;
4255 uint32_t val;
4256 unsigned long pd;
4257 PhysPageDesc *p;
4259 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4260 if (!p) {
4261 pd = IO_MEM_UNASSIGNED;
4262 } else {
4263 pd = p->phys_offset;
4266 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4267 !(pd & IO_MEM_ROMD)) {
4268 /* I/O case */
4269 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4270 if (p)
4271 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4272 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4273 #if defined(TARGET_WORDS_BIGENDIAN)
4274 if (endian == DEVICE_LITTLE_ENDIAN) {
4275 val = bswap32(val);
4277 #else
4278 if (endian == DEVICE_BIG_ENDIAN) {
4279 val = bswap32(val);
4281 #endif
4282 } else {
4283 /* RAM case */
4284 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4285 (addr & ~TARGET_PAGE_MASK);
4286 switch (endian) {
4287 case DEVICE_LITTLE_ENDIAN:
4288 val = ldl_le_p(ptr);
4289 break;
4290 case DEVICE_BIG_ENDIAN:
4291 val = ldl_be_p(ptr);
4292 break;
4293 default:
4294 val = ldl_p(ptr);
4295 break;
4298 return val;
4301 uint32_t ldl_phys(target_phys_addr_t addr)
4303 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4306 uint32_t ldl_le_phys(target_phys_addr_t addr)
4308 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4311 uint32_t ldl_be_phys(target_phys_addr_t addr)
4313 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4316 /* warning: addr must be aligned */
4317 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4318 enum device_endian endian)
4320 int io_index;
4321 uint8_t *ptr;
4322 uint64_t val;
4323 unsigned long pd;
4324 PhysPageDesc *p;
4326 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4327 if (!p) {
4328 pd = IO_MEM_UNASSIGNED;
4329 } else {
4330 pd = p->phys_offset;
4333 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4334 !(pd & IO_MEM_ROMD)) {
4335 /* I/O case */
4336 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4337 if (p)
4338 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4340 /* XXX This is broken when device endian != cpu endian.
4341 Fix and add "endian" variable check */
4342 #ifdef TARGET_WORDS_BIGENDIAN
4343 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4344 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4345 #else
4346 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4347 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4348 #endif
4349 } else {
4350 /* RAM case */
4351 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4352 (addr & ~TARGET_PAGE_MASK);
4353 switch (endian) {
4354 case DEVICE_LITTLE_ENDIAN:
4355 val = ldq_le_p(ptr);
4356 break;
4357 case DEVICE_BIG_ENDIAN:
4358 val = ldq_be_p(ptr);
4359 break;
4360 default:
4361 val = ldq_p(ptr);
4362 break;
4365 return val;
4368 uint64_t ldq_phys(target_phys_addr_t addr)
4370 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4373 uint64_t ldq_le_phys(target_phys_addr_t addr)
4375 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4378 uint64_t ldq_be_phys(target_phys_addr_t addr)
4380 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4383 /* XXX: optimize */
4384 uint32_t ldub_phys(target_phys_addr_t addr)
4386 uint8_t val;
4387 cpu_physical_memory_read(addr, &val, 1);
4388 return val;
4391 /* warning: addr must be aligned */
4392 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4393 enum device_endian endian)
4395 int io_index;
4396 uint8_t *ptr;
4397 uint64_t val;
4398 unsigned long pd;
4399 PhysPageDesc *p;
4401 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4402 if (!p) {
4403 pd = IO_MEM_UNASSIGNED;
4404 } else {
4405 pd = p->phys_offset;
4408 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4409 !(pd & IO_MEM_ROMD)) {
4410 /* I/O case */
4411 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4412 if (p)
4413 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4414 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4415 #if defined(TARGET_WORDS_BIGENDIAN)
4416 if (endian == DEVICE_LITTLE_ENDIAN) {
4417 val = bswap16(val);
4419 #else
4420 if (endian == DEVICE_BIG_ENDIAN) {
4421 val = bswap16(val);
4423 #endif
4424 } else {
4425 /* RAM case */
4426 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4427 (addr & ~TARGET_PAGE_MASK);
4428 switch (endian) {
4429 case DEVICE_LITTLE_ENDIAN:
4430 val = lduw_le_p(ptr);
4431 break;
4432 case DEVICE_BIG_ENDIAN:
4433 val = lduw_be_p(ptr);
4434 break;
4435 default:
4436 val = lduw_p(ptr);
4437 break;
4440 return val;
4443 uint32_t lduw_phys(target_phys_addr_t addr)
4445 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4448 uint32_t lduw_le_phys(target_phys_addr_t addr)
4450 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4453 uint32_t lduw_be_phys(target_phys_addr_t addr)
4455 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4458 /* warning: addr must be aligned. The ram page is not masked as dirty
4459 and the code inside is not invalidated. It is useful if the dirty
4460 bits are used to track modified PTEs */
4461 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4463 int io_index;
4464 uint8_t *ptr;
4465 unsigned long pd;
4466 PhysPageDesc *p;
4468 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4469 if (!p) {
4470 pd = IO_MEM_UNASSIGNED;
4471 } else {
4472 pd = p->phys_offset;
4475 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4476 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4477 if (p)
4478 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4479 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4480 } else {
4481 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4482 ptr = qemu_get_ram_ptr(addr1);
4483 stl_p(ptr, val);
4485 if (unlikely(in_migration)) {
4486 if (!cpu_physical_memory_is_dirty(addr1)) {
4487 /* invalidate code */
4488 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4489 /* set dirty bit */
4490 cpu_physical_memory_set_dirty_flags(
4491 addr1, (0xff & ~CODE_DIRTY_FLAG));
4497 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4499 int io_index;
4500 uint8_t *ptr;
4501 unsigned long pd;
4502 PhysPageDesc *p;
4504 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4505 if (!p) {
4506 pd = IO_MEM_UNASSIGNED;
4507 } else {
4508 pd = p->phys_offset;
4511 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4512 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4513 if (p)
4514 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4515 #ifdef TARGET_WORDS_BIGENDIAN
4516 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4517 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4518 #else
4519 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4520 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4521 #endif
4522 } else {
4523 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4524 (addr & ~TARGET_PAGE_MASK);
4525 stq_p(ptr, val);
4529 /* warning: addr must be aligned */
4530 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4531 enum device_endian endian)
4533 int io_index;
4534 uint8_t *ptr;
4535 unsigned long pd;
4536 PhysPageDesc *p;
4538 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4539 if (!p) {
4540 pd = IO_MEM_UNASSIGNED;
4541 } else {
4542 pd = p->phys_offset;
4545 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4546 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4547 if (p)
4548 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4549 #if defined(TARGET_WORDS_BIGENDIAN)
4550 if (endian == DEVICE_LITTLE_ENDIAN) {
4551 val = bswap32(val);
4553 #else
4554 if (endian == DEVICE_BIG_ENDIAN) {
4555 val = bswap32(val);
4557 #endif
4558 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4559 } else {
4560 unsigned long addr1;
4561 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4562 /* RAM case */
4563 ptr = qemu_get_ram_ptr(addr1);
4564 switch (endian) {
4565 case DEVICE_LITTLE_ENDIAN:
4566 stl_le_p(ptr, val);
4567 break;
4568 case DEVICE_BIG_ENDIAN:
4569 stl_be_p(ptr, val);
4570 break;
4571 default:
4572 stl_p(ptr, val);
4573 break;
4575 if (!cpu_physical_memory_is_dirty(addr1)) {
4576 /* invalidate code */
4577 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4578 /* set dirty bit */
4579 cpu_physical_memory_set_dirty_flags(addr1,
4580 (0xff & ~CODE_DIRTY_FLAG));
4585 void stl_phys(target_phys_addr_t addr, uint32_t val)
4587 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4590 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4592 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4595 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4597 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4600 /* XXX: optimize */
4601 void stb_phys(target_phys_addr_t addr, uint32_t val)
4603 uint8_t v = val;
4604 cpu_physical_memory_write(addr, &v, 1);
4607 /* warning: addr must be aligned */
4608 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4609 enum device_endian endian)
4611 int io_index;
4612 uint8_t *ptr;
4613 unsigned long pd;
4614 PhysPageDesc *p;
4616 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4617 if (!p) {
4618 pd = IO_MEM_UNASSIGNED;
4619 } else {
4620 pd = p->phys_offset;
4623 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4624 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4625 if (p)
4626 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4627 #if defined(TARGET_WORDS_BIGENDIAN)
4628 if (endian == DEVICE_LITTLE_ENDIAN) {
4629 val = bswap16(val);
4631 #else
4632 if (endian == DEVICE_BIG_ENDIAN) {
4633 val = bswap16(val);
4635 #endif
4636 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4637 } else {
4638 unsigned long addr1;
4639 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4640 /* RAM case */
4641 ptr = qemu_get_ram_ptr(addr1);
4642 switch (endian) {
4643 case DEVICE_LITTLE_ENDIAN:
4644 stw_le_p(ptr, val);
4645 break;
4646 case DEVICE_BIG_ENDIAN:
4647 stw_be_p(ptr, val);
4648 break;
4649 default:
4650 stw_p(ptr, val);
4651 break;
4653 if (!cpu_physical_memory_is_dirty(addr1)) {
4654 /* invalidate code */
4655 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4656 /* set dirty bit */
4657 cpu_physical_memory_set_dirty_flags(addr1,
4658 (0xff & ~CODE_DIRTY_FLAG));
4663 void stw_phys(target_phys_addr_t addr, uint32_t val)
4665 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4668 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4670 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4673 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4675 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4678 /* XXX: optimize */
4679 void stq_phys(target_phys_addr_t addr, uint64_t val)
4681 val = tswap64(val);
4682 cpu_physical_memory_write(addr, &val, 8);
4685 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4687 val = cpu_to_le64(val);
4688 cpu_physical_memory_write(addr, &val, 8);
4691 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4693 val = cpu_to_be64(val);
4694 cpu_physical_memory_write(addr, &val, 8);
4697 /* virtual memory access for debug (includes writing to ROM) */
4698 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4699 uint8_t *buf, int len, int is_write)
4701 int l;
4702 target_phys_addr_t phys_addr;
4703 target_ulong page;
4705 while (len > 0) {
4706 page = addr & TARGET_PAGE_MASK;
4707 phys_addr = cpu_get_phys_page_debug(env, page);
4708 /* if no physical page mapped, return an error */
4709 if (phys_addr == -1)
4710 return -1;
4711 l = (page + TARGET_PAGE_SIZE) - addr;
4712 if (l > len)
4713 l = len;
4714 phys_addr += (addr & ~TARGET_PAGE_MASK);
4715 if (is_write)
4716 cpu_physical_memory_write_rom(phys_addr, buf, l);
4717 else
4718 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4719 len -= l;
4720 buf += l;
4721 addr += l;
4723 return 0;
4725 #endif
4727 /* in deterministic execution mode, instructions doing device I/Os
4728 must be at the end of the TB */
4729 void cpu_io_recompile(CPUState *env, void *retaddr)
4731 TranslationBlock *tb;
4732 uint32_t n, cflags;
4733 target_ulong pc, cs_base;
4734 uint64_t flags;
4736 tb = tb_find_pc((unsigned long)retaddr);
4737 if (!tb) {
4738 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4739 retaddr);
4741 n = env->icount_decr.u16.low + tb->icount;
4742 cpu_restore_state(tb, env, (unsigned long)retaddr);
4743 /* Calculate how many instructions had been executed before the fault
4744 occurred. */
4745 n = n - env->icount_decr.u16.low;
4746 /* Generate a new TB ending on the I/O insn. */
4747 n++;
4748 /* On MIPS and SH, delay slot instructions can only be restarted if
4749 they were already the first instruction in the TB. If this is not
4750 the first instruction in a TB then re-execute the preceding
4751 branch. */
4752 #if defined(TARGET_MIPS)
4753 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4754 env->active_tc.PC -= 4;
4755 env->icount_decr.u16.low++;
4756 env->hflags &= ~MIPS_HFLAG_BMASK;
4758 #elif defined(TARGET_SH4)
4759 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4760 && n > 1) {
4761 env->pc -= 2;
4762 env->icount_decr.u16.low++;
4763 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4765 #endif
4766 /* This should never happen. */
4767 if (n > CF_COUNT_MASK)
4768 cpu_abort(env, "TB too big during recompile");
4770 cflags = n | CF_LAST_IO;
4771 pc = tb->pc;
4772 cs_base = tb->cs_base;
4773 flags = tb->flags;
4774 tb_phys_invalidate(tb, -1);
4775 /* FIXME: In theory this could raise an exception. In practice
4776 we have already translated the block once so it's probably ok. */
4777 tb_gen_code(env, pc, cs_base, flags, cflags);
4778 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4779 the first in the TB) then we end up generating a whole new TB and
4780 repeating the fault, which is horribly inefficient.
4781 Better would be to execute just this insn uncached, or generate a
4782 second new TB. */
4783 cpu_resume_from_signal(env, NULL);
4786 #if !defined(CONFIG_USER_ONLY)
4788 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4790 int i, target_code_size, max_target_code_size;
4791 int direct_jmp_count, direct_jmp2_count, cross_page;
4792 TranslationBlock *tb;
4794 target_code_size = 0;
4795 max_target_code_size = 0;
4796 cross_page = 0;
4797 direct_jmp_count = 0;
4798 direct_jmp2_count = 0;
4799 for(i = 0; i < nb_tbs; i++) {
4800 tb = &tbs[i];
4801 target_code_size += tb->size;
4802 if (tb->size > max_target_code_size)
4803 max_target_code_size = tb->size;
4804 if (tb->page_addr[1] != -1)
4805 cross_page++;
4806 if (tb->tb_next_offset[0] != 0xffff) {
4807 direct_jmp_count++;
4808 if (tb->tb_next_offset[1] != 0xffff) {
4809 direct_jmp2_count++;
4813 /* XXX: avoid using doubles ? */
4814 cpu_fprintf(f, "Translation buffer state:\n");
4815 cpu_fprintf(f, "gen code size %td/%ld\n",
4816 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4817 cpu_fprintf(f, "TB count %d/%d\n",
4818 nb_tbs, code_gen_max_blocks);
4819 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4820 nb_tbs ? target_code_size / nb_tbs : 0,
4821 max_target_code_size);
4822 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4823 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4824 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4825 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4826 cross_page,
4827 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4828 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4829 direct_jmp_count,
4830 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4831 direct_jmp2_count,
4832 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4833 cpu_fprintf(f, "\nStatistics:\n");
4834 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4835 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4836 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4837 tcg_dump_info(f, cpu_fprintf);
/* Instantiate softmmu_template.h four times, once for each SHIFT value
   0..3, with the following configuration:
     - MMUSUFFIX is _cmmu
     - GETPC() is redefined to expand to NULL
     - env is temporarily a macro for cpu_single_env
     - SOFTMMU_CODE_ACCESS is defined
   NOTE(review): SHIFT presumably selects the access size and
   SOFTMMU_CODE_ACCESS presumably marks these as code-fetch helpers --
   confirm against softmmu_template.h.
   SHIFT is redefined before each include without an #undef in this file,
   so the template is presumably expected to #undef it -- confirm. */
4840 #define MMUSUFFIX _cmmu
4841 #undef GETPC
4842 #define GETPC() NULL
4843 #define env cpu_single_env
4844 #define SOFTMMU_CODE_ACCESS
4846 #define SHIFT 0
4847 #include "softmmu_template.h"
4849 #define SHIFT 1
4850 #include "softmmu_template.h"
4852 #define SHIFT 2
4853 #include "softmmu_template.h"
4855 #define SHIFT 3
4856 #include "softmmu_template.h"
/* Drop the env alias so it does not leak past the template includes. */
4858 #undef env
4860 #endif