pci-assign: Re-order initfn for memory API
[qemu-kvm.git] / exec.c
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "cache-utils.h"

#if !defined(TARGET_IA64)
#include "tcg.h"
#endif

#include "hw/hw.h"
#include "hw/qdev.h"
#include "osdep.h"
#include "kvm.h"
#include "hw/xen.h"
#include "qemu-timer.h"
#include "memory.h"
#include "exec-memory.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/time.h>
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else /* !CONFIG_USER_ONLY */
#include "xen-mapcache.h"
#include "trace.h"
#endif
//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
//#define DEBUG_TLB
//#define DEBUG_UNASSIGNED

/* make various TB consistency checks */
//#define DEBUG_TB_CHECK
//#define DEBUG_TLB_CHECK

//#define DEBUG_IOPORT
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* TB consistency checks only implemented for usermode emulation.  */
#undef DEBUG_TB_CHECK
#endif
#define SMC_BITMAP_USE_THRESHOLD 10

static TranslationBlock *tbs;
static int code_gen_max_blocks;
TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
static int nb_tbs;
/* any access to the tbs or the page table must use this lock */
spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;

#if defined(__arm__) || defined(__sparc_v9__)
/* The prologue must be reachable with a direct jump. ARM and Sparc64
 have limited branch ranges (possibly also PPC) so place it in a
 section close to code segment. */
#define code_gen_section                                \
    __attribute__((__section__(".gen_code")))           \
    __attribute__((aligned (32)))
#elif defined(_WIN32)
/* Maximum alignment for Win32 is 16. */
#define code_gen_section                                \
    __attribute__((aligned (16)))
#else
#define code_gen_section                                \
    __attribute__((aligned (32)))
#endif

uint8_t code_gen_prologue[1024] code_gen_section;
static uint8_t *code_gen_buffer;
static unsigned long code_gen_buffer_size;
/* threshold to flush the translated code buffer */
static unsigned long code_gen_buffer_max_size;
static uint8_t *code_gen_ptr;
#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

#endif

CPUState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
CPUState *cpu_single_env;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount = 0;
/* Current instruction counter.  While executing translated code this may
   include some instructions that have not yet been executed.  */
int64_t qemu_icount;
typedef struct PageDesc {
    /* list of TBs intersecting this ram page */
    TranslationBlock *first_tb;
    /* in order to optimize self modifying code, we count the number
       of lookups we do to a given page to use a bitmap */
    unsigned int code_write_count;
    uint8_t *code_bitmap;
#if defined(CONFIG_USER_ONLY)
    unsigned long flags;
#endif
} PageDesc;

/* In system mode we want L1_MAP to be based on ram offsets,
   while in user mode we want it to be based on virtual addresses.  */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_VIRT_ADDR_SPACE_BITS
#endif

/* Size of the L2 (and L3, etc) page tables.  */
#define L2_BITS 10
#define L2_SIZE (1 << L2_BITS)

/* The bits remaining after N lower levels of page tables.  */
#define P_L1_BITS_REM \
    ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
#define V_L1_BITS_REM \
    ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)

/* Size of the L1 page table.  Avoid silly small sizes.  */
#if P_L1_BITS_REM < 4
#define P_L1_BITS  (P_L1_BITS_REM + L2_BITS)
#else
#define P_L1_BITS  P_L1_BITS_REM
#endif

#if V_L1_BITS_REM < 4
#define V_L1_BITS  (V_L1_BITS_REM + L2_BITS)
#else
#define V_L1_BITS  V_L1_BITS_REM
#endif

#define P_L1_SIZE  ((target_phys_addr_t)1 << P_L1_BITS)
#define V_L1_SIZE  ((target_ulong)1 << V_L1_BITS)

#define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
#define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
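
/* Illustrative example (values depend on the build configuration, not part
 * of the original source): for a user-mode target with a 32-bit virtual
 * address space, TARGET_PAGE_BITS == 12 and L2_BITS == 10, V_L1_BITS_REM is
 * (32 - 12) % 10 == 0, so V_L1_BITS becomes 10 and V_L1_SHIFT becomes
 * 32 - 12 - 10 == 10.  A page index (addr >> TARGET_PAGE_BITS) then splits
 * into a 10-bit l1_map index plus a single 10-bit leaf index, i.e. the map
 * degenerates to two levels with no intermediate tables. */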

unsigned long qemu_real_host_page_size;
unsigned long qemu_host_page_size;
unsigned long qemu_host_page_mask;

/* This is a multi-level map on the virtual address space.
   The bottom level has pointers to PageDesc.  */
static void *l1_map[V_L1_SIZE];
#if !defined(CONFIG_USER_ONLY)
typedef struct PhysPageDesc {
    /* offset in host memory of the page + io_index in the low bits */
    ram_addr_t phys_offset;
    ram_addr_t region_offset;
} PhysPageDesc;

/* This is a multi-level map on the physical address space.
   The bottom level has pointers to PhysPageDesc.  */
static void *l1_phys_map[P_L1_SIZE];

static void io_mem_init(void);
static void memory_map_init(void);

/* io memory support */
CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
void *io_mem_opaque[IO_MEM_NB_ENTRIES];
static char io_mem_used[IO_MEM_NB_ENTRIES];
static int io_mem_watch;
#endif
/* log support */
#ifdef WIN32
static const char *logfilename = "qemu.log";
#else
static const char *logfilename = "/tmp/qemu.log";
#endif
FILE *logfile;
int loglevel;
static int log_append = 0;

/* statistics */
#if !defined(CONFIG_USER_ONLY)
static int tlb_flush_count;
#endif
static int tb_flush_count;
static int tb_phys_invalidate_count;
/* Make the host pages containing [addr, addr + size) executable so that
   generated code can be run from them. */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;
    VirtualProtect(addr, size,
                   PAGE_EXECUTE_READWRITE, &old_protect);
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    mprotect((void *)start, end - start,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
static void page_init(void)
{
    /* NOTE: we can always suppose that qemu_host_page_size >=
       TARGET_PAGE_SIZE */
#ifdef _WIN32
    {
        SYSTEM_INFO system_info;

        GetSystemInfo(&system_info);
        qemu_real_host_page_size = system_info.dwPageSize;
    }
#else
    qemu_real_host_page_size = getpagesize();
#endif
    if (qemu_host_page_size == 0)
        qemu_host_page_size = qemu_real_host_page_size;
    if (qemu_host_page_size < TARGET_PAGE_SIZE)
        qemu_host_page_size = TARGET_PAGE_SIZE;
    qemu_host_page_mask = ~(qemu_host_page_size - 1);

#if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
    {
#ifdef HAVE_KINFO_GETVMMAP
        struct kinfo_vmentry *freep;
        int i, cnt;

        freep = kinfo_getvmmap(getpid(), &cnt);
        if (freep) {
            mmap_lock();
            for (i = 0; i < cnt; i++) {
                unsigned long startaddr, endaddr;

                startaddr = freep[i].kve_start;
                endaddr = freep[i].kve_end;
                if (h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                    } else {
#if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
                        endaddr = ~0ul;
                        page_set_flags(startaddr, endaddr, PAGE_RESERVED);
#endif
                    }
                }
            }
            free(freep);
            mmap_unlock();
        }
#else
        FILE *f;

        last_brk = (unsigned long)sbrk(0);

        f = fopen("/compat/linux/proc/self/maps", "r");
        if (f) {
            mmap_lock();

            do {
                unsigned long startaddr, endaddr;
                int n;

                n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);

                if (n == 2 && h2g_valid(startaddr)) {
                    startaddr = h2g(startaddr) & TARGET_PAGE_MASK;

                    if (h2g_valid(endaddr)) {
                        endaddr = h2g(endaddr);
                    } else {
                        endaddr = ~0ul;
                    }
                    page_set_flags(startaddr, endaddr, PAGE_RESERVED);
                }
            } while (!feof(f));

            fclose(f);
            mmap_unlock();
        }
#endif
    }
#endif
}
static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

#if defined(CONFIG_USER_ONLY)
    /* We can't use g_malloc because it may recurse into a locked mutex. */
# define ALLOC(P, SIZE)                                 \
    do {                                                \
        P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,    \
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);   \
    } while (0)
#else
# define ALLOC(P, SIZE) \
    do { P = g_malloc0(SIZE); } while (0)
#endif

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;

        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            ALLOC(p, sizeof(void *) * L2_SIZE);
            *lp = p;
        }

        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        if (!alloc) {
            return NULL;
        }
        ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
        *lp = pd;
    }

#undef ALLOC

    return pd + (index & (L2_SIZE - 1));
}

static inline PageDesc *page_find(tb_page_addr_t index)
{
    return page_find_alloc(index, 0);
}
#if !defined(CONFIG_USER_ONLY)
static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
{
    PhysPageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));

    /* Level 2..N-1.  */
    for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        void **p = *lp;
        if (p == NULL) {
            if (!alloc) {
                return NULL;
            }
            *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
        }
        lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }

    pd = *lp;
    if (pd == NULL) {
        int i;

        if (!alloc) {
            return NULL;
        }

        *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);

        for (i = 0; i < L2_SIZE; i++) {
            pd[i].phys_offset = IO_MEM_UNASSIGNED;
            pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
        }
    }

    return pd + (index & (L2_SIZE - 1));
}

static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
{
    return phys_page_find_alloc(index, 0);
}
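
/* Note: a NULL return from phys_page_find() means the page has never been
 * registered; callers such as breakpoint_invalidate() below treat that case
 * as IO_MEM_UNASSIGNED. */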

static void tlb_protect_code(ram_addr_t ram_addr);
static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
                                    target_ulong vaddr);
#define mmap_lock() do { } while(0)
#define mmap_unlock() do { } while(0)
#endif
#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)

#if defined(CONFIG_USER_ONLY)
/* Currently it is not recommended to allocate big chunks of data in
   user mode. It will change when a dedicated libc will be used */
#define USE_STATIC_CODE_GEN_BUFFER
#endif

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
               __attribute__((aligned (CODE_GEN_ALIGN)));
#endif
static void code_gen_alloc(unsigned long tb_size)
{
    if (kvm_enabled())
        return;

#ifdef USE_STATIC_CODE_GEN_BUFFER
    code_gen_buffer = static_code_gen_buffer;
    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
    map_exec(code_gen_buffer, code_gen_buffer_size);
#else
    code_gen_buffer_size = tb_size;
    if (code_gen_buffer_size == 0) {
#if defined(CONFIG_USER_ONLY)
        /* in user mode, phys_ram_size is not meaningful */
        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
#else
        /* XXX: needs adjustments */
        code_gen_buffer_size = (unsigned long)(ram_size / 4);
#endif
    }
    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
    /* The code gen buffer location may have constraints depending on
       the host cpu and OS */
#if defined(__linux__)
    {
        int flags;
        void *start = NULL;

        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        flags |= MAP_32BIT;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        start = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024))
            code_gen_buffer_size = (512 * 1024 * 1024);
#elif defined(__arm__)
        /* Map the buffer below 32M, so we can use direct calls and branches */
        flags |= MAP_FIXED;
        start = (void *) 0x01000000UL;
        if (code_gen_buffer_size > 16 * 1024 * 1024)
            code_gen_buffer_size = 16 * 1024 * 1024;
#elif defined(__s390x__)
        /* Map the buffer so that we can use direct calls and branches.  */
        /* We have a +- 4GB range on the branches; leave some slop.  */
        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
        }
        start = (void *)0x90000000UL;
#endif
        code_gen_buffer = mmap(start, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) \
    || defined(__NetBSD__)
    {
        int flags;
        void *addr = NULL;
        flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(__x86_64__)
        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
         * 0x40000000 is free */
        flags |= MAP_FIXED;
        addr = (void *)0x40000000;
        /* Cannot map more than that */
        if (code_gen_buffer_size > (800 * 1024 * 1024))
            code_gen_buffer_size = (800 * 1024 * 1024);
#elif defined(__sparc_v9__)
        // Map the buffer below 2G, so we can use direct calls and branches
        flags |= MAP_FIXED;
        addr = (void *) 0x60000000UL;
        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
            code_gen_buffer_size = (512 * 1024 * 1024);
        }
#endif
        code_gen_buffer = mmap(addr, code_gen_buffer_size,
                               PROT_WRITE | PROT_READ | PROT_EXEC,
                               flags, -1, 0);
        if (code_gen_buffer == MAP_FAILED) {
            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
            exit(1);
        }
    }
#else
    code_gen_buffer = g_malloc(code_gen_buffer_size);
    map_exec(code_gen_buffer, code_gen_buffer_size);
#endif
#endif /* !USE_STATIC_CODE_GEN_BUFFER */
    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
    code_gen_buffer_max_size = code_gen_buffer_size -
        (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
    code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
    tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
   (in bytes) allocated to the translation buffer. Zero means default
   size. */
void tcg_exec_init(unsigned long tb_size)
{
    cpu_gen_init();
    code_gen_alloc(tb_size);
    code_gen_ptr = code_gen_buffer;
    page_init();
#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
    /* There's no guest base to take into account, so go ahead and
       initialize the prologue now. */
    tcg_prologue_init(&tcg_ctx);
#endif
}

/* TCG is considered enabled once code_gen_alloc() has installed a
   translation buffer; it stays NULL when KVM handles execution. */
bool tcg_enabled(void)
{
    return code_gen_buffer != NULL;
}

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    }
};
#endif
CPUState *qemu_get_cpu(int cpu)
{
    CPUState *env = first_cpu;

    while (env) {
        if (env->cpu_index == cpu)
            break;
        env = env->next_cpu;
    }

    return env;
}
void cpu_exec_init(CPUState *env)
{
    CPUState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    env->cpu_index = cpu_index;
    env->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    env->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}
679 /* Allocate a new translation block. Flush the translation buffer if
680 too many translation blocks or too much generated code. */
681 static TranslationBlock *tb_alloc(target_ulong pc)
683 TranslationBlock *tb;
685 if (nb_tbs >= code_gen_max_blocks ||
686 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
687 return NULL;
688 tb = &tbs[nb_tbs++];
689 tb->pc = pc;
690 tb->cflags = 0;
691 return tb;
694 void tb_free(TranslationBlock *tb)
696 /* In practice this is mostly used for single use temporary TB
697 Ignore the hard cases and just back up if this TB happens to
698 be the last one generated. */
699 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
700 code_gen_ptr = tb->tc_ptr;
701 nb_tbs--;
705 static inline void invalidate_page_bitmap(PageDesc *p)
707 if (p->code_bitmap) {
708 g_free(p->code_bitmap);
709 p->code_bitmap = NULL;
711 p->code_write_count = 0;
714 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
716 static void page_flush_tb_1 (int level, void **lp)
718 int i;
720 if (*lp == NULL) {
721 return;
723 if (level == 0) {
724 PageDesc *pd = *lp;
725 for (i = 0; i < L2_SIZE; ++i) {
726 pd[i].first_tb = NULL;
727 invalidate_page_bitmap(pd + i);
729 } else {
730 void **pp = *lp;
731 for (i = 0; i < L2_SIZE; ++i) {
732 page_flush_tb_1 (level - 1, pp + i);
737 static void page_flush_tb(void)
739 int i;
740 for (i = 0; i < V_L1_SIZE; i++) {
741 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
745 /* flush all the translation blocks */
746 /* XXX: tb_flush is currently not thread safe */
747 void tb_flush(CPUState *env1)
749 CPUState *env;
750 #if defined(DEBUG_FLUSH)
751 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
752 (unsigned long)(code_gen_ptr - code_gen_buffer),
753 nb_tbs, nb_tbs > 0 ?
754 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
755 #endif
756 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
757 cpu_abort(env1, "Internal error: code buffer overflow\n");
759 nb_tbs = 0;
761 for(env = first_cpu; env != NULL; env = env->next_cpu) {
762 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
765 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
766 page_flush_tb();
768 code_gen_ptr = code_gen_buffer;
769 /* XXX: flush processor icache at this point if cache flush is
770 expensive */
771 tb_flush_count++;
774 #ifdef DEBUG_TB_CHECK
776 static void tb_invalidate_check(target_ulong address)
778 TranslationBlock *tb;
779 int i;
780 address &= TARGET_PAGE_MASK;
781 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
782 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
783 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
784 address >= tb->pc + tb->size)) {
785 printf("ERROR invalidate: address=" TARGET_FMT_lx
786 " PC=%08lx size=%04x\n",
787 address, (long)tb->pc, tb->size);
793 /* verify that all the pages have correct rights for code */
794 static void tb_page_check(void)
796 TranslationBlock *tb;
797 int i, flags1, flags2;
799 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
800 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
801 flags1 = page_get_flags(tb->pc);
802 flags2 = page_get_flags(tb->pc + tb->size - 1);
803 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
804 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
805 (long)tb->pc, tb->size, flags1, flags2);
811 #endif
813 /* invalidate one TB */
814 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
815 int next_offset)
817 TranslationBlock *tb1;
818 for(;;) {
819 tb1 = *ptb;
820 if (tb1 == tb) {
821 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
822 break;
824 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
828 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
830 TranslationBlock *tb1;
831 unsigned int n1;
833 for(;;) {
834 tb1 = *ptb;
835 n1 = (long)tb1 & 3;
836 tb1 = (TranslationBlock *)((long)tb1 & ~3);
837 if (tb1 == tb) {
838 *ptb = tb1->page_next[n1];
839 break;
841 ptb = &tb1->page_next[n1];
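/* Note on the pointer arithmetic above and below: the TB lists reuse the low
   two bits of each stored pointer as a tag.  For page_next[] the tag records
   which of the TB's (at most) two physical pages the link belongs to, and for
   the jump lists it selects jmp_next[0] or jmp_next[1]; the special value 2
   (as in jmp_first = tb | 2) marks the head of the circular list.  Masking
   with ~3 recovers the real TranslationBlock pointer. */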
845 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
847 TranslationBlock *tb1, **ptb;
848 unsigned int n1;
850 ptb = &tb->jmp_next[n];
851 tb1 = *ptb;
852 if (tb1) {
853 /* find tb(n) in circular list */
854 for(;;) {
855 tb1 = *ptb;
856 n1 = (long)tb1 & 3;
857 tb1 = (TranslationBlock *)((long)tb1 & ~3);
858 if (n1 == n && tb1 == tb)
859 break;
860 if (n1 == 2) {
861 ptb = &tb1->jmp_first;
862 } else {
863 ptb = &tb1->jmp_next[n1];
866 /* now we can suppress tb(n) from the list */
867 *ptb = tb->jmp_next[n];
869 tb->jmp_next[n] = NULL;
873 /* reset the jump entry 'n' of a TB so that it is not chained to
874 another TB */
875 static inline void tb_reset_jump(TranslationBlock *tb, int n)
877 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
880 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
882 CPUState *env;
883 PageDesc *p;
884 unsigned int h, n1;
885 tb_page_addr_t phys_pc;
886 TranslationBlock *tb1, *tb2;
888 /* remove the TB from the hash list */
889 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
890 h = tb_phys_hash_func(phys_pc);
891 tb_remove(&tb_phys_hash[h], tb,
892 offsetof(TranslationBlock, phys_hash_next));
894 /* remove the TB from the page list */
895 if (tb->page_addr[0] != page_addr) {
896 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
897 tb_page_remove(&p->first_tb, tb);
898 invalidate_page_bitmap(p);
900 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
901 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
902 tb_page_remove(&p->first_tb, tb);
903 invalidate_page_bitmap(p);
906 tb_invalidated_flag = 1;
908 /* remove the TB from the hash list */
909 h = tb_jmp_cache_hash_func(tb->pc);
910 for(env = first_cpu; env != NULL; env = env->next_cpu) {
911 if (env->tb_jmp_cache[h] == tb)
912 env->tb_jmp_cache[h] = NULL;
915 /* suppress this TB from the two jump lists */
916 tb_jmp_remove(tb, 0);
917 tb_jmp_remove(tb, 1);
919 /* suppress any remaining jumps to this TB */
920 tb1 = tb->jmp_first;
921 for(;;) {
922 n1 = (long)tb1 & 3;
923 if (n1 == 2)
924 break;
925 tb1 = (TranslationBlock *)((long)tb1 & ~3);
926 tb2 = tb1->jmp_next[n1];
927 tb_reset_jump(tb1, n1);
928 tb1->jmp_next[n1] = NULL;
929 tb1 = tb2;
931 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
933 tb_phys_invalidate_count++;
936 static inline void set_bits(uint8_t *tab, int start, int len)
938 int end, mask, end1;
940 end = start + len;
941 tab += start >> 3;
942 mask = 0xff << (start & 7);
943 if ((start & ~7) == (end & ~7)) {
944 if (start < end) {
945 mask &= ~(0xff << (end & 7));
946 *tab |= mask;
948 } else {
949 *tab++ |= mask;
950 start = (start + 8) & ~7;
951 end1 = end & ~7;
952 while (start < end1) {
953 *tab++ = 0xff;
954 start += 8;
956 if (start < end) {
957 mask = ~(0xff << (end & 7));
958 *tab |= mask;
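/* Worked example for set_bits() (illustrative): set_bits(tab, 3, 7) marks
   bits 3..9, i.e. tab[0] |= 0xf8 (bits 3-7) and tab[1] |= 0x03 (bits 8-9).
   build_page_bitmap() below uses this to record which byte ranges of a page
   are covered by translated code. */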
963 static void build_page_bitmap(PageDesc *p)
965 int n, tb_start, tb_end;
966 TranslationBlock *tb;
968 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
970 tb = p->first_tb;
971 while (tb != NULL) {
972 n = (long)tb & 3;
973 tb = (TranslationBlock *)((long)tb & ~3);
974 /* NOTE: this is subtle as a TB may span two physical pages */
975 if (n == 0) {
976 /* NOTE: tb_end may be after the end of the page, but
977 it is not a problem */
978 tb_start = tb->pc & ~TARGET_PAGE_MASK;
979 tb_end = tb_start + tb->size;
980 if (tb_end > TARGET_PAGE_SIZE)
981 tb_end = TARGET_PAGE_SIZE;
982 } else {
983 tb_start = 0;
984 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
986 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
987 tb = tb->page_next[n];
991 TranslationBlock *tb_gen_code(CPUState *env,
992 target_ulong pc, target_ulong cs_base,
993 int flags, int cflags)
995 TranslationBlock *tb;
996 uint8_t *tc_ptr;
997 tb_page_addr_t phys_pc, phys_page2;
998 target_ulong virt_page2;
999 int code_gen_size;
1001 phys_pc = get_page_addr_code(env, pc);
1002 tb = tb_alloc(pc);
1003 if (!tb) {
1004 /* flush must be done */
1005 tb_flush(env);
1006 /* cannot fail at this point */
1007 tb = tb_alloc(pc);
1008 /* Don't forget to invalidate previous TB info. */
1009 tb_invalidated_flag = 1;
1011 tc_ptr = code_gen_ptr;
1012 tb->tc_ptr = tc_ptr;
1013 tb->cs_base = cs_base;
1014 tb->flags = flags;
1015 tb->cflags = cflags;
1016 cpu_gen_code(env, tb, &code_gen_size);
1017 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1019 /* check next page if needed */
1020 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1021 phys_page2 = -1;
1022 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1023 phys_page2 = get_page_addr_code(env, virt_page2);
1025 tb_link_page(tb, phys_pc, phys_page2);
1026 return tb;
1029 /* invalidate all TBs which intersect with the target physical page
1030 starting in range [start;end[. NOTE: start and end must refer to
1031 the same physical page. 'is_cpu_write_access' should be true if called
1032 from a real cpu write access: the virtual CPU will exit the current
1033 TB if code is modified inside this TB. */
1034 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1035 int is_cpu_write_access)
1037 TranslationBlock *tb, *tb_next, *saved_tb;
1038 CPUState *env = cpu_single_env;
1039 tb_page_addr_t tb_start, tb_end;
1040 PageDesc *p;
1041 int n;
1042 #ifdef TARGET_HAS_PRECISE_SMC
1043 int current_tb_not_found = is_cpu_write_access;
1044 TranslationBlock *current_tb = NULL;
1045 int current_tb_modified = 0;
1046 target_ulong current_pc = 0;
1047 target_ulong current_cs_base = 0;
1048 int current_flags = 0;
1049 #endif /* TARGET_HAS_PRECISE_SMC */
1051 p = page_find(start >> TARGET_PAGE_BITS);
1052 if (!p)
1053 return;
1054 if (!p->code_bitmap &&
1055 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1056 is_cpu_write_access) {
1057 /* build code bitmap */
1058 build_page_bitmap(p);
1061 /* we remove all the TBs in the range [start, end[ */
1062 /* XXX: see if in some cases it could be faster to invalidate all the code */
1063 tb = p->first_tb;
1064 while (tb != NULL) {
1065 n = (long)tb & 3;
1066 tb = (TranslationBlock *)((long)tb & ~3);
1067 tb_next = tb->page_next[n];
1068 /* NOTE: this is subtle as a TB may span two physical pages */
1069 if (n == 0) {
1070 /* NOTE: tb_end may be after the end of the page, but
1071 it is not a problem */
1072 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1073 tb_end = tb_start + tb->size;
1074 } else {
1075 tb_start = tb->page_addr[1];
1076 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1078 if (!(tb_end <= start || tb_start >= end)) {
1079 #ifdef TARGET_HAS_PRECISE_SMC
1080 if (current_tb_not_found) {
1081 current_tb_not_found = 0;
1082 current_tb = NULL;
1083 if (env->mem_io_pc) {
1084 /* now we have a real cpu fault */
1085 current_tb = tb_find_pc(env->mem_io_pc);
1088 if (current_tb == tb &&
1089 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1090 /* If we are modifying the current TB, we must stop
1091 its execution. We could be more precise by checking
1092 that the modification is after the current PC, but it
1093 would require a specialized function to partially
1094 restore the CPU state */
1096 current_tb_modified = 1;
1097 cpu_restore_state(current_tb, env, env->mem_io_pc);
1098 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1099 &current_flags);
1101 #endif /* TARGET_HAS_PRECISE_SMC */
1102 /* we need to do that to handle the case where a signal
1103 occurs while doing tb_phys_invalidate() */
1104 saved_tb = NULL;
1105 if (env) {
1106 saved_tb = env->current_tb;
1107 env->current_tb = NULL;
1109 tb_phys_invalidate(tb, -1);
1110 if (env) {
1111 env->current_tb = saved_tb;
1112 if (env->interrupt_request && env->current_tb)
1113 cpu_interrupt(env, env->interrupt_request);
1116 tb = tb_next;
1118 #if !defined(CONFIG_USER_ONLY)
1119 /* if no code remaining, no need to continue to use slow writes */
1120 if (!p->first_tb) {
1121 invalidate_page_bitmap(p);
1122 if (is_cpu_write_access) {
1123 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1126 #endif
1127 #ifdef TARGET_HAS_PRECISE_SMC
1128 if (current_tb_modified) {
1129 /* we generate a block containing just the instruction
1130 modifying the memory. It will ensure that it cannot modify
1131 itself */
1132 env->current_tb = NULL;
1133 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1134 cpu_resume_from_signal(env, NULL);
1136 #endif
1139 /* len must be <= 8 and start must be a multiple of len */
1140 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1142 PageDesc *p;
1143 int offset, b;
1144 #if 0
1145 if (1) {
1146 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1147 cpu_single_env->mem_io_vaddr, len,
1148 cpu_single_env->eip,
1149 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1151 #endif
1152 p = page_find(start >> TARGET_PAGE_BITS);
1153 if (!p)
1154 return;
1155 if (p->code_bitmap) {
1156 offset = start & ~TARGET_PAGE_MASK;
1157 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1158 if (b & ((1 << len) - 1))
1159 goto do_invalidate;
1160 } else {
1161 do_invalidate:
1162 tb_invalidate_phys_page_range(start, start + len, 1);
1166 #if !defined(CONFIG_SOFTMMU)
1167 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1168 unsigned long pc, void *puc)
1170 TranslationBlock *tb;
1171 PageDesc *p;
1172 int n;
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 TranslationBlock *current_tb = NULL;
1175 CPUState *env = cpu_single_env;
1176 int current_tb_modified = 0;
1177 target_ulong current_pc = 0;
1178 target_ulong current_cs_base = 0;
1179 int current_flags = 0;
1180 #endif
1182 addr &= TARGET_PAGE_MASK;
1183 p = page_find(addr >> TARGET_PAGE_BITS);
1184 if (!p)
1185 return;
1186 tb = p->first_tb;
1187 #ifdef TARGET_HAS_PRECISE_SMC
1188 if (tb && pc != 0) {
1189 current_tb = tb_find_pc(pc);
1191 #endif
1192 while (tb != NULL) {
1193 n = (long)tb & 3;
1194 tb = (TranslationBlock *)((long)tb & ~3);
1195 #ifdef TARGET_HAS_PRECISE_SMC
1196 if (current_tb == tb &&
1197 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1198 /* If we are modifying the current TB, we must stop
1199 its execution. We could be more precise by checking
1200 that the modification is after the current PC, but it
1201 would require a specialized function to partially
1202 restore the CPU state */
1204 current_tb_modified = 1;
1205 cpu_restore_state(current_tb, env, pc);
1206 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1207 &current_flags);
1209 #endif /* TARGET_HAS_PRECISE_SMC */
1210 tb_phys_invalidate(tb, addr);
1211 tb = tb->page_next[n];
1213 p->first_tb = NULL;
1214 #ifdef TARGET_HAS_PRECISE_SMC
1215 if (current_tb_modified) {
1216 /* we generate a block containing just the instruction
1217 modifying the memory. It will ensure that it cannot modify
1218 itself */
1219 env->current_tb = NULL;
1220 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1221 cpu_resume_from_signal(env, puc);
1223 #endif
1225 #endif
1227 /* add the tb in the target page and protect it if necessary */
1228 static inline void tb_alloc_page(TranslationBlock *tb,
1229 unsigned int n, tb_page_addr_t page_addr)
1231 PageDesc *p;
1232 #ifndef CONFIG_USER_ONLY
1233 bool page_already_protected;
1234 #endif
1236 tb->page_addr[n] = page_addr;
1237 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1238 tb->page_next[n] = p->first_tb;
1239 #ifndef CONFIG_USER_ONLY
1240 page_already_protected = p->first_tb != NULL;
1241 #endif
1242 p->first_tb = (TranslationBlock *)((long)tb | n);
1243 invalidate_page_bitmap(p);
1245 #if defined(TARGET_HAS_SMC) || 1
1247 #if defined(CONFIG_USER_ONLY)
1248 if (p->flags & PAGE_WRITE) {
1249 target_ulong addr;
1250 PageDesc *p2;
1251 int prot;
1253 /* force the host page as non writable (writes will have a
1254 page fault + mprotect overhead) */
1255 page_addr &= qemu_host_page_mask;
1256 prot = 0;
1257 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1258 addr += TARGET_PAGE_SIZE) {
1260 p2 = page_find (addr >> TARGET_PAGE_BITS);
1261 if (!p2)
1262 continue;
1263 prot |= p2->flags;
1264 p2->flags &= ~PAGE_WRITE;
1266 mprotect(g2h(page_addr), qemu_host_page_size,
1267 (prot & PAGE_BITS) & ~PAGE_WRITE);
1268 #ifdef DEBUG_TB_INVALIDATE
1269 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1270 page_addr);
1271 #endif
1273 #else
1274 /* if some code is already present, then the pages are already
1275 protected. So we handle the case where only the first TB is
1276 allocated in a physical page */
1277 if (!page_already_protected) {
1278 tlb_protect_code(page_addr);
1280 #endif
1282 #endif /* TARGET_HAS_SMC */
1285 /* add a new TB and link it to the physical page tables. phys_page2 is
1286 (-1) to indicate that only one page contains the TB. */
1287 void tb_link_page(TranslationBlock *tb,
1288 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1290 unsigned int h;
1291 TranslationBlock **ptb;
1293 /* Grab the mmap lock to stop another thread invalidating this TB
1294 before we are done. */
1295 mmap_lock();
1296 /* add in the physical hash table */
1297 h = tb_phys_hash_func(phys_pc);
1298 ptb = &tb_phys_hash[h];
1299 tb->phys_hash_next = *ptb;
1300 *ptb = tb;
1302 /* add in the page list */
1303 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1304 if (phys_page2 != -1)
1305 tb_alloc_page(tb, 1, phys_page2);
1306 else
1307 tb->page_addr[1] = -1;
1309 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1310 tb->jmp_next[0] = NULL;
1311 tb->jmp_next[1] = NULL;
1313 /* init original jump addresses */
1314 if (tb->tb_next_offset[0] != 0xffff)
1315 tb_reset_jump(tb, 0);
1316 if (tb->tb_next_offset[1] != 0xffff)
1317 tb_reset_jump(tb, 1);
1319 #ifdef DEBUG_TB_CHECK
1320 tb_page_check();
1321 #endif
1322 mmap_unlock();
1325 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1326 tb[1].tc_ptr. Return NULL if not found */
1327 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1329 int m_min, m_max, m;
1330 unsigned long v;
1331 TranslationBlock *tb;
1333 if (nb_tbs <= 0)
1334 return NULL;
1335 if (tc_ptr < (unsigned long)code_gen_buffer ||
1336 tc_ptr >= (unsigned long)code_gen_ptr)
1337 return NULL;
1338 /* binary search (cf Knuth) */
1339 m_min = 0;
1340 m_max = nb_tbs - 1;
1341 while (m_min <= m_max) {
1342 m = (m_min + m_max) >> 1;
1343 tb = &tbs[m];
1344 v = (unsigned long)tb->tc_ptr;
1345 if (v == tc_ptr)
1346 return tb;
1347 else if (tc_ptr < v) {
1348 m_max = m - 1;
1349 } else {
1350 m_min = m + 1;
1353 return &tbs[m_max];
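/* The binary search above relies on tbs[] being ordered by tc_ptr: tb_alloc()
   hands out entries sequentially and code_gen_ptr only grows until the next
   tb_flush(), so translated-code addresses increase with the TB index. */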
1356 static void tb_reset_jump_recursive(TranslationBlock *tb);
1358 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1360 TranslationBlock *tb1, *tb_next, **ptb;
1361 unsigned int n1;
1363 tb1 = tb->jmp_next[n];
1364 if (tb1 != NULL) {
1365 /* find head of list */
1366 for(;;) {
1367 n1 = (long)tb1 & 3;
1368 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1369 if (n1 == 2)
1370 break;
1371 tb1 = tb1->jmp_next[n1];
1373 /* we are now sure now that tb jumps to tb1 */
1374 tb_next = tb1;
1376 /* remove tb from the jmp_first list */
1377 ptb = &tb_next->jmp_first;
1378 for(;;) {
1379 tb1 = *ptb;
1380 n1 = (long)tb1 & 3;
1381 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1382 if (n1 == n && tb1 == tb)
1383 break;
1384 ptb = &tb1->jmp_next[n1];
1386 *ptb = tb->jmp_next[n];
1387 tb->jmp_next[n] = NULL;
1389 /* suppress the jump to next tb in generated code */
1390 tb_reset_jump(tb, n);
1392 /* suppress jumps in the tb on which we could have jumped */
1393 tb_reset_jump_recursive(tb_next);
1397 static void tb_reset_jump_recursive(TranslationBlock *tb)
1399 tb_reset_jump_recursive2(tb, 0);
1400 tb_reset_jump_recursive2(tb, 1);
1403 #if defined(TARGET_HAS_ICE)
1404 #if defined(CONFIG_USER_ONLY)
1405 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1407 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1409 #else
1410 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1412 target_phys_addr_t addr;
1413 target_ulong pd;
1414 ram_addr_t ram_addr;
1415 PhysPageDesc *p;
1417 addr = cpu_get_phys_page_debug(env, pc);
1418 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1419 if (!p) {
1420 pd = IO_MEM_UNASSIGNED;
1421 } else {
1422 pd = p->phys_offset;
1424 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1425 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1427 #endif
1428 #endif /* TARGET_HAS_ICE */
1430 #if defined(CONFIG_USER_ONLY)
1431 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1436 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1437 int flags, CPUWatchpoint **watchpoint)
1439 return -ENOSYS;
1441 #else
1442 /* Add a watchpoint. */
1443 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1444 int flags, CPUWatchpoint **watchpoint)
1446 target_ulong len_mask = ~(len - 1);
1447 CPUWatchpoint *wp;
1449 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1450 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1451 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1452 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1453 return -EINVAL;
1455 wp = g_malloc(sizeof(*wp));
1457 wp->vaddr = addr;
1458 wp->len_mask = len_mask;
1459 wp->flags = flags;
1461 /* keep all GDB-injected watchpoints in front */
1462 if (flags & BP_GDB)
1463 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1464 else
1465 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1467 tlb_flush_page(env, addr);
1469 if (watchpoint)
1470 *watchpoint = wp;
1471 return 0;
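/* Example of the length check above (illustrative): len == 4 gives
   len_mask == ~3, so (addr & ~len_mask) != 0 rejects any address that is not
   4-byte aligned, while len == 3 is rejected outright as a non-power-of-2. */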
1474 /* Remove a specific watchpoint. */
1475 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1476 int flags)
1478 target_ulong len_mask = ~(len - 1);
1479 CPUWatchpoint *wp;
1481 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1482 if (addr == wp->vaddr && len_mask == wp->len_mask
1483 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1484 cpu_watchpoint_remove_by_ref(env, wp);
1485 return 0;
1488 return -ENOENT;
1491 /* Remove a specific watchpoint by reference. */
1492 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1494 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1496 tlb_flush_page(env, watchpoint->vaddr);
1498 g_free(watchpoint);
1501 /* Remove all matching watchpoints. */
1502 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1504 CPUWatchpoint *wp, *next;
1506 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1507 if (wp->flags & mask)
1508 cpu_watchpoint_remove_by_ref(env, wp);
1511 #endif
1513 /* Add a breakpoint. */
1514 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1515 CPUBreakpoint **breakpoint)
1517 #if defined(TARGET_HAS_ICE)
1518 CPUBreakpoint *bp;
1520 bp = g_malloc(sizeof(*bp));
1522 bp->pc = pc;
1523 bp->flags = flags;
1525 /* keep all GDB-injected breakpoints in front */
1526 if (flags & BP_GDB)
1527 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1528 else
1529 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1531 breakpoint_invalidate(env, pc);
1533 if (breakpoint)
1534 *breakpoint = bp;
1535 return 0;
1536 #else
1537 return -ENOSYS;
1538 #endif
1541 /* Remove a specific breakpoint. */
1542 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1544 #if defined(TARGET_HAS_ICE)
1545 CPUBreakpoint *bp;
1547 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1548 if (bp->pc == pc && bp->flags == flags) {
1549 cpu_breakpoint_remove_by_ref(env, bp);
1550 return 0;
1553 return -ENOENT;
1554 #else
1555 return -ENOSYS;
1556 #endif
1559 /* Remove a specific breakpoint by reference. */
1560 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1562 #if defined(TARGET_HAS_ICE)
1563 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1565 breakpoint_invalidate(env, breakpoint->pc);
1567 g_free(breakpoint);
1568 #endif
1571 /* Remove all matching breakpoints. */
1572 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1574 #if defined(TARGET_HAS_ICE)
1575 CPUBreakpoint *bp, *next;
1577 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1578 if (bp->flags & mask)
1579 cpu_breakpoint_remove_by_ref(env, bp);
1581 #endif
1584 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1585 CPU loop after each instruction */
1586 void cpu_single_step(CPUState *env, int enabled)
1588 #if defined(TARGET_HAS_ICE)
1589 if (env->singlestep_enabled != enabled) {
1590 env->singlestep_enabled = enabled;
1591 if (kvm_enabled())
1592 kvm_update_guest_debug(env, 0);
1593 else {
1594 /* must flush all the translated code to avoid inconsistencies */
1595 /* XXX: only flush what is necessary */
1596 tb_flush(env);
1599 #endif
1602 /* enable or disable low levels log */
1603 void cpu_set_log(int log_flags)
1605 loglevel = log_flags;
1606 if (loglevel && !logfile) {
1607 logfile = fopen(logfilename, log_append ? "a" : "w");
1608 if (!logfile) {
1609 perror(logfilename);
1610 _exit(1);
1612 #if !defined(CONFIG_SOFTMMU)
1613 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1615 static char logfile_buf[4096];
1616 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1618 #elif !defined(_WIN32)
1619 /* Win32 doesn't support line-buffering and requires size >= 2 */
1620 setvbuf(logfile, NULL, _IOLBF, 0);
1621 #endif
1622 log_append = 1;
1624 if (!loglevel && logfile) {
1625 fclose(logfile);
1626 logfile = NULL;
1630 void cpu_set_log_filename(const char *filename)
1632 logfilename = strdup(filename);
1633 if (logfile) {
1634 fclose(logfile);
1635 logfile = NULL;
1637 cpu_set_log(loglevel);
1640 static void cpu_unlink_tb(CPUState *env)
1642 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1643 problem and hope the cpu will stop of its own accord. For userspace
1644 emulation this often isn't actually as bad as it sounds. Often
1645 signals are used primarily to interrupt blocking syscalls. */
1646 TranslationBlock *tb;
1647 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1649 spin_lock(&interrupt_lock);
1650 tb = env->current_tb;
1651 /* if the cpu is currently executing code, we must unlink it and
1652 all the potentially executing TB */
1653 if (tb) {
1654 env->current_tb = NULL;
1655 tb_reset_jump_recursive(tb);
1657 spin_unlock(&interrupt_lock);
1660 #ifndef CONFIG_USER_ONLY
1661 /* mask must never be zero, except for A20 change call */
1662 static void tcg_handle_interrupt(CPUState *env, int mask)
1664 int old_mask;
1666 old_mask = env->interrupt_request;
1667 env->interrupt_request |= mask;
1670 * If called from iothread context, wake the target cpu in
1671 * case its halted.
1673 if (!qemu_cpu_is_self(env)) {
1674 qemu_cpu_kick(env);
1675 return;
1678 if (use_icount) {
1679 env->icount_decr.u16.high = 0xffff;
1680 if (!can_do_io(env)
1681 && (mask & ~old_mask) != 0) {
1682 cpu_abort(env, "Raised interrupt while not in I/O function");
1684 } else {
1685 cpu_unlink_tb(env);
1689 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1691 #else /* CONFIG_USER_ONLY */
1693 void cpu_interrupt(CPUState *env, int mask)
1695 env->interrupt_request |= mask;
1696 cpu_unlink_tb(env);
1698 #endif /* CONFIG_USER_ONLY */
1700 void cpu_reset_interrupt(CPUState *env, int mask)
1702 env->interrupt_request &= ~mask;
1705 void cpu_exit(CPUState *env)
1707 env->exit_request = 1;
1708 cpu_unlink_tb(env);
1711 const CPULogItem cpu_log_items[] = {
1712 { CPU_LOG_TB_OUT_ASM, "out_asm",
1713 "show generated host assembly code for each compiled TB" },
1714 { CPU_LOG_TB_IN_ASM, "in_asm",
1715 "show target assembly code for each compiled TB" },
1716 { CPU_LOG_TB_OP, "op",
1717 "show micro ops for each compiled TB" },
1718 { CPU_LOG_TB_OP_OPT, "op_opt",
1719 "show micro ops "
1720 #ifdef TARGET_I386
1721 "before eflags optimization and "
1722 #endif
1723 "after liveness analysis" },
1724 { CPU_LOG_INT, "int",
1725 "show interrupts/exceptions in short format" },
1726 { CPU_LOG_EXEC, "exec",
1727 "show trace before each executed TB (lots of logs)" },
1728 { CPU_LOG_TB_CPU, "cpu",
1729 "show CPU state before block translation" },
1730 #ifdef TARGET_I386
1731 { CPU_LOG_PCALL, "pcall",
1732 "show protected mode far calls/returns/exceptions" },
1733 { CPU_LOG_RESET, "cpu_reset",
1734 "show CPU state before CPU resets" },
1735 #endif
1736 #ifdef DEBUG_IOPORT
1737 { CPU_LOG_IOPORT, "ioport",
1738 "show all i/o ports accesses" },
1739 #endif
1740 { 0, NULL, NULL },
1743 #ifndef CONFIG_USER_ONLY
1744 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1745 = QLIST_HEAD_INITIALIZER(memory_client_list);
1747 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1748 ram_addr_t size,
1749 ram_addr_t phys_offset,
1750 bool log_dirty)
1752 CPUPhysMemoryClient *client;
1753 QLIST_FOREACH(client, &memory_client_list, list) {
1754 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1758 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1759 target_phys_addr_t end)
1761 CPUPhysMemoryClient *client;
1762 QLIST_FOREACH(client, &memory_client_list, list) {
1763 int r = client->sync_dirty_bitmap(client, start, end);
1764 if (r < 0)
1765 return r;
1767 return 0;
1770 static int cpu_notify_migration_log(int enable)
1772 CPUPhysMemoryClient *client;
1773 QLIST_FOREACH(client, &memory_client_list, list) {
1774 int r = client->migration_log(client, enable);
1775 if (r < 0)
1776 return r;
1778 return 0;
1781 struct last_map {
1782 target_phys_addr_t start_addr;
1783 ram_addr_t size;
1784 ram_addr_t phys_offset;
1787 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1788 * address. Each intermediate table provides the next L2_BITs of guest
1789 * physical address space. The number of levels vary based on host and
1790 * guest configuration, making it efficient to build the final guest
1791 * physical address by seeding the L1 offset and shifting and adding in
1792 * each L2 offset as we recurse through them. */
1793 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1794 void **lp, target_phys_addr_t addr,
1795 struct last_map *map)
1797 int i;
1799 if (*lp == NULL) {
1800 return;
1802 if (level == 0) {
1803 PhysPageDesc *pd = *lp;
1804 addr <<= L2_BITS + TARGET_PAGE_BITS;
1805 for (i = 0; i < L2_SIZE; ++i) {
1806 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1807 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1809 if (map->size &&
1810 start_addr == map->start_addr + map->size &&
1811 pd[i].phys_offset == map->phys_offset + map->size) {
1813 map->size += TARGET_PAGE_SIZE;
1814 continue;
1815 } else if (map->size) {
1816 client->set_memory(client, map->start_addr,
1817 map->size, map->phys_offset, false);
1820 map->start_addr = start_addr;
1821 map->size = TARGET_PAGE_SIZE;
1822 map->phys_offset = pd[i].phys_offset;
1825 } else {
1826 void **pp = *lp;
1827 for (i = 0; i < L2_SIZE; ++i) {
1828 phys_page_for_each_1(client, level - 1, pp + i,
1829 (addr << L2_BITS) | i, map);
1834 static void phys_page_for_each(CPUPhysMemoryClient *client)
1836 int i;
1837 struct last_map map = { };
1839 for (i = 0; i < P_L1_SIZE; ++i) {
1840 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1841 l1_phys_map + i, i, &map);
1843 if (map.size) {
1844 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1845 false);
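/* phys_page_for_each_1() coalesces runs of pages whose guest-physical and
   phys_offset ranges are both contiguous into a single set_memory() callback
   via 'struct last_map'; the call above flushes the final pending range. */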
1849 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1851 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1852 phys_page_for_each(client);
1855 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1857 QLIST_REMOVE(client, list);
1859 #endif
1861 static int cmp1(const char *s1, int n, const char *s2)
1863 if (strlen(s2) != n)
1864 return 0;
1865 return memcmp(s1, s2, n) == 0;
1868 /* takes a comma separated list of log masks. Return 0 if error. */
1869 int cpu_str_to_log_mask(const char *str)
1871 const CPULogItem *item;
1872 int mask;
1873 const char *p, *p1;
1875 p = str;
1876 mask = 0;
1877 for(;;) {
1878 p1 = strchr(p, ',');
1879 if (!p1)
1880 p1 = p + strlen(p);
1881 if(cmp1(p,p1-p,"all")) {
1882 for(item = cpu_log_items; item->mask != 0; item++) {
1883 mask |= item->mask;
1885 } else {
1886 for(item = cpu_log_items; item->mask != 0; item++) {
1887 if (cmp1(p, p1 - p, item->name))
1888 goto found;
1890 return 0;
1892 found:
1893 mask |= item->mask;
1894 if (*p1 != ',')
1895 break;
1896 p = p1 + 1;
1898 return mask;
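/* Usage example (illustrative): cpu_str_to_log_mask("in_asm,exec") returns
   CPU_LOG_TB_IN_ASM | CPU_LOG_EXEC, "all" ORs in every entry of
   cpu_log_items[], and an unknown name makes the whole call return 0. */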
1901 void cpu_abort(CPUState *env, const char *fmt, ...)
1903 va_list ap;
1904 va_list ap2;
1906 va_start(ap, fmt);
1907 va_copy(ap2, ap);
1908 fprintf(stderr, "qemu: fatal: ");
1909 vfprintf(stderr, fmt, ap);
1910 fprintf(stderr, "\n");
1911 #ifdef TARGET_I386
1912 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1913 #else
1914 cpu_dump_state(env, stderr, fprintf, 0);
1915 #endif
1916 if (qemu_log_enabled()) {
1917 qemu_log("qemu: fatal: ");
1918 qemu_log_vprintf(fmt, ap2);
1919 qemu_log("\n");
1920 #ifdef TARGET_I386
1921 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1922 #else
1923 log_cpu_state(env, 0);
1924 #endif
1925 qemu_log_flush();
1926 qemu_log_close();
1928 va_end(ap2);
1929 va_end(ap);
1930 #if defined(CONFIG_USER_ONLY)
1932 struct sigaction act;
1933 sigfillset(&act.sa_mask);
1934 act.sa_handler = SIG_DFL;
1935 sigaction(SIGABRT, &act, NULL);
1937 #endif
1938 abort();
1941 CPUState *cpu_copy(CPUState *env)
1943 CPUState *new_env = cpu_init(env->cpu_model_str);
1944 CPUState *next_cpu = new_env->next_cpu;
1945 int cpu_index = new_env->cpu_index;
1946 #if defined(TARGET_HAS_ICE)
1947 CPUBreakpoint *bp;
1948 CPUWatchpoint *wp;
1949 #endif
1951 memcpy(new_env, env, sizeof(CPUState));
1953 /* Preserve chaining and index. */
1954 new_env->next_cpu = next_cpu;
1955 new_env->cpu_index = cpu_index;
1957 /* Clone all break/watchpoints.
1958 Note: Once we support ptrace with hw-debug register access, make sure
1959 BP_CPU break/watchpoints are handled correctly on clone. */
1960 QTAILQ_INIT(&env->breakpoints);
1961 QTAILQ_INIT(&env->watchpoints);
1962 #if defined(TARGET_HAS_ICE)
1963 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1964 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1966 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1967 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1968 wp->flags, NULL);
1970 #endif
1972 return new_env;
1975 #if !defined(CONFIG_USER_ONLY)
1977 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1979 unsigned int i;
1981 /* Discard jump cache entries for any tb which might potentially
1982 overlap the flushed page. */
1983 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1984 memset (&env->tb_jmp_cache[i], 0,
1985 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1987 i = tb_jmp_cache_hash_page(addr);
1988 memset (&env->tb_jmp_cache[i], 0,
1989 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1992 static CPUTLBEntry s_cputlb_empty_entry = {
1993 .addr_read = -1,
1994 .addr_write = -1,
1995 .addr_code = -1,
1996 .addend = -1,
1999 /* NOTE: if flush_global is true, also flush global entries (not
2000 implemented yet) */
2001 void tlb_flush(CPUState *env, int flush_global)
2003 int i;
2005 #if defined(DEBUG_TLB)
2006 printf("tlb_flush:\n");
2007 #endif
2008 /* must reset current TB so that interrupts cannot modify the
2009 links while we are modifying them */
2010 env->current_tb = NULL;
2012 for(i = 0; i < CPU_TLB_SIZE; i++) {
2013 int mmu_idx;
2014 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2015 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2019 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2021 env->tlb_flush_addr = -1;
2022 env->tlb_flush_mask = 0;
2023 tlb_flush_count++;
2026 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2028 if (addr == (tlb_entry->addr_read &
2029 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2030 addr == (tlb_entry->addr_write &
2031 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2032 addr == (tlb_entry->addr_code &
2033 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2034 *tlb_entry = s_cputlb_empty_entry;
2038 void tlb_flush_page(CPUState *env, target_ulong addr)
2040 int i;
2041 int mmu_idx;
2043 #if defined(DEBUG_TLB)
2044 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2045 #endif
2046 /* Check if we need to flush due to large pages. */
2047 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2048 #if defined(DEBUG_TLB)
2049 printf("tlb_flush_page: forced full flush ("
2050 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2051 env->tlb_flush_addr, env->tlb_flush_mask);
2052 #endif
2053 tlb_flush(env, 1);
2054 return;
2056 /* must reset current TB so that interrupts cannot modify the
2057 links while we are modifying them */
2058 env->current_tb = NULL;
2060 addr &= TARGET_PAGE_MASK;
2061 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2062 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2063 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2065 tlb_flush_jmp_cache(env, addr);
2068 /* update the TLBs so that writes to code in the virtual page 'addr'
2069 can be detected */
2070 static void tlb_protect_code(ram_addr_t ram_addr)
2072 cpu_physical_memory_reset_dirty(ram_addr,
2073 ram_addr + TARGET_PAGE_SIZE,
2074 CODE_DIRTY_FLAG);
2077 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2078 tested for self modifying code */
2079 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2080 target_ulong vaddr)
2082 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2085 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2086 unsigned long start, unsigned long length)
2088 unsigned long addr;
2089 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2090 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2091 if ((addr - start) < length) {
2092 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2097 /* Note: start and end must be within the same ram block. */
2098 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2099 int dirty_flags)
2101 CPUState *env;
2102 unsigned long length, start1;
2103 int i;
2105 start &= TARGET_PAGE_MASK;
2106 end = TARGET_PAGE_ALIGN(end);
2108 length = end - start;
2109 if (length == 0)
2110 return;
2111 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2113 /* we modify the TLB cache so that the dirty bit will be set again
2114 when accessing the range */
2115 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2116 /* Check that we don't span multiple blocks - this breaks the
2117 address comparisons below. */
2118 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2119 != (end - 1) - start) {
2120 abort();
2123 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2124 int mmu_idx;
2125 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2126 for(i = 0; i < CPU_TLB_SIZE; i++)
2127 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2128 start1, length);
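/* Usage sketch (illustrative): a typical consumer of the dirty bitmap is a
 * display adapter that scans its framebuffer pages and then clears the bits
 * it has handled.  The VGA_DIRTY_FLAG constant, the redraw_page() helper and
 * fb_start/fb_end below are assumptions used only to show the pattern:
 *
 *     ram_addr_t page;
 *     for (page = fb_start; page < fb_end; page += TARGET_PAGE_SIZE) {
 *         if (cpu_physical_memory_get_dirty(page, VGA_DIRTY_FLAG)) {
 *             redraw_page(page);
 *         }
 *     }
 *     cpu_physical_memory_reset_dirty(fb_start, fb_end, VGA_DIRTY_FLAG);
 */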
2133 int cpu_physical_memory_set_dirty_tracking(int enable)
2135 int ret = 0;
2136 in_migration = enable;
2137 ret = cpu_notify_migration_log(!!enable);
2138 return ret;
2141 int cpu_physical_memory_get_dirty_tracking(void)
2143 return in_migration;
2146 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2147 target_phys_addr_t end_addr)
2149 int ret;
2151 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2152 return ret;
2155 int cpu_physical_log_start(target_phys_addr_t start_addr,
2156 ram_addr_t size)
2158 CPUPhysMemoryClient *client;
2159 QLIST_FOREACH(client, &memory_client_list, list) {
2160 if (client->log_start) {
2161 int r = client->log_start(client, start_addr, size);
2162 if (r < 0) {
2163 return r;
2167 return 0;
2170 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2171 ram_addr_t size)
2173 CPUPhysMemoryClient *client;
2174 QLIST_FOREACH(client, &memory_client_list, list) {
2175 if (client->log_stop) {
2176 int r = client->log_stop(client, start_addr, size);
2177 if (r < 0) {
2178 return r;
2182 return 0;
2185 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2187 ram_addr_t ram_addr;
2188 void *p;
2190 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2191 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2192 + tlb_entry->addend);
2193 ram_addr = qemu_ram_addr_from_host_nofail(p);
2194 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2195 tlb_entry->addr_write |= TLB_NOTDIRTY;
2200 /* update the TLB according to the current state of the dirty bits */
2201 void cpu_tlb_update_dirty(CPUState *env)
2203 int i;
2204 int mmu_idx;
2205 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2206 for(i = 0; i < CPU_TLB_SIZE; i++)
2207 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2211 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2213 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2214 tlb_entry->addr_write = vaddr;
2217 /* update the TLB corresponding to virtual page vaddr
2218 so that it is no longer dirty */
2219 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2221 int i;
2222 int mmu_idx;
2224 vaddr &= TARGET_PAGE_MASK;
2225 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2226 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2227 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2230 /* Our TLB does not support large pages, so remember the area covered by
2231 large pages and trigger a full TLB flush if these are invalidated. */
2232 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2233 target_ulong size)
2235 target_ulong mask = ~(size - 1);
2237 if (env->tlb_flush_addr == (target_ulong)-1) {
2238 env->tlb_flush_addr = vaddr & mask;
2239 env->tlb_flush_mask = mask;
2240 return;
2242 /* Extend the existing region to include the new page.
2243 This is a compromise between unnecessary flushes and the cost
2244 of maintaining a full variable size TLB. */
2245 mask &= env->tlb_flush_mask;
2246 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2247 mask <<= 1;
2249 env->tlb_flush_addr &= mask;
2250 env->tlb_flush_mask = mask;
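/* Worked example (illustrative): after a 2MB page is installed at
 * 0x40000000, tlb_flush_addr is 0x40000000 and tlb_flush_mask is ~0x1fffff.
 * If a second 2MB page is later installed at 0x40600000, the loop above
 * widens the mask until both addresses agree under it, leaving
 * tlb_flush_addr = 0x40000000 and tlb_flush_mask = ~0x7fffff, i.e. one 8MB
 * region whose invalidation by tlb_flush_page() forces a full flush. */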
2253 /* Add a new TLB entry. At most one entry for a given virtual address
2254 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2255 supplied size is only used by tlb_flush_page. */
2256 void tlb_set_page(CPUState *env, target_ulong vaddr,
2257 target_phys_addr_t paddr, int prot,
2258 int mmu_idx, target_ulong size)
2260 PhysPageDesc *p;
2261 unsigned long pd;
2262 unsigned int index;
2263 target_ulong address;
2264 target_ulong code_address;
2265 unsigned long addend;
2266 CPUTLBEntry *te;
2267 CPUWatchpoint *wp;
2268 target_phys_addr_t iotlb;
2270 assert(size >= TARGET_PAGE_SIZE);
2271 if (size != TARGET_PAGE_SIZE) {
2272 tlb_add_large_page(env, vaddr, size);
2274 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2275 if (!p) {
2276 pd = IO_MEM_UNASSIGNED;
2277 } else {
2278 pd = p->phys_offset;
2280 #if defined(DEBUG_TLB)
2281 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2282 " prot=%x idx=%d pd=0x%08lx\n",
2283 vaddr, paddr, prot, mmu_idx, pd);
2284 #endif
2286 address = vaddr;
2287 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2288 /* IO memory case (romd handled later) */
2289 address |= TLB_MMIO;
2291 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2292 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2293 /* Normal RAM. */
2294 iotlb = pd & TARGET_PAGE_MASK;
2295 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2296 iotlb |= IO_MEM_NOTDIRTY;
2297 else
2298 iotlb |= IO_MEM_ROM;
2299 } else {
2300 /* IO handlers are currently passed a physical address.
2301 It would be nice to pass an offset from the base address
2302 of that region. This would avoid having to special case RAM,
2303 and avoid full address decoding in every device.
2304 We can't use the high bits of pd for this because
2305 IO_MEM_ROMD uses these as a ram address. */
2306 iotlb = (pd & ~TARGET_PAGE_MASK);
2307 if (p) {
2308 iotlb += p->region_offset;
2309 } else {
2310 iotlb += paddr;
2314 code_address = address;
2315 /* Make accesses to pages with watchpoints go via the
2316 watchpoint trap routines. */
2317 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2318 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2319 /* Avoid trapping reads of pages with a write breakpoint. */
2320 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2321 iotlb = io_mem_watch + paddr;
2322 address |= TLB_MMIO;
2323 break;
2328 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2329 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2330 te = &env->tlb_table[mmu_idx][index];
2331 te->addend = addend - vaddr;
2332 if (prot & PAGE_READ) {
2333 te->addr_read = address;
2334 } else {
2335 te->addr_read = -1;
2338 if (prot & PAGE_EXEC) {
2339 te->addr_code = code_address;
2340 } else {
2341 te->addr_code = -1;
2343 if (prot & PAGE_WRITE) {
2344 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2345 (pd & IO_MEM_ROMD)) {
2346 /* Write access calls the I/O callback. */
2347 te->addr_write = address | TLB_MMIO;
2348 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2349 !cpu_physical_memory_is_dirty(pd)) {
2350 te->addr_write = address | TLB_NOTDIRTY;
2351 } else {
2352 te->addr_write = address;
2354 } else {
2355 te->addr_write = -1;
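/* Usage sketch (illustrative): tlb_set_page() is called from the per-target
 * MMU fault path once the guest page tables have been walked; the local
 * variable names below are assumptions, only the call itself comes from this
 * file's interface:
 *
 *     vaddr &= TARGET_PAGE_MASK;
 *     paddr &= TARGET_PAGE_MASK;
 *     tlb_set_page(env, vaddr, paddr, prot, mmu_idx, page_size);
 *
 * Passing page_size > TARGET_PAGE_SIZE only feeds tlb_add_large_page();
 * the installed entry itself still covers a single TARGET_PAGE_SIZE page. */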
2359 #else
2361 void tlb_flush(CPUState *env, int flush_global)
2365 void tlb_flush_page(CPUState *env, target_ulong addr)
2370 * Walks guest process memory "regions" one by one
2371 * and calls callback function 'fn' for each region.
2374 struct walk_memory_regions_data
2376 walk_memory_regions_fn fn;
2377 void *priv;
2378 unsigned long start;
2379 int prot;
2382 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2383 abi_ulong end, int new_prot)
2385 if (data->start != -1ul) {
2386 int rc = data->fn(data->priv, data->start, end, data->prot);
2387 if (rc != 0) {
2388 return rc;
2392 data->start = (new_prot ? end : -1ul);
2393 data->prot = new_prot;
2395 return 0;
2398 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2399 abi_ulong base, int level, void **lp)
2401 abi_ulong pa;
2402 int i, rc;
2404 if (*lp == NULL) {
2405 return walk_memory_regions_end(data, base, 0);
2408 if (level == 0) {
2409 PageDesc *pd = *lp;
2410 for (i = 0; i < L2_SIZE; ++i) {
2411 int prot = pd[i].flags;
2413 pa = base | (i << TARGET_PAGE_BITS);
2414 if (prot != data->prot) {
2415 rc = walk_memory_regions_end(data, pa, prot);
2416 if (rc != 0) {
2417 return rc;
2421 } else {
2422 void **pp = *lp;
2423 for (i = 0; i < L2_SIZE; ++i) {
2424 pa = base | ((abi_ulong)i <<
2425 (TARGET_PAGE_BITS + L2_BITS * level));
2426 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2427 if (rc != 0) {
2428 return rc;
2433 return 0;
2436 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2438 struct walk_memory_regions_data data;
2439 unsigned long i;
2441 data.fn = fn;
2442 data.priv = priv;
2443 data.start = -1ul;
2444 data.prot = 0;
2446 for (i = 0; i < V_L1_SIZE; i++) {
2447 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2448 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2449 if (rc != 0) {
2450 return rc;
2454 return walk_memory_regions_end(&data, 0, 0);
2457 static int dump_region(void *priv, abi_ulong start,
2458 abi_ulong end, unsigned long prot)
2460 FILE *f = (FILE *)priv;
2462 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2463 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2464 start, end, end - start,
2465 ((prot & PAGE_READ) ? 'r' : '-'),
2466 ((prot & PAGE_WRITE) ? 'w' : '-'),
2467 ((prot & PAGE_EXEC) ? 'x' : '-'));
2469 return (0);
2472 /* dump memory mappings */
2473 void page_dump(FILE *f)
2475 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2476 "start", "end", "size", "prot");
2477 walk_memory_regions(f, dump_region);
2480 int page_get_flags(target_ulong address)
2482 PageDesc *p;
2484 p = page_find(address >> TARGET_PAGE_BITS);
2485 if (!p)
2486 return 0;
2487 return p->flags;
2490 /* Modify the flags of a page and invalidate the code if necessary.
2491 The flag PAGE_WRITE_ORG is positioned automatically depending
2492 on PAGE_WRITE. The mmap_lock should already be held. */
2493 void page_set_flags(target_ulong start, target_ulong end, int flags)
2495 target_ulong addr, len;
2497 /* This function should never be called with addresses outside the
2498 guest address space. If this assert fires, it probably indicates
2499 a missing call to h2g_valid. */
2500 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2501 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2502 #endif
2503 assert(start < end);
2505 start = start & TARGET_PAGE_MASK;
2506 end = TARGET_PAGE_ALIGN(end);
2508 if (flags & PAGE_WRITE) {
2509 flags |= PAGE_WRITE_ORG;
2512 for (addr = start, len = end - start;
2513 len != 0;
2514 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2515 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2517 /* If the write protection bit is set, then we invalidate
2518 the code inside. */
2519 if (!(p->flags & PAGE_WRITE) &&
2520 (flags & PAGE_WRITE) &&
2521 p->first_tb) {
2522 tb_invalidate_phys_page(addr, 0, NULL);
2524 p->flags = flags;
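/* Usage sketch (illustrative): in user-mode emulation the mmap/mprotect
 * emulation is the main caller; after the host mapping succeeds it records
 * the guest-visible protection (start, len and prot are assumed locals):
 *
 *     page_set_flags(start, start + len, prot | PAGE_VALID);
 *
 * Making a page writable that previously was not is what triggers the
 * tb_invalidate_phys_page() call above for pages holding translated code. */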
2528 int page_check_range(target_ulong start, target_ulong len, int flags)
2530 PageDesc *p;
2531 target_ulong end;
2532 target_ulong addr;
2534 /* This function should never be called with addresses outside the
2535 guest address space. If this assert fires, it probably indicates
2536 a missing call to h2g_valid. */
2537 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2538 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2539 #endif
2541 if (len == 0) {
2542 return 0;
2544 if (start + len - 1 < start) {
2545 /* We've wrapped around. */
2546 return -1;
2549 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2550 start = start & TARGET_PAGE_MASK;
2552 for (addr = start, len = end - start;
2553 len != 0;
2554 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2555 p = page_find(addr >> TARGET_PAGE_BITS);
2556 if( !p )
2557 return -1;
2558 if( !(p->flags & PAGE_VALID) )
2559 return -1;
2561 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2562 return -1;
2563 if (flags & PAGE_WRITE) {
2564 if (!(p->flags & PAGE_WRITE_ORG))
2565 return -1;
2566 /* unprotect the page if it was put read-only because it
2567 contains translated code */
2568 if (!(p->flags & PAGE_WRITE)) {
2569 if (!page_unprotect(addr, 0, NULL))
2570 return -1;
2572 return 0;
2575 return 0;
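/* Usage sketch (illustrative): this is the primitive behind guest pointer
 * validation in the user-mode syscall layer; before copying data in or out
 * the whole range is checked, e.g. (guest_addr, len and the EFAULT return
 * convention are assumptions):
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ | PAGE_WRITE) != 0) {
 *         return -TARGET_EFAULT;
 *     }
 *
 * The write check goes through PAGE_WRITE_ORG, so pages temporarily made
 * read-only to protect translated code still count as writable. */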
2578 /* called from signal handler: invalidate the code and unprotect the
2579 page. Return TRUE if the fault was successfully handled. */
2580 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2582 unsigned int prot;
2583 PageDesc *p;
2584 target_ulong host_start, host_end, addr;
2586 /* Technically this isn't safe inside a signal handler. However we
2587 know this only ever happens in a synchronous SEGV handler, so in
2588 practice it seems to be ok. */
2589 mmap_lock();
2591 p = page_find(address >> TARGET_PAGE_BITS);
2592 if (!p) {
2593 mmap_unlock();
2594 return 0;
2597 /* if the page was really writable, then we change its
2598 protection back to writable */
2599 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2600 host_start = address & qemu_host_page_mask;
2601 host_end = host_start + qemu_host_page_size;
2603 prot = 0;
2604 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2605 p = page_find(addr >> TARGET_PAGE_BITS);
2606 p->flags |= PAGE_WRITE;
2607 prot |= p->flags;
2609 /* and since the content will be modified, we must invalidate
2610 the corresponding translated code. */
2611 tb_invalidate_phys_page(addr, pc, puc);
2612 #ifdef DEBUG_TB_CHECK
2613 tb_invalidate_check(addr);
2614 #endif
2616 mprotect((void *)g2h(host_start), qemu_host_page_size,
2617 prot & PAGE_BITS);
2619 mmap_unlock();
2620 return 1;
2622 mmap_unlock();
2623 return 0;
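/* Usage sketch (illustrative): the caller is the host SIGSEGV handler in
 * user-mode emulation.  A write fault on a page that was write-protected to
 * catch self-modifying code is resolved by unprotecting it and resuming
 * (fault_address, pc and puc are the handler's assumed locals):
 *
 *     if (page_unprotect(h2g(fault_address), pc, puc)) {
 *         return 1;
 *     }
 *     return 0;
 *
 * A non-zero return means the fault was ours and the faulting instruction is
 * simply restarted; zero means a real guest fault that must be forwarded. */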
2626 static inline void tlb_set_dirty(CPUState *env,
2627 unsigned long addr, target_ulong vaddr)
2630 #endif /* defined(CONFIG_USER_ONLY) */
2632 #if !defined(CONFIG_USER_ONLY)
2634 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2635 typedef struct subpage_t {
2636 target_phys_addr_t base;
2637 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2638 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2639 } subpage_t;
2641 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2642 ram_addr_t memory, ram_addr_t region_offset);
2643 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2644 ram_addr_t orig_memory,
2645 ram_addr_t region_offset);
2646 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2647 need_subpage) \
2648 do { \
2649 if (addr > start_addr) \
2650 start_addr2 = 0; \
2651 else { \
2652 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2653 if (start_addr2 > 0) \
2654 need_subpage = 1; \
2657 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2658 end_addr2 = TARGET_PAGE_SIZE - 1; \
2659 else { \
2660 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2661 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2662 need_subpage = 1; \
2664 } while (0)
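/* Worked example (illustrative), assuming TARGET_PAGE_SIZE == 0x1000:
 * registering a region with start_addr = 0x10000800 and orig_size = 0x400
 * visits the page at 0x10000000 with addr == start_addr, so start_addr2
 * becomes 0x800 and end_addr2 becomes 0xbff.  Both lie strictly inside the
 * page, need_subpage is set, and only offsets 0x800..0xbff of that page are
 * routed to the new handler while the rest keeps its previous mapping. */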
2666 /* register physical memory.
2667 For RAM, 'size' must be a multiple of the target page size.
2668 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2669 io memory page. The address used when calling the IO function is
2670 the offset from the start of the region, plus region_offset. Both
2671 start_addr and region_offset are rounded down to a page boundary
2672 before calculating this offset. This should not be a problem unless
2673 the low bits of start_addr and region_offset differ. */
2674 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2675 ram_addr_t size,
2676 ram_addr_t phys_offset,
2677 ram_addr_t region_offset,
2678 bool log_dirty)
2680 target_phys_addr_t addr, end_addr;
2681 PhysPageDesc *p;
2682 CPUState *env;
2683 ram_addr_t orig_size = size;
2684 subpage_t *subpage;
2686 assert(size);
2687 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2689 if (phys_offset == IO_MEM_UNASSIGNED) {
2690 region_offset = start_addr;
2692 region_offset &= TARGET_PAGE_MASK;
2693 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2694 end_addr = start_addr + (target_phys_addr_t)size;
2696 addr = start_addr;
2697 do {
2698 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2699 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2700 ram_addr_t orig_memory = p->phys_offset;
2701 target_phys_addr_t start_addr2, end_addr2;
2702 int need_subpage = 0;
2704 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2705 need_subpage);
2706 if (need_subpage) {
2707 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2708 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2709 &p->phys_offset, orig_memory,
2710 p->region_offset);
2711 } else {
2712 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2713 >> IO_MEM_SHIFT];
2715 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2716 region_offset);
2717 p->region_offset = 0;
2718 } else {
2719 p->phys_offset = phys_offset;
2720 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2721 (phys_offset & IO_MEM_ROMD))
2722 phys_offset += TARGET_PAGE_SIZE;
2724 } else {
2725 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2726 p->phys_offset = phys_offset;
2727 p->region_offset = region_offset;
2728 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2729 (phys_offset & IO_MEM_ROMD)) {
2730 phys_offset += TARGET_PAGE_SIZE;
2731 } else {
2732 target_phys_addr_t start_addr2, end_addr2;
2733 int need_subpage = 0;
2735 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2736 end_addr2, need_subpage);
2738 if (need_subpage) {
2739 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2740 &p->phys_offset, IO_MEM_UNASSIGNED,
2741 addr & TARGET_PAGE_MASK);
2742 subpage_register(subpage, start_addr2, end_addr2,
2743 phys_offset, region_offset);
2744 p->region_offset = 0;
2748 region_offset += TARGET_PAGE_SIZE;
2749 addr += TARGET_PAGE_SIZE;
2750 } while (addr != end_addr);
2752 /* since each CPU stores ram addresses in its TLB cache, we must
2753 reset the modified entries */
2754 /* XXX: slow ! */
2755 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2756 tlb_flush(env, 1);
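/* Usage sketch (illustrative): board and device code of this period usually
 * reaches this function through the cpu_register_physical_memory() wrapper
 * (assumed here to forward with region_offset = 0 and log_dirty = false),
 * pairing a RAM allocation or an I/O index with a bus address; my_read,
 * my_write, s and mmio_base are hypothetical device-side names:
 *
 *     ram_addr_t off = qemu_ram_alloc(NULL, "board.ram", ram_size);
 *     cpu_register_physical_memory(0, ram_size, off | IO_MEM_RAM);
 *
 *     int io = cpu_register_io_memory(my_read, my_write, s,
 *                                     DEVICE_NATIVE_ENDIAN);
 *     cpu_register_physical_memory(mmio_base, 0x1000, io);
 */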
2760 /* XXX: temporary until new memory mapping API */
2761 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2763 PhysPageDesc *p;
2765 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2766 if (!p)
2767 return IO_MEM_UNASSIGNED;
2768 return p->phys_offset;
2771 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2773 if (kvm_enabled())
2774 kvm_coalesce_mmio_region(addr, size);
2777 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2779 if (kvm_enabled())
2780 kvm_uncoalesce_mmio_region(addr, size);
2783 void qemu_flush_coalesced_mmio_buffer(void)
2785 if (kvm_enabled())
2786 kvm_flush_coalesced_mmio_buffer();
2789 #if defined(__linux__) && !defined(TARGET_S390X)
2791 #include <sys/vfs.h>
2793 #define HUGETLBFS_MAGIC 0x958458f6
2795 static long gethugepagesize(const char *path)
2797 struct statfs fs;
2798 int ret;
2800 do {
2801 ret = statfs(path, &fs);
2802 } while (ret != 0 && errno == EINTR);
2804 if (ret != 0) {
2805 perror(path);
2806 return 0;
2809 if (fs.f_type != HUGETLBFS_MAGIC)
2810 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2812 return fs.f_bsize;
2815 static void *file_ram_alloc(RAMBlock *block,
2816 ram_addr_t memory,
2817 const char *path)
2819 char *filename;
2820 void *area;
2821 int fd;
2822 #ifdef MAP_POPULATE
2823 int flags;
2824 #endif
2825 unsigned long hpagesize;
2827 hpagesize = gethugepagesize(path);
2828 if (!hpagesize) {
2829 return NULL;
2832 if (memory < hpagesize) {
2833 return NULL;
2836 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2837 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2838 return NULL;
2841 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2842 return NULL;
2845 fd = mkstemp(filename);
2846 if (fd < 0) {
2847 perror("unable to create backing store for hugepages");
2848 free(filename);
2849 return NULL;
2851 unlink(filename);
2852 free(filename);
2854 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2857 * ftruncate is not supported by hugetlbfs in older
2858 * hosts, so don't bother bailing out on errors.
2859 * If anything goes wrong with it under other filesystems,
2860 * mmap will fail.
2862 if (ftruncate(fd, memory))
2863 perror("ftruncate");
2865 #ifdef MAP_POPULATE
2866 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2867 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2868 * to sidestep this quirk.
2870 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2871 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2872 #else
2873 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2874 #endif
2875 if (area == MAP_FAILED) {
2876 perror("file_ram_alloc: can't mmap RAM pages");
2877 close(fd);
2878 return (NULL);
2880 block->fd = fd;
2881 return area;
2883 #endif
2885 static ram_addr_t find_ram_offset(ram_addr_t size)
2887 RAMBlock *block, *next_block;
2888 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2890 if (QLIST_EMPTY(&ram_list.blocks))
2891 return 0;
2893 QLIST_FOREACH(block, &ram_list.blocks, next) {
2894 ram_addr_t end, next = RAM_ADDR_MAX;
2896 end = block->offset + block->length;
2898 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2899 if (next_block->offset >= end) {
2900 next = MIN(next, next_block->offset);
2903 if (next - end >= size && next - end < mingap) {
2904 offset = end;
2905 mingap = next - end;
2908 return offset;
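/* Worked example (illustrative): with two existing blocks covering
 * [0, 1GB) and [1GB+64MB, 1GB+128MB) and a 32MB request, the candidate gaps
 * are the 64MB hole at 1GB and the unbounded space after the last block.
 * The hole is the smallest gap that still fits, so the new block is placed
 * at offset 1GB; picking the tightest fit keeps ram_addr_t space compact so
 * holes left by qemu_ram_free() can be reused. */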
2911 static ram_addr_t last_ram_offset(void)
2913 RAMBlock *block;
2914 ram_addr_t last = 0;
2916 QLIST_FOREACH(block, &ram_list.blocks, next)
2917 last = MAX(last, block->offset + block->length);
2919 return last;
2922 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2923 ram_addr_t size, void *host)
2925 RAMBlock *new_block, *block;
2927 size = TARGET_PAGE_ALIGN(size);
2928 new_block = g_malloc0(sizeof(*new_block));
2930 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2931 char *id = dev->parent_bus->info->get_dev_path(dev);
2932 if (id) {
2933 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2934 g_free(id);
2937 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2939 QLIST_FOREACH(block, &ram_list.blocks, next) {
2940 if (!strcmp(block->idstr, new_block->idstr)) {
2941 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2942 new_block->idstr);
2943 abort();
2947 new_block->offset = find_ram_offset(size);
2948 if (host) {
2949 new_block->host = host;
2950 new_block->flags |= RAM_PREALLOC_MASK;
2951 } else {
2952 if (mem_path) {
2953 #if defined (__linux__) && !defined(TARGET_S390X)
2954 new_block->host = file_ram_alloc(new_block, size, mem_path);
2955 if (!new_block->host) {
2956 new_block->host = qemu_vmalloc(size);
2957 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2959 #else
2960 fprintf(stderr, "-mem-path option unsupported\n");
2961 exit(1);
2962 #endif
2963 } else {
2964 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2965 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2966 a system-defined value, which is at least 256GB. Larger systems
2967 have larger values. We put the guest between the end of data
2968 segment (system break) and this value. We use 32GB as a base to
2969 have enough room for the system break to grow. */
2970 new_block->host = mmap((void*)0x800000000, size,
2971 PROT_EXEC|PROT_READ|PROT_WRITE,
2972 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2973 if (new_block->host == MAP_FAILED) {
2974 fprintf(stderr, "Allocating RAM failed\n");
2975 abort();
2977 #else
2978 if (xen_enabled()) {
2979 xen_ram_alloc(new_block->offset, size);
2980 } else {
2981 new_block->host = qemu_vmalloc(size);
2983 #endif
2984 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2987 new_block->length = size;
2989 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2991 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2992 last_ram_offset() >> TARGET_PAGE_BITS);
2993 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2994 0xff, size >> TARGET_PAGE_BITS);
2996 if (kvm_enabled())
2997 kvm_setup_guest_memory(new_block->host, size);
2999 return new_block->offset;
3002 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
3004 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
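/* Usage sketch (illustrative): a device normally allocates its backing RAM
 * once at init time and resolves the returned offset to a host pointer when
 * it needs to touch the contents; dev, s and vram_size are hypothetical:
 *
 *     s->vram_offset = qemu_ram_alloc(dev, "vga.vram", vram_size);
 *     s->vram_ptr    = qemu_get_ram_ptr(s->vram_offset);
 *
 * Passing the DeviceState gives the block a path-qualified idstr, which is
 * what allows migration to match up RAM blocks on the destination. */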
3007 void qemu_ram_free_from_ptr(ram_addr_t addr)
3009 RAMBlock *block;
3011 QLIST_FOREACH(block, &ram_list.blocks, next) {
3012 if (addr == block->offset) {
3013 QLIST_REMOVE(block, next);
3014 g_free(block);
3015 return;
3020 void qemu_ram_free(ram_addr_t addr)
3022 RAMBlock *block;
3024 QLIST_FOREACH(block, &ram_list.blocks, next) {
3025 if (addr == block->offset) {
3026 QLIST_REMOVE(block, next);
3027 if (block->flags & RAM_PREALLOC_MASK) {
3029 } else if (mem_path) {
3030 #if defined (__linux__) && !defined(TARGET_S390X)
3031 if (block->fd) {
3032 munmap(block->host, block->length);
3033 close(block->fd);
3034 } else {
3035 qemu_vfree(block->host);
3037 #else
3038 abort();
3039 #endif
3040 } else {
3041 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3042 munmap(block->host, block->length);
3043 #else
3044 if (xen_enabled()) {
3045 xen_invalidate_map_cache_entry(block->host);
3046 } else {
3047 qemu_vfree(block->host);
3049 #endif
3051 g_free(block);
3052 return;
3058 #ifndef _WIN32
3059 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3061 RAMBlock *block;
3062 ram_addr_t offset;
3063 int flags;
3064 void *area, *vaddr;
3066 QLIST_FOREACH(block, &ram_list.blocks, next) {
3067 offset = addr - block->offset;
3068 if (offset < block->length) {
3069 vaddr = block->host + offset;
3070 if (block->flags & RAM_PREALLOC_MASK) {
3072 } else {
3073 flags = MAP_FIXED;
3074 munmap(vaddr, length);
3075 if (mem_path) {
3076 #if defined(__linux__) && !defined(TARGET_S390X)
3077 if (block->fd) {
3078 #ifdef MAP_POPULATE
3079 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3080 MAP_PRIVATE;
3081 #else
3082 flags |= MAP_PRIVATE;
3083 #endif
3084 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3085 flags, block->fd, offset);
3086 } else {
3087 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3088 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3089 flags, -1, 0);
3091 #else
3092 abort();
3093 #endif
3094 } else {
3095 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3096 flags |= MAP_SHARED | MAP_ANONYMOUS;
3097 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3098 flags, -1, 0);
3099 #else
3100 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3101 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3102 flags, -1, 0);
3103 #endif
3105 if (area != vaddr) {
3106 fprintf(stderr, "Could not remap addr: "
3107 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3108 length, addr);
3109 exit(1);
3111 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3113 return;
3117 #endif /* !_WIN32 */
3119 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3120 With the exception of the softmmu code in this file, this should
3121 only be used for local memory (e.g. video ram) that the device owns,
3122 and knows it isn't going to access beyond the end of the block.
3124 It should not be used for general purpose DMA.
3125 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3127 void *qemu_get_ram_ptr(ram_addr_t addr)
3129 RAMBlock *block;
3131 QLIST_FOREACH(block, &ram_list.blocks, next) {
3132 if (addr - block->offset < block->length) {
3133 /* Move this entry to the start of the list. */
3134 if (block != QLIST_FIRST(&ram_list.blocks)) {
3135 QLIST_REMOVE(block, next);
3136 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3138 if (xen_enabled()) {
3139 /* We need to check if the requested address is in the RAM
3140 * because we don't want to map the entire memory in QEMU.
3141 * In that case just map until the end of the page.
3143 if (block->offset == 0) {
3144 return xen_map_cache(addr, 0, 0);
3145 } else if (block->host == NULL) {
3146 block->host =
3147 xen_map_cache(block->offset, block->length, 1);
3150 return block->host + (addr - block->offset);
3154 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3155 abort();
3157 return NULL;
3160 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3161 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3163 void *qemu_safe_ram_ptr(ram_addr_t addr)
3165 RAMBlock *block;
3167 QLIST_FOREACH(block, &ram_list.blocks, next) {
3168 if (addr - block->offset < block->length) {
3169 if (xen_enabled()) {
3170 /* We need to check if the requested address is in the RAM
3171 * because we don't want to map the entire memory in QEMU.
3172 * In that case just map until the end of the page.
3174 if (block->offset == 0) {
3175 return xen_map_cache(addr, 0, 0);
3176 } else if (block->host == NULL) {
3177 block->host =
3178 xen_map_cache(block->offset, block->length, 1);
3181 return block->host + (addr - block->offset);
3185 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3186 abort();
3188 return NULL;
3191 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3192 * but takes a size argument */
3193 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3195 if (*size == 0) {
3196 return NULL;
3198 if (xen_enabled()) {
3199 return xen_map_cache(addr, *size, 1);
3200 } else {
3201 RAMBlock *block;
3203 QLIST_FOREACH(block, &ram_list.blocks, next) {
3204 if (addr - block->offset < block->length) {
3205 if (addr - block->offset + *size > block->length)
3206 *size = block->length - addr + block->offset;
3207 return block->host + (addr - block->offset);
3211 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3212 abort();
3216 void qemu_put_ram_ptr(void *addr)
3218 trace_qemu_put_ram_ptr(addr);
3221 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3223 RAMBlock *block;
3224 uint8_t *host = ptr;
3226 if (xen_enabled()) {
3227 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3228 return 0;
3231 QLIST_FOREACH(block, &ram_list.blocks, next) {
3232 /* This case happens when the block is not mapped. */
3233 if (block->host == NULL) {
3234 continue;
3236 if (host - block->host < block->length) {
3237 *ram_addr = block->offset + (host - block->host);
3238 return 0;
3242 return -1;
3245 /* Some of the softmmu routines need to translate from a host pointer
3246 (typically a TLB entry) back to a ram offset. */
3247 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3249 ram_addr_t ram_addr;
3251 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3252 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3253 abort();
3255 return ram_addr;
3258 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3260 #ifdef DEBUG_UNASSIGNED
3261 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3262 #endif
3263 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3264 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3265 #endif
3266 return 0;
3269 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3271 #ifdef DEBUG_UNASSIGNED
3272 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3273 #endif
3274 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3275 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3276 #endif
3277 return 0;
3280 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3282 #ifdef DEBUG_UNASSIGNED
3283 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3284 #endif
3285 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3286 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3287 #endif
3288 return 0;
3291 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3293 #ifdef DEBUG_UNASSIGNED
3294 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3295 #endif
3296 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3297 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3298 #endif
3301 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3303 #ifdef DEBUG_UNASSIGNED
3304 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3305 #endif
3306 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3307 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3308 #endif
3311 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3313 #ifdef DEBUG_UNASSIGNED
3314 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3315 #endif
3316 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3317 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3318 #endif
3321 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3322 unassigned_mem_readb,
3323 unassigned_mem_readw,
3324 unassigned_mem_readl,
3327 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3328 unassigned_mem_writeb,
3329 unassigned_mem_writew,
3330 unassigned_mem_writel,
3333 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3334 uint32_t val)
3336 int dirty_flags;
3337 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3338 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3339 #if !defined(CONFIG_USER_ONLY)
3340 tb_invalidate_phys_page_fast(ram_addr, 1);
3341 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3342 #endif
3344 stb_p(qemu_get_ram_ptr(ram_addr), val);
3345 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3346 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3347 /* we remove the notdirty callback only if the code has been
3348 flushed */
3349 if (dirty_flags == 0xff)
3350 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3353 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3354 uint32_t val)
3356 int dirty_flags;
3357 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3358 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3359 #if !defined(CONFIG_USER_ONLY)
3360 tb_invalidate_phys_page_fast(ram_addr, 2);
3361 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3362 #endif
3364 stw_p(qemu_get_ram_ptr(ram_addr), val);
3365 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3366 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3367 /* we remove the notdirty callback only if the code has been
3368 flushed */
3369 if (dirty_flags == 0xff)
3370 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3373 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3374 uint32_t val)
3376 int dirty_flags;
3377 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3378 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3379 #if !defined(CONFIG_USER_ONLY)
3380 tb_invalidate_phys_page_fast(ram_addr, 4);
3381 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3382 #endif
3384 stl_p(qemu_get_ram_ptr(ram_addr), val);
3385 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3386 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3387 /* we remove the notdirty callback only if the code has been
3388 flushed */
3389 if (dirty_flags == 0xff)
3390 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3393 static CPUReadMemoryFunc * const error_mem_read[3] = {
3394 NULL, /* never used */
3395 NULL, /* never used */
3396 NULL, /* never used */
3399 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3400 notdirty_mem_writeb,
3401 notdirty_mem_writew,
3402 notdirty_mem_writel,
3405 /* Generate a debug exception if a watchpoint has been hit. */
3406 static void check_watchpoint(int offset, int len_mask, int flags)
3408 CPUState *env = cpu_single_env;
3409 target_ulong pc, cs_base;
3410 TranslationBlock *tb;
3411 target_ulong vaddr;
3412 CPUWatchpoint *wp;
3413 int cpu_flags;
3415 if (env->watchpoint_hit) {
3416 /* We re-entered the check after replacing the TB. Now raise
3417 * the debug interrupt so that it will trigger after the
3418 * current instruction. */
3419 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3420 return;
3422 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3423 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3424 if ((vaddr == (wp->vaddr & len_mask) ||
3425 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3426 wp->flags |= BP_WATCHPOINT_HIT;
3427 if (!env->watchpoint_hit) {
3428 env->watchpoint_hit = wp;
3429 tb = tb_find_pc(env->mem_io_pc);
3430 if (!tb) {
3431 cpu_abort(env, "check_watchpoint: could not find TB for "
3432 "pc=%p", (void *)env->mem_io_pc);
3434 cpu_restore_state(tb, env, env->mem_io_pc);
3435 tb_phys_invalidate(tb, -1);
3436 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3437 env->exception_index = EXCP_DEBUG;
3438 } else {
3439 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3440 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3442 cpu_resume_from_signal(env, NULL);
3444 } else {
3445 wp->flags &= ~BP_WATCHPOINT_HIT;
3450 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3451 so these check for a hit then pass through to the normal out-of-line
3452 phys routines. */
3453 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3455 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3456 return ldub_phys(addr);
3459 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3461 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3462 return lduw_phys(addr);
3465 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3467 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3468 return ldl_phys(addr);
3471 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3472 uint32_t val)
3474 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3475 stb_phys(addr, val);
3478 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3479 uint32_t val)
3481 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3482 stw_phys(addr, val);
3485 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3486 uint32_t val)
3488 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3489 stl_phys(addr, val);
3492 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3493 watch_mem_readb,
3494 watch_mem_readw,
3495 watch_mem_readl,
3498 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3499 watch_mem_writeb,
3500 watch_mem_writew,
3501 watch_mem_writel,
3504 static inline uint32_t subpage_readlen (subpage_t *mmio,
3505 target_phys_addr_t addr,
3506 unsigned int len)
3508 unsigned int idx = SUBPAGE_IDX(addr);
3509 #if defined(DEBUG_SUBPAGE)
3510 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3511 mmio, len, addr, idx);
3512 #endif
3514 addr += mmio->region_offset[idx];
3515 idx = mmio->sub_io_index[idx];
3516 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3519 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3520 uint32_t value, unsigned int len)
3522 unsigned int idx = SUBPAGE_IDX(addr);
3523 #if defined(DEBUG_SUBPAGE)
3524 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3525 __func__, mmio, len, addr, idx, value);
3526 #endif
3528 addr += mmio->region_offset[idx];
3529 idx = mmio->sub_io_index[idx];
3530 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3533 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3535 return subpage_readlen(opaque, addr, 0);
3538 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3539 uint32_t value)
3541 subpage_writelen(opaque, addr, value, 0);
3544 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3546 return subpage_readlen(opaque, addr, 1);
3549 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3550 uint32_t value)
3552 subpage_writelen(opaque, addr, value, 1);
3555 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3557 return subpage_readlen(opaque, addr, 2);
3560 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3561 uint32_t value)
3563 subpage_writelen(opaque, addr, value, 2);
3566 static CPUReadMemoryFunc * const subpage_read[] = {
3567 &subpage_readb,
3568 &subpage_readw,
3569 &subpage_readl,
3572 static CPUWriteMemoryFunc * const subpage_write[] = {
3573 &subpage_writeb,
3574 &subpage_writew,
3575 &subpage_writel,
3578 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3579 ram_addr_t memory, ram_addr_t region_offset)
3581 int idx, eidx;
3583 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3584 return -1;
3585 idx = SUBPAGE_IDX(start);
3586 eidx = SUBPAGE_IDX(end);
3587 #if defined(DEBUG_SUBPAGE)
3588 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3589 mmio, start, end, idx, eidx, memory);
3590 #endif
3591 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3592 memory = IO_MEM_UNASSIGNED;
3593 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3594 for (; idx <= eidx; idx++) {
3595 mmio->sub_io_index[idx] = memory;
3596 mmio->region_offset[idx] = region_offset;
3599 return 0;
3602 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3603 ram_addr_t orig_memory,
3604 ram_addr_t region_offset)
3606 subpage_t *mmio;
3607 int subpage_memory;
3609 mmio = g_malloc0(sizeof(subpage_t));
3611 mmio->base = base;
3612 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3613 DEVICE_NATIVE_ENDIAN);
3614 #if defined(DEBUG_SUBPAGE)
3615 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3616 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3617 #endif
3618 *phys = subpage_memory | IO_MEM_SUBPAGE;
3619 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3621 return mmio;
3624 static int get_free_io_mem_idx(void)
3626 int i;
3628 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3629 if (!io_mem_used[i]) {
3630 io_mem_used[i] = 1;
3631 return i;
3633 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3634 return -1;
3638 * Usually, devices operate in little endian mode. There are devices out
3639 * there that operate in big endian too. Each device gets byte swapped
3640 * mmio if plugged onto a CPU that does the other endianness.
3642 * CPU Device swap?
3644 * little little no
3645 * little big yes
3646 * big little yes
3647 * big big no
3650 typedef struct SwapEndianContainer {
3651 CPUReadMemoryFunc *read[3];
3652 CPUWriteMemoryFunc *write[3];
3653 void *opaque;
3654 } SwapEndianContainer;
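/* Worked example (illustrative): take a little-endian device model plugged
 * into a big-endian target.  The table above says "swap", so
 * cpu_register_io_memory_fixed() interposes a SwapEndianContainer: a guest
 * 32-bit store of 0x11223344 reaches the device callback as
 * bswap32(0x11223344) == 0x44332211, reads are swapped back on the way out,
 * and the byte-wide (index 0) path is passed through unchanged. */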
3656 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3658 uint32_t val;
3659 SwapEndianContainer *c = opaque;
3660 val = c->read[0](c->opaque, addr);
3661 return val;
3664 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3666 uint32_t val;
3667 SwapEndianContainer *c = opaque;
3668 val = bswap16(c->read[1](c->opaque, addr));
3669 return val;
3672 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3674 uint32_t val;
3675 SwapEndianContainer *c = opaque;
3676 val = bswap32(c->read[2](c->opaque, addr));
3677 return val;
3680 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3681 swapendian_mem_readb,
3682 swapendian_mem_readw,
3683 swapendian_mem_readl
3686 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3687 uint32_t val)
3689 SwapEndianContainer *c = opaque;
3690 c->write[0](c->opaque, addr, val);
3693 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3694 uint32_t val)
3696 SwapEndianContainer *c = opaque;
3697 c->write[1](c->opaque, addr, bswap16(val));
3700 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3701 uint32_t val)
3703 SwapEndianContainer *c = opaque;
3704 c->write[2](c->opaque, addr, bswap32(val));
3707 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3708 swapendian_mem_writeb,
3709 swapendian_mem_writew,
3710 swapendian_mem_writel
3713 static void swapendian_init(int io_index)
3715 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3716 int i;
3718 /* Swap mmio for big endian targets */
3719 c->opaque = io_mem_opaque[io_index];
3720 for (i = 0; i < 3; i++) {
3721 c->read[i] = io_mem_read[io_index][i];
3722 c->write[i] = io_mem_write[io_index][i];
3724 io_mem_read[io_index][i] = swapendian_readfn[i];
3725 io_mem_write[io_index][i] = swapendian_writefn[i];
3727 io_mem_opaque[io_index] = c;
3730 static void swapendian_del(int io_index)
3732 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3733 g_free(io_mem_opaque[io_index]);
3737 /* mem_read and mem_write are arrays of functions containing the
3738 function to access byte (index 0), word (index 1) and dword (index
3739 2). Functions can be omitted with a NULL function pointer.
3740 If io_index is non zero, the corresponding io zone is
3741 modified. If it is zero, a new io zone is allocated. The return
3742 value can be used with cpu_register_physical_memory(). (-1) is
3743 returned if error. */
3744 static int cpu_register_io_memory_fixed(int io_index,
3745 CPUReadMemoryFunc * const *mem_read,
3746 CPUWriteMemoryFunc * const *mem_write,
3747 void *opaque, enum device_endian endian)
3749 int i;
3751 if (io_index <= 0) {
3752 io_index = get_free_io_mem_idx();
3753 if (io_index == -1)
3754 return io_index;
3755 } else {
3756 io_index >>= IO_MEM_SHIFT;
3757 if (io_index >= IO_MEM_NB_ENTRIES)
3758 return -1;
3761 for (i = 0; i < 3; ++i) {
3762 io_mem_read[io_index][i]
3763 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3765 for (i = 0; i < 3; ++i) {
3766 io_mem_write[io_index][i]
3767 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3769 io_mem_opaque[io_index] = opaque;
3771 switch (endian) {
3772 case DEVICE_BIG_ENDIAN:
3773 #ifndef TARGET_WORDS_BIGENDIAN
3774 swapendian_init(io_index);
3775 #endif
3776 break;
3777 case DEVICE_LITTLE_ENDIAN:
3778 #ifdef TARGET_WORDS_BIGENDIAN
3779 swapendian_init(io_index);
3780 #endif
3781 break;
3782 case DEVICE_NATIVE_ENDIAN:
3783 default:
3784 break;
3787 return (io_index << IO_MEM_SHIFT);
3790 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3791 CPUWriteMemoryFunc * const *mem_write,
3792 void *opaque, enum device_endian endian)
3794 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
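/* Usage sketch (illustrative): a device supplies byte/word/long handler
 * triplets and then maps the returned index; the my_* handlers, the opaque
 * state s and mmio_base are hypothetical, and cpu_register_physical_memory()
 * is the assumed wrapper over cpu_register_physical_memory_log():
 *
 *     static CPUReadMemoryFunc * const my_read[3] = {
 *         my_readb, my_readw, my_readl,
 *     };
 *     static CPUWriteMemoryFunc * const my_write[3] = {
 *         my_writeb, my_writew, my_writel,
 *     };
 *
 *     int io = cpu_register_io_memory(my_read, my_write, s,
 *                                     DEVICE_LITTLE_ENDIAN);
 *     cpu_register_physical_memory(mmio_base, 0x100, io);
 *
 * A NULL entry in either array falls back to the corresponding unassigned
 * memory handler for that access size. */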
3797 void cpu_unregister_io_memory(int io_table_address)
3799 int i;
3800 int io_index = io_table_address >> IO_MEM_SHIFT;
3802 swapendian_del(io_index);
3804 for (i=0;i < 3; i++) {
3805 io_mem_read[io_index][i] = unassigned_mem_read[i];
3806 io_mem_write[io_index][i] = unassigned_mem_write[i];
3808 io_mem_opaque[io_index] = NULL;
3809 io_mem_used[io_index] = 0;
3812 static void io_mem_init(void)
3814 int i;
3816 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3817 unassigned_mem_write, NULL,
3818 DEVICE_NATIVE_ENDIAN);
3819 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3820 unassigned_mem_write, NULL,
3821 DEVICE_NATIVE_ENDIAN);
3822 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3823 notdirty_mem_write, NULL,
3824 DEVICE_NATIVE_ENDIAN);
3825 for (i=0; i<5; i++)
3826 io_mem_used[i] = 1;
3828 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3829 watch_mem_write, NULL,
3830 DEVICE_NATIVE_ENDIAN);
3833 static void memory_map_init(void)
3835 system_memory = g_malloc(sizeof(*system_memory));
3836 memory_region_init(system_memory, "system", INT64_MAX);
3837 set_system_memory_map(system_memory);
3839 system_io = g_malloc(sizeof(*system_io));
3840 memory_region_init(system_io, "io", 65536);
3841 set_system_io_map(system_io);
3844 MemoryRegion *get_system_memory(void)
3846 return system_memory;
3849 MemoryRegion *get_system_io(void)
3851 return system_io;
3854 #endif /* !defined(CONFIG_USER_ONLY) */
3856 /* physical memory access (slow version, mainly for debug) */
3857 #if defined(CONFIG_USER_ONLY)
3858 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3859 uint8_t *buf, int len, int is_write)
3861 int l, flags;
3862 target_ulong page;
3863 void * p;
3865 while (len > 0) {
3866 page = addr & TARGET_PAGE_MASK;
3867 l = (page + TARGET_PAGE_SIZE) - addr;
3868 if (l > len)
3869 l = len;
3870 flags = page_get_flags(page);
3871 if (!(flags & PAGE_VALID))
3872 return -1;
3873 if (is_write) {
3874 if (!(flags & PAGE_WRITE))
3875 return -1;
3876 /* XXX: this code should not depend on lock_user */
3877 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3878 return -1;
3879 memcpy(p, buf, l);
3880 unlock_user(p, addr, l);
3881 } else {
3882 if (!(flags & PAGE_READ))
3883 return -1;
3884 /* XXX: this code should not depend on lock_user */
3885 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3886 return -1;
3887 memcpy(buf, p, l);
3888 unlock_user(p, addr, 0);
3890 len -= l;
3891 buf += l;
3892 addr += l;
3894 return 0;
3897 #else
3898 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3899 int len, int is_write)
3901 int l, io_index;
3902 uint8_t *ptr;
3903 uint32_t val;
3904 target_phys_addr_t page;
3905 ram_addr_t pd;
3906 PhysPageDesc *p;
3908 while (len > 0) {
3909 page = addr & TARGET_PAGE_MASK;
3910 l = (page + TARGET_PAGE_SIZE) - addr;
3911 if (l > len)
3912 l = len;
3913 p = phys_page_find(page >> TARGET_PAGE_BITS);
3914 if (!p) {
3915 pd = IO_MEM_UNASSIGNED;
3916 } else {
3917 pd = p->phys_offset;
3920 if (is_write) {
3921 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3922 target_phys_addr_t addr1 = addr;
3923 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3924 if (p)
3925 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3926 /* XXX: could force cpu_single_env to NULL to avoid
3927 potential bugs */
3928 if (l >= 4 && ((addr1 & 3) == 0)) {
3929 /* 32 bit write access */
3930 val = ldl_p(buf);
3931 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3932 l = 4;
3933 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3934 /* 16 bit write access */
3935 val = lduw_p(buf);
3936 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3937 l = 2;
3938 } else {
3939 /* 8 bit write access */
3940 val = ldub_p(buf);
3941 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3942 l = 1;
3944 } else {
3945 ram_addr_t addr1;
3946 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3947 /* RAM case */
3948 ptr = qemu_get_ram_ptr(addr1);
3949 memcpy(ptr, buf, l);
3950 if (!cpu_physical_memory_is_dirty(addr1)) {
3951 /* invalidate code */
3952 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3953 /* set dirty bit */
3954 cpu_physical_memory_set_dirty_flags(
3955 addr1, (0xff & ~CODE_DIRTY_FLAG));
3957 /* qemu doesn't execute guest code directly, but kvm does,
3958 so flush the instruction caches */
3959 if (kvm_enabled())
3960 flush_icache_range((unsigned long)ptr,
3961 ((unsigned long)ptr)+l);
3962 qemu_put_ram_ptr(ptr);
3964 } else {
3965 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3966 !(pd & IO_MEM_ROMD)) {
3967 target_phys_addr_t addr1 = addr;
3968 /* I/O case */
3969 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3970 if (p)
3971 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3972 if (l >= 4 && ((addr1 & 3) == 0)) {
3973 /* 32 bit read access */
3974 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3975 stl_p(buf, val);
3976 l = 4;
3977 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3978 /* 16 bit read access */
3979 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3980 stw_p(buf, val);
3981 l = 2;
3982 } else {
3983 /* 8 bit read access */
3984 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3985 stb_p(buf, val);
3986 l = 1;
3988 } else {
3989 /* RAM case */
3990 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3991 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3992 qemu_put_ram_ptr(ptr);
3995 len -= l;
3996 buf += l;
3997 addr += l;
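/* Usage sketch (illustrative): most device code goes through the small
 * read/write helpers that wrap this function (assumed to exist as the usual
 * inline wrappers of this period) rather than calling it directly;
 * desc_gpa and status_gpa stand for guest-physical addresses taken from a
 * device register or descriptor ring:
 *
 *     uint8_t desc[16], status = 0;
 *     cpu_physical_memory_read(desc_gpa, desc, sizeof(desc));
 *     ... process the descriptor and compute status ...
 *     cpu_physical_memory_write(status_gpa, &status, sizeof(status));
 */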
4001 /* used for ROM loading : can write in RAM and ROM */
4002 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
4003 const uint8_t *buf, int len)
4005 int l;
4006 uint8_t *ptr;
4007 target_phys_addr_t page;
4008 unsigned long pd;
4009 PhysPageDesc *p;
4011 while (len > 0) {
4012 page = addr & TARGET_PAGE_MASK;
4013 l = (page + TARGET_PAGE_SIZE) - addr;
4014 if (l > len)
4015 l = len;
4016 p = phys_page_find(page >> TARGET_PAGE_BITS);
4017 if (!p) {
4018 pd = IO_MEM_UNASSIGNED;
4019 } else {
4020 pd = p->phys_offset;
4023 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4024 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4025 !(pd & IO_MEM_ROMD)) {
4026 /* do nothing */
4027 } else {
4028 unsigned long addr1;
4029 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4030 /* ROM/RAM case */
4031 ptr = qemu_get_ram_ptr(addr1);
4032 memcpy(ptr, buf, l);
4033 qemu_put_ram_ptr(ptr);
4035 len -= l;
4036 buf += l;
4037 addr += l;
4041 typedef struct {
4042 void *buffer;
4043 target_phys_addr_t addr;
4044 target_phys_addr_t len;
4045 } BounceBuffer;
4047 static BounceBuffer bounce;
4049 typedef struct MapClient {
4050 void *opaque;
4051 void (*callback)(void *opaque);
4052 QLIST_ENTRY(MapClient) link;
4053 } MapClient;
4055 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4056 = QLIST_HEAD_INITIALIZER(map_client_list);
4058 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4060 MapClient *client = g_malloc(sizeof(*client));
4062 client->opaque = opaque;
4063 client->callback = callback;
4064 QLIST_INSERT_HEAD(&map_client_list, client, link);
4065 return client;
4068 void cpu_unregister_map_client(void *_client)
4070 MapClient *client = (MapClient *)_client;
4072 QLIST_REMOVE(client, link);
4073 g_free(client);
4076 static void cpu_notify_map_clients(void)
4078 MapClient *client;
4080 while (!QLIST_EMPTY(&map_client_list)) {
4081 client = QLIST_FIRST(&map_client_list);
4082 client->callback(client->opaque);
4083 cpu_unregister_map_client(client);
4087 /* Map a physical memory region into a host virtual address.
4088 * May map a subset of the requested range, given by and returned in *plen.
4089 * May return NULL if resources needed to perform the mapping are exhausted.
4090 * Use only for reads OR writes - not for read-modify-write operations.
4091 * Use cpu_register_map_client() to know when retrying the map operation is
4092 * likely to succeed.
4094 void *cpu_physical_memory_map(target_phys_addr_t addr,
4095 target_phys_addr_t *plen,
4096 int is_write)
4098 target_phys_addr_t len = *plen;
4099 target_phys_addr_t todo = 0;
4100 int l;
4101 target_phys_addr_t page;
4102 unsigned long pd;
4103 PhysPageDesc *p;
4104 ram_addr_t raddr = RAM_ADDR_MAX;
4105 ram_addr_t rlen;
4106 void *ret;
4108 while (len > 0) {
4109 page = addr & TARGET_PAGE_MASK;
4110 l = (page + TARGET_PAGE_SIZE) - addr;
4111 if (l > len)
4112 l = len;
4113 p = phys_page_find(page >> TARGET_PAGE_BITS);
4114 if (!p) {
4115 pd = IO_MEM_UNASSIGNED;
4116 } else {
4117 pd = p->phys_offset;
4120 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4121 if (todo || bounce.buffer) {
4122 break;
4124 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4125 bounce.addr = addr;
4126 bounce.len = l;
4127 if (!is_write) {
4128 cpu_physical_memory_read(addr, bounce.buffer, l);
4131 *plen = l;
4132 return bounce.buffer;
4134 if (!todo) {
4135 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4138 len -= l;
4139 addr += l;
4140 todo += l;
4142 rlen = todo;
4143 ret = qemu_ram_ptr_length(raddr, &rlen);
4144 *plen = rlen;
4145 return ret;
4148 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4149 * Will also mark the memory as dirty if is_write == 1. access_len gives
4150 * the amount of memory that was actually read or written by the caller.
4152 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4153 int is_write, target_phys_addr_t access_len)
4155 unsigned long flush_len = (unsigned long)access_len;
4157 if (buffer != bounce.buffer) {
4158 if (is_write) {
4159 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4160 while (access_len) {
4161 unsigned l;
4162 l = TARGET_PAGE_SIZE;
4163 if (l > access_len)
4164 l = access_len;
4165 if (!cpu_physical_memory_is_dirty(addr1)) {
4166 /* invalidate code */
4167 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4168 /* set dirty bit */
4169 cpu_physical_memory_set_dirty_flags(
4170 addr1, (0xff & ~CODE_DIRTY_FLAG));
4172 addr1 += l;
4173 access_len -= l;
4175 dma_flush_range((unsigned long)buffer,
4176 (unsigned long)buffer + flush_len);
4178 if (xen_enabled()) {
4179 xen_invalidate_map_cache_entry(buffer);
4181 return;
4183 if (is_write) {
4184 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4186 qemu_vfree(bounce.buffer);
4187 bounce.buffer = NULL;
4188 cpu_notify_map_clients();
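/* A minimal sketch of how a device model might drive the map/unmap API
 * above; the example_* names are hypothetical.  cpu_physical_memory_map()
 * may map fewer bytes than requested (returned in *plen) and returns NULL
 * when the single bounce buffer is already in use, in which case
 * cpu_register_map_client() arranges a callback for when a retry is likely
 * to succeed. */
#if 0
static void example_dma_retry_cb(void *opaque);

static void example_dma_fill(target_phys_addr_t addr, target_phys_addr_t len)
{
    while (len > 0) {
        target_phys_addr_t plen = len;
        void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

        if (!host) {
            /* Bounce buffer busy: ask to be notified, retry from the
             * callback (the returned handle can later be passed to
             * cpu_unregister_map_client()). */
            cpu_register_map_client(NULL, example_dma_retry_cb);
            return;
        }
        memset(host, 0, plen);            /* ... produce the data ... */
        cpu_physical_memory_unmap(host, plen, 1 /* is_write */, plen);
        addr += plen;
        len -= plen;
    }
}
#endif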
4191 /* warning: addr must be aligned */
4192 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4193 enum device_endian endian)
4195 int io_index;
4196 uint8_t *ptr;
4197 uint32_t val;
4198 unsigned long pd;
4199 PhysPageDesc *p;
4201 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4202 if (!p) {
4203 pd = IO_MEM_UNASSIGNED;
4204 } else {
4205 pd = p->phys_offset;
4208 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4209 !(pd & IO_MEM_ROMD)) {
4210 /* I/O case */
4211 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4212 if (p)
4213 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4214 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4215 #if defined(TARGET_WORDS_BIGENDIAN)
4216 if (endian == DEVICE_LITTLE_ENDIAN) {
4217 val = bswap32(val);
4219 #else
4220 if (endian == DEVICE_BIG_ENDIAN) {
4221 val = bswap32(val);
4223 #endif
4224 } else {
4225 /* RAM case */
4226 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4227 (addr & ~TARGET_PAGE_MASK);
4228 switch (endian) {
4229 case DEVICE_LITTLE_ENDIAN:
4230 val = ldl_le_p(ptr);
4231 break;
4232 case DEVICE_BIG_ENDIAN:
4233 val = ldl_be_p(ptr);
4234 break;
4235 default:
4236 val = ldl_p(ptr);
4237 break;
4240 return val;
4243 uint32_t ldl_phys(target_phys_addr_t addr)
4245 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4248 uint32_t ldl_le_phys(target_phys_addr_t addr)
4250 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4253 uint32_t ldl_be_phys(target_phys_addr_t addr)
4255 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4258 /* warning: addr must be aligned */
4259 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4260 enum device_endian endian)
4262 int io_index;
4263 uint8_t *ptr;
4264 uint64_t val;
4265 unsigned long pd;
4266 PhysPageDesc *p;
4268 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4269 if (!p) {
4270 pd = IO_MEM_UNASSIGNED;
4271 } else {
4272 pd = p->phys_offset;
4275 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4276 !(pd & IO_MEM_ROMD)) {
4277 /* I/O case */
4278 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4279 if (p)
4280 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4282 /* XXX This is broken when device endian != cpu endian.
4283 Fix this and add an "endian" variable check. */
4284 #ifdef TARGET_WORDS_BIGENDIAN
4285 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4286 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4287 #else
4288 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4289 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4290 #endif
4291 } else {
4292 /* RAM case */
4293 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4294 (addr & ~TARGET_PAGE_MASK);
4295 switch (endian) {
4296 case DEVICE_LITTLE_ENDIAN:
4297 val = ldq_le_p(ptr);
4298 break;
4299 case DEVICE_BIG_ENDIAN:
4300 val = ldq_be_p(ptr);
4301 break;
4302 default:
4303 val = ldq_p(ptr);
4304 break;
4307 return val;
4310 uint64_t ldq_phys(target_phys_addr_t addr)
4312 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4315 uint64_t ldq_le_phys(target_phys_addr_t addr)
4317 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4320 uint64_t ldq_be_phys(target_phys_addr_t addr)
4322 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4325 /* XXX: optimize */
4326 uint32_t ldub_phys(target_phys_addr_t addr)
4328 uint8_t val;
4329 cpu_physical_memory_read(addr, &val, 1);
4330 return val;
4333 /* warning: addr must be aligned */
4334 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4335 enum device_endian endian)
4337 int io_index;
4338 uint8_t *ptr;
4339 uint64_t val;
4340 unsigned long pd;
4341 PhysPageDesc *p;
4343 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4344 if (!p) {
4345 pd = IO_MEM_UNASSIGNED;
4346 } else {
4347 pd = p->phys_offset;
4350 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4351 !(pd & IO_MEM_ROMD)) {
4352 /* I/O case */
4353 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4354 if (p)
4355 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4356 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4357 #if defined(TARGET_WORDS_BIGENDIAN)
4358 if (endian == DEVICE_LITTLE_ENDIAN) {
4359 val = bswap16(val);
4361 #else
4362 if (endian == DEVICE_BIG_ENDIAN) {
4363 val = bswap16(val);
4365 #endif
4366 } else {
4367 /* RAM case */
4368 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4369 (addr & ~TARGET_PAGE_MASK);
4370 switch (endian) {
4371 case DEVICE_LITTLE_ENDIAN:
4372 val = lduw_le_p(ptr);
4373 break;
4374 case DEVICE_BIG_ENDIAN:
4375 val = lduw_be_p(ptr);
4376 break;
4377 default:
4378 val = lduw_p(ptr);
4379 break;
4382 return val;
4385 uint32_t lduw_phys(target_phys_addr_t addr)
4387 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4390 uint32_t lduw_le_phys(target_phys_addr_t addr)
4392 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4395 uint32_t lduw_be_phys(target_phys_addr_t addr)
4397 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4400 /* warning: addr must be aligned. The ram page is not marked as dirty
4401 and the code inside is not invalidated. It is useful if the dirty
4402 bits are used to track modified PTEs */
4403 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4405 int io_index;
4406 uint8_t *ptr;
4407 unsigned long pd;
4408 PhysPageDesc *p;
4410 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4411 if (!p) {
4412 pd = IO_MEM_UNASSIGNED;
4413 } else {
4414 pd = p->phys_offset;
4417 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4418 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4419 if (p)
4420 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4421 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4422 } else {
4423 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4424 ptr = qemu_get_ram_ptr(addr1);
4425 stl_p(ptr, val);
4427 if (unlikely(in_migration)) {
4428 if (!cpu_physical_memory_is_dirty(addr1)) {
4429 /* invalidate code */
4430 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4431 /* set dirty bit */
4432 cpu_physical_memory_set_dirty_flags(
4433 addr1, (0xff & ~CODE_DIRTY_FLAG));
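/* A minimal sketch of the "track modified PTEs" use mentioned above: a
 * target page-table walker can write back accessed/dirty bookkeeping bits
 * with stl_phys_notdirty(), so that this write neither sets the dirty flags
 * it is trying to observe nor invalidates translated code.  The constant and
 * function name are hypothetical. */
#if 0
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_mark_pte_accessed(target_phys_addr_t pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        pte |= EXAMPLE_PTE_ACCESSED;
        stl_phys_notdirty(pte_addr, pte); /* no dirty flag, no TB invalidate */
    }
}
#endif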
4439 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4441 int io_index;
4442 uint8_t *ptr;
4443 unsigned long pd;
4444 PhysPageDesc *p;
4446 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4447 if (!p) {
4448 pd = IO_MEM_UNASSIGNED;
4449 } else {
4450 pd = p->phys_offset;
4453 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4454 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4455 if (p)
4456 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4457 #ifdef TARGET_WORDS_BIGENDIAN
4458 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4459 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4460 #else
4461 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4462 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4463 #endif
4464 } else {
4465 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4466 (addr & ~TARGET_PAGE_MASK);
4467 stq_p(ptr, val);
4471 /* warning: addr must be aligned */
4472 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4473 enum device_endian endian)
4475 int io_index;
4476 uint8_t *ptr;
4477 unsigned long pd;
4478 PhysPageDesc *p;
4480 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4481 if (!p) {
4482 pd = IO_MEM_UNASSIGNED;
4483 } else {
4484 pd = p->phys_offset;
4487 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4488 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4489 if (p)
4490 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4491 #if defined(TARGET_WORDS_BIGENDIAN)
4492 if (endian == DEVICE_LITTLE_ENDIAN) {
4493 val = bswap32(val);
4495 #else
4496 if (endian == DEVICE_BIG_ENDIAN) {
4497 val = bswap32(val);
4499 #endif
4500 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4501 } else {
4502 unsigned long addr1;
4503 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4504 /* RAM case */
4505 ptr = qemu_get_ram_ptr(addr1);
4506 switch (endian) {
4507 case DEVICE_LITTLE_ENDIAN:
4508 stl_le_p(ptr, val);
4509 break;
4510 case DEVICE_BIG_ENDIAN:
4511 stl_be_p(ptr, val);
4512 break;
4513 default:
4514 stl_p(ptr, val);
4515 break;
4517 if (!cpu_physical_memory_is_dirty(addr1)) {
4518 /* invalidate code */
4519 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4520 /* set dirty bit */
4521 cpu_physical_memory_set_dirty_flags(addr1,
4522 (0xff & ~CODE_DIRTY_FLAG));
4527 void stl_phys(target_phys_addr_t addr, uint32_t val)
4529 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4532 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4534 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4537 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4539 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4542 /* XXX: optimize */
4543 void stb_phys(target_phys_addr_t addr, uint32_t val)
4545 uint8_t v = val;
4546 cpu_physical_memory_write(addr, &v, 1);
4549 /* warning: addr must be aligned */
4550 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4551 enum device_endian endian)
4553 int io_index;
4554 uint8_t *ptr;
4555 unsigned long pd;
4556 PhysPageDesc *p;
4558 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4559 if (!p) {
4560 pd = IO_MEM_UNASSIGNED;
4561 } else {
4562 pd = p->phys_offset;
4565 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4566 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4567 if (p)
4568 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4569 #if defined(TARGET_WORDS_BIGENDIAN)
4570 if (endian == DEVICE_LITTLE_ENDIAN) {
4571 val = bswap16(val);
4573 #else
4574 if (endian == DEVICE_BIG_ENDIAN) {
4575 val = bswap16(val);
4577 #endif
4578 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4579 } else {
4580 unsigned long addr1;
4581 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4582 /* RAM case */
4583 ptr = qemu_get_ram_ptr(addr1);
4584 switch (endian) {
4585 case DEVICE_LITTLE_ENDIAN:
4586 stw_le_p(ptr, val);
4587 break;
4588 case DEVICE_BIG_ENDIAN:
4589 stw_be_p(ptr, val);
4590 break;
4591 default:
4592 stw_p(ptr, val);
4593 break;
4595 if (!cpu_physical_memory_is_dirty(addr1)) {
4596 /* invalidate code */
4597 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4598 /* set dirty bit */
4599 cpu_physical_memory_set_dirty_flags(addr1,
4600 (0xff & ~CODE_DIRTY_FLAG));
4605 void stw_phys(target_phys_addr_t addr, uint32_t val)
4607 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4610 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4612 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4615 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4617 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4620 /* XXX: optimize */
4621 void stq_phys(target_phys_addr_t addr, uint64_t val)
4623 val = tswap64(val);
4624 cpu_physical_memory_write(addr, &val, 8);
4627 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4629 val = cpu_to_le64(val);
4630 cpu_physical_memory_write(addr, &val, 8);
4633 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4635 val = cpu_to_be64(val);
4636 cpu_physical_memory_write(addr, &val, 8);
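/* A minimal sketch of the fixed-endian accessors above: a device model that
 * exchanges little-endian structures with the guest can use the _le variants
 * so the code is independent of host and target byte order.  The descriptor
 * layout and names are hypothetical. */
#if 0
static void example_complete_desc(target_phys_addr_t desc)
{
    uint64_t buf = ldq_le_phys(desc);       /* guest buffer address, LE u64 */
    uint32_t len = ldl_le_phys(desc + 8);   /* length, LE u32 */

    /* ... process buf/len ... */
    (void)buf;
    (void)len;

    stl_le_phys(desc + 12, 1);              /* status word: mark as done */
}
#endif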
4639 /* virtual memory access for debug (includes writing to ROM) */
4640 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4641 uint8_t *buf, int len, int is_write)
4643 int l;
4644 target_phys_addr_t phys_addr;
4645 target_ulong page;
4647 while (len > 0) {
4648 page = addr & TARGET_PAGE_MASK;
4649 phys_addr = cpu_get_phys_page_debug(env, page);
4650 /* if no physical page mapped, return an error */
4651 if (phys_addr == -1)
4652 return -1;
4653 l = (page + TARGET_PAGE_SIZE) - addr;
4654 if (l > len)
4655 l = len;
4656 phys_addr += (addr & ~TARGET_PAGE_MASK);
4657 if (is_write)
4658 cpu_physical_memory_write_rom(phys_addr, buf, l);
4659 else
4660 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4661 len -= l;
4662 buf += l;
4663 addr += l;
4665 return 0;
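/* A minimal sketch of a cpu_memory_rw_debug() user: a debugger stub can read
 * guest virtual memory and plant a software breakpoint, relying on the
 * ROM-write behaviour noted above.  The breakpoint encoding and names are
 * hypothetical. */
#if 0
static int example_insert_sw_breakpoint(CPUState *env, target_ulong pc,
                                        uint8_t *saved_insn, int insn_len)
{
    uint8_t bp_insn[] = { 0xcc };           /* e.g. x86 int3 */

    if (cpu_memory_rw_debug(env, pc, saved_insn, insn_len, 0) < 0) {
        return -1;                          /* page not mapped */
    }
    return cpu_memory_rw_debug(env, pc, bp_insn, sizeof(bp_insn), 1);
}
#endif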
4667 #endif
4669 /* in deterministic execution mode, instructions doing device I/Os
4670 must be at the end of the TB */
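/* Reached when such an access happens in the middle of a TB: the current TB
   is invalidated and regenerated with CF_LAST_IO so that the I/O instruction
   ends the block and the instruction count stays exact. */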
4671 void cpu_io_recompile(CPUState *env, void *retaddr)
4673 TranslationBlock *tb;
4674 uint32_t n, cflags;
4675 target_ulong pc, cs_base;
4676 uint64_t flags;
4678 tb = tb_find_pc((unsigned long)retaddr);
4679 if (!tb) {
4680 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4681 retaddr);
4683 n = env->icount_decr.u16.low + tb->icount;
4684 cpu_restore_state(tb, env, (unsigned long)retaddr);
4685 /* Calculate how many instructions had been executed before the fault
4686 occurred. */
4687 n = n - env->icount_decr.u16.low;
4688 /* Generate a new TB ending on the I/O insn. */
4689 n++;
4690 /* On MIPS and SH, delay slot instructions can only be restarted if
4691 they were already the first instruction in the TB. If this is not
4692 the first instruction in a TB then re-execute the preceding
4693 branch. */
4694 #if defined(TARGET_MIPS)
4695 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4696 env->active_tc.PC -= 4;
4697 env->icount_decr.u16.low++;
4698 env->hflags &= ~MIPS_HFLAG_BMASK;
4700 #elif defined(TARGET_SH4)
4701 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4702 && n > 1) {
4703 env->pc -= 2;
4704 env->icount_decr.u16.low++;
4705 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4707 #endif
4708 /* This should never happen. */
4709 if (n > CF_COUNT_MASK)
4710 cpu_abort(env, "TB too big during recompile");
4712 cflags = n | CF_LAST_IO;
4713 pc = tb->pc;
4714 cs_base = tb->cs_base;
4715 flags = tb->flags;
4716 tb_phys_invalidate(tb, -1);
4717 /* FIXME: In theory this could raise an exception. In practice
4718 we have already translated the block once so it's probably ok. */
4719 tb_gen_code(env, pc, cs_base, flags, cflags);
4720 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4721 the first in the TB) then we end up generating a whole new TB and
4722 repeating the fault, which is horribly inefficient.
4723 Better would be to execute just this insn uncached, or generate a
4724 second new TB. */
4725 cpu_resume_from_signal(env, NULL);
4728 #if !defined(CONFIG_USER_ONLY)
4730 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4732 int i, target_code_size, max_target_code_size;
4733 int direct_jmp_count, direct_jmp2_count, cross_page;
4734 TranslationBlock *tb;
4736 target_code_size = 0;
4737 max_target_code_size = 0;
4738 cross_page = 0;
4739 direct_jmp_count = 0;
4740 direct_jmp2_count = 0;
4741 for(i = 0; i < nb_tbs; i++) {
4742 tb = &tbs[i];
4743 target_code_size += tb->size;
4744 if (tb->size > max_target_code_size)
4745 max_target_code_size = tb->size;
4746 if (tb->page_addr[1] != -1)
4747 cross_page++;
4748 if (tb->tb_next_offset[0] != 0xffff) {
4749 direct_jmp_count++;
4750 if (tb->tb_next_offset[1] != 0xffff) {
4751 direct_jmp2_count++;
4755 /* XXX: avoid using doubles ? */
4756 cpu_fprintf(f, "Translation buffer state:\n");
4757 cpu_fprintf(f, "gen code size %td/%ld\n",
4758 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4759 cpu_fprintf(f, "TB count %d/%d\n",
4760 nb_tbs, code_gen_max_blocks);
4761 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4762 nb_tbs ? target_code_size / nb_tbs : 0,
4763 max_target_code_size);
4764 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4765 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4766 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4767 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4768 cross_page,
4769 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4770 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4771 direct_jmp_count,
4772 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4773 direct_jmp2_count,
4774 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4775 cpu_fprintf(f, "\nStatistics:\n");
4776 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4777 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4778 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4779 #ifdef CONFIG_PROFILER
4780 tcg_dump_info(f, cpu_fprintf);
4781 #endif
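/* A minimal sketch of a dump_exec_info() caller: any FILE* plus a matching
 * fprintf-like function will do, e.g. dumping the JIT statistics to stderr
 * (the monitor's "info jit" command uses it the same way with its own
 * fprintf implementation). */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif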
4784 #define MMUSUFFIX _cmmu
4785 #define GETPC() NULL
4786 #define env cpu_single_env
4787 #define SOFTMMU_CODE_ACCESS
4789 #define SHIFT 0
4790 #include "softmmu_template.h"
4792 #define SHIFT 1
4793 #include "softmmu_template.h"
4795 #define SHIFT 2
4796 #include "softmmu_template.h"
4798 #define SHIFT 3
4799 #include "softmmu_template.h"
4801 #undef env
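/* The four inclusions above stamp out softmmu_template.h once per access
 * size: SHIFT is log2 of the width (0 = byte, 1 = 16-bit, 2 = 32-bit,
 * 3 = 64-bit).  MMUSUFFIX _cmmu and SOFTMMU_CODE_ACCESS select the
 * code-fetch variants used by the translator, with env temporarily
 * aliased to cpu_single_env and GETPC() stubbed out to NULL. */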
4803 #endif