[qemu-kvm.git] / exec.c (blob cf7d0d2d679a9c709d1d32626935bb451604c7f9)
Merge commit '14015304b662e8f8ccce46c5a6927af6a14c510b' into upstream-merge
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "cache-utils.h"
31 #if !defined(TARGET_IA64)
32 #include "tcg.h"
33 #endif
35 #include "hw/hw.h"
36 #include "hw/qdev.h"
37 #include "osdep.h"
38 #include "kvm.h"
39 #include "hw/xen.h"
40 #include "qemu-timer.h"
41 #include "memory.h"
42 #include "exec-memory.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
46 #include <sys/param.h>
47 #if __FreeBSD_version >= 700104
48 #define HAVE_KINFO_GETVMMAP
49 #define sigqueue sigqueue_freebsd /* avoid redefinition */
50 #include <sys/time.h>
51 #include <sys/proc.h>
52 #include <machine/profile.h>
53 #define _KERNEL
54 #include <sys/user.h>
55 #undef _KERNEL
56 #undef sigqueue
57 #include <libutil.h>
58 #endif
59 #endif
60 #else /* !CONFIG_USER_ONLY */
61 #include "xen-mapcache.h"
62 #include "trace.h"
63 #endif
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_TLB
68 //#define DEBUG_UNASSIGNED
70 /* make various TB consistency checks */
71 //#define DEBUG_TB_CHECK
72 //#define DEBUG_TLB_CHECK
74 //#define DEBUG_IOPORT
75 //#define DEBUG_SUBPAGE
77 #if !defined(CONFIG_USER_ONLY)
78 /* TB consistency checks only implemented for usermode emulation. */
79 #undef DEBUG_TB_CHECK
80 #endif
82 #define SMC_BITMAP_USE_THRESHOLD 10
84 static TranslationBlock *tbs;
85 static int code_gen_max_blocks;
86 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
87 static int nb_tbs;
88 /* any access to the tbs or the page table must use this lock */
89 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
91 #if defined(__arm__) || defined(__sparc_v9__)
92 /* The prologue must be reachable with a direct jump. ARM and Sparc64
93 have limited branch ranges (possibly also PPC) so place it in a
94 section close to code segment. */
95 #define code_gen_section \
96 __attribute__((__section__(".gen_code"))) \
97 __attribute__((aligned (32)))
98 #elif defined(_WIN32)
99 /* Maximum alignment for Win32 is 16. */
100 #define code_gen_section \
101 __attribute__((aligned (16)))
102 #else
103 #define code_gen_section \
104 __attribute__((aligned (32)))
105 #endif
107 uint8_t code_gen_prologue[1024] code_gen_section;
108 static uint8_t *code_gen_buffer;
109 static unsigned long code_gen_buffer_size;
110 /* threshold to flush the translated code buffer */
111 static unsigned long code_gen_buffer_max_size;
112 static uint8_t *code_gen_ptr;
114 #if !defined(CONFIG_USER_ONLY)
115 int phys_ram_fd;
116 static int in_migration;
118 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
120 static MemoryRegion *system_memory;
121 static MemoryRegion *system_io;
123 #endif
125 CPUState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 CPUState *cpu_single_env;
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133 /* Current instruction counter. While executing translated code this may
134 include some instructions that have not yet been executed. */
135 int64_t qemu_icount;
137 typedef struct PageDesc {
138 /* list of TBs intersecting this ram page */
139 TranslationBlock *first_tb;
140 /* in order to optimize self-modifying code, we count the number of
141 write accesses to a given page; past a threshold we use a bitmap */
142 unsigned int code_write_count;
143 uint8_t *code_bitmap;
144 #if defined(CONFIG_USER_ONLY)
145 unsigned long flags;
146 #endif
147 } PageDesc;
149 /* In system mode we want L1_MAP to be based on ram offsets,
150 while in user mode we want it to be based on virtual addresses. */
151 #if !defined(CONFIG_USER_ONLY)
152 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
153 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
156 #endif
157 #else
158 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
159 #endif
161 /* Size of the L2 (and L3, etc) page tables. */
162 #define L2_BITS 10
163 #define L2_SIZE (1 << L2_BITS)
165 /* The bits remaining after N lower levels of page tables. */
166 #define P_L1_BITS_REM \
167 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168 #define V_L1_BITS_REM \
169 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
171 /* Size of the L1 page table. Avoid silly small sizes. */
172 #if P_L1_BITS_REM < 4
173 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
174 #else
175 #define P_L1_BITS P_L1_BITS_REM
176 #endif
178 #if V_L1_BITS_REM < 4
179 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
180 #else
181 #define V_L1_BITS V_L1_BITS_REM
182 #endif
184 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
185 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
187 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
188 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
190 unsigned long qemu_real_host_page_size;
191 unsigned long qemu_host_page_bits;
192 unsigned long qemu_host_page_size;
193 unsigned long qemu_host_page_mask;
195 /* This is a multi-level map on the virtual address space.
196 The bottom level has pointers to PageDesc. */
197 static void *l1_map[V_L1_SIZE];
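/* Illustrative sketch (kept out of the build with #if 0): how a page index
   is split across the map levels sized by the macros above.  It mirrors the
   read-only path of page_find_alloc() further down; the function name is
   made up for the example. */
#if 0
static PageDesc *page_walk_sketch(tb_page_addr_t index)
{
    void **lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
    int i;

    /* Each intermediate level consumes L2_BITS of the index. */
    for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
        if (*lp == NULL) {
            return NULL;
        }
        lp = (void **)*lp + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
    }
    /* The bottom level is an array of L2_SIZE PageDesc entries. */
    return *lp ? (PageDesc *)*lp + (index & (L2_SIZE - 1)) : NULL;
}
#endif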
199 #if !defined(CONFIG_USER_ONLY)
200 typedef struct PhysPageDesc {
201 /* offset in host memory of the page + io_index in the low bits */
202 ram_addr_t phys_offset;
203 ram_addr_t region_offset;
204 } PhysPageDesc;
206 /* This is a multi-level map on the physical address space.
207 The bottom level has pointers to PhysPageDesc. */
208 static void *l1_phys_map[P_L1_SIZE];
210 static void io_mem_init(void);
211 static void memory_map_init(void);
213 /* io memory support */
214 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
215 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
216 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
217 static char io_mem_used[IO_MEM_NB_ENTRIES];
218 static int io_mem_watch;
219 #endif
221 /* log support */
222 #ifdef WIN32
223 static const char *logfilename = "qemu.log";
224 #else
225 static const char *logfilename = "/tmp/qemu.log";
226 #endif
227 FILE *logfile;
228 int loglevel;
229 static int log_append = 0;
231 /* statistics */
232 #if !defined(CONFIG_USER_ONLY)
233 static int tlb_flush_count;
234 #endif
235 static int tb_flush_count;
236 static int tb_phys_invalidate_count;
238 #ifdef _WIN32
239 static void map_exec(void *addr, long size)
241 DWORD old_protect;
242 VirtualProtect(addr, size,
243 PAGE_EXECUTE_READWRITE, &old_protect);
246 #else
247 static void map_exec(void *addr, long size)
249 unsigned long start, end, page_size;
251 page_size = getpagesize();
252 start = (unsigned long)addr;
253 start &= ~(page_size - 1);
255 end = (unsigned long)addr + size;
256 end += page_size - 1;
257 end &= ~(page_size - 1);
259 mprotect((void *)start, end - start,
260 PROT_READ | PROT_WRITE | PROT_EXEC);
262 #endif
264 static void page_init(void)
266 /* NOTE: we can always suppose that qemu_host_page_size >=
267 TARGET_PAGE_SIZE */
268 #ifdef _WIN32
270 SYSTEM_INFO system_info;
272 GetSystemInfo(&system_info);
273 qemu_real_host_page_size = system_info.dwPageSize;
275 #else
276 qemu_real_host_page_size = getpagesize();
277 #endif
278 if (qemu_host_page_size == 0)
279 qemu_host_page_size = qemu_real_host_page_size;
280 if (qemu_host_page_size < TARGET_PAGE_SIZE)
281 qemu_host_page_size = TARGET_PAGE_SIZE;
282 qemu_host_page_bits = 0;
283 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
284 qemu_host_page_bits++;
285 qemu_host_page_mask = ~(qemu_host_page_size - 1);
287 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
289 #ifdef HAVE_KINFO_GETVMMAP
290 struct kinfo_vmentry *freep;
291 int i, cnt;
293 freep = kinfo_getvmmap(getpid(), &cnt);
294 if (freep) {
295 mmap_lock();
296 for (i = 0; i < cnt; i++) {
297 unsigned long startaddr, endaddr;
299 startaddr = freep[i].kve_start;
300 endaddr = freep[i].kve_end;
301 if (h2g_valid(startaddr)) {
302 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
304 if (h2g_valid(endaddr)) {
305 endaddr = h2g(endaddr);
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
307 } else {
308 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
309 endaddr = ~0ul;
310 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
311 #endif
315 free(freep);
316 mmap_unlock();
318 #else
319 FILE *f;
321 last_brk = (unsigned long)sbrk(0);
323 f = fopen("/compat/linux/proc/self/maps", "r");
324 if (f) {
325 mmap_lock();
327 do {
328 unsigned long startaddr, endaddr;
329 int n;
331 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
333 if (n == 2 && h2g_valid(startaddr)) {
334 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
336 if (h2g_valid(endaddr)) {
337 endaddr = h2g(endaddr);
338 } else {
339 endaddr = ~0ul;
341 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
343 } while (!feof(f));
345 fclose(f);
346 mmap_unlock();
348 #endif
350 #endif
353 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
355 PageDesc *pd;
356 void **lp;
357 int i;
359 #if defined(CONFIG_USER_ONLY)
360 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
361 # define ALLOC(P, SIZE) \
362 do { \
363 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
364 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
365 } while (0)
366 #else
367 # define ALLOC(P, SIZE) \
368 do { P = qemu_mallocz(SIZE); } while (0)
369 #endif
371 /* Level 1. Always allocated. */
372 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
374 /* Level 2..N-1. */
375 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
376 void **p = *lp;
378 if (p == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(p, sizeof(void *) * L2_SIZE);
383 *lp = p;
386 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
389 pd = *lp;
390 if (pd == NULL) {
391 if (!alloc) {
392 return NULL;
394 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
395 *lp = pd;
398 #undef ALLOC
400 return pd + (index & (L2_SIZE - 1));
403 static inline PageDesc *page_find(tb_page_addr_t index)
405 return page_find_alloc(index, 0);
408 #if !defined(CONFIG_USER_ONLY)
409 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
411 PhysPageDesc *pd;
412 void **lp;
413 int i;
415 /* Level 1. Always allocated. */
416 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
418 /* Level 2..N-1. */
419 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
420 void **p = *lp;
421 if (p == NULL) {
422 if (!alloc) {
423 return NULL;
425 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
427 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
430 pd = *lp;
431 if (pd == NULL) {
432 int i;
434 if (!alloc) {
435 return NULL;
438 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
440 for (i = 0; i < L2_SIZE; i++) {
441 pd[i].phys_offset = IO_MEM_UNASSIGNED;
442 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
446 return pd + (index & (L2_SIZE - 1));
449 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
451 return phys_page_find_alloc(index, 0);
454 static void tlb_protect_code(ram_addr_t ram_addr);
455 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
456 target_ulong vaddr);
457 #define mmap_lock() do { } while(0)
458 #define mmap_unlock() do { } while(0)
459 #endif
461 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
463 #if defined(CONFIG_USER_ONLY)
464 /* Currently it is not recommended to allocate big chunks of data in
465 user mode; this will change once a dedicated libc is used. */
466 #define USE_STATIC_CODE_GEN_BUFFER
467 #endif
469 #ifdef USE_STATIC_CODE_GEN_BUFFER
470 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
471 __attribute__((aligned (CODE_GEN_ALIGN)));
472 #endif
474 static void code_gen_alloc(unsigned long tb_size)
476 if (kvm_enabled())
477 return;
479 #ifdef USE_STATIC_CODE_GEN_BUFFER
480 code_gen_buffer = static_code_gen_buffer;
481 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
482 map_exec(code_gen_buffer, code_gen_buffer_size);
483 #else
484 code_gen_buffer_size = tb_size;
485 if (code_gen_buffer_size == 0) {
486 #if defined(CONFIG_USER_ONLY)
487 /* in user mode, phys_ram_size is not meaningful */
488 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
489 #else
490 /* XXX: needs adjustments */
491 code_gen_buffer_size = (unsigned long)(ram_size / 4);
492 #endif
494 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
495 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
496 /* The code gen buffer location may have constraints depending on
497 the host cpu and OS */
498 #if defined(__linux__)
500 int flags;
501 void *start = NULL;
503 flags = MAP_PRIVATE | MAP_ANONYMOUS;
504 #if defined(__x86_64__)
505 flags |= MAP_32BIT;
506 /* Cannot map more than that */
507 if (code_gen_buffer_size > (800 * 1024 * 1024))
508 code_gen_buffer_size = (800 * 1024 * 1024);
509 #elif defined(__sparc_v9__)
510 // Map the buffer below 2G, so we can use direct calls and branches
511 flags |= MAP_FIXED;
512 start = (void *) 0x60000000UL;
513 if (code_gen_buffer_size > (512 * 1024 * 1024))
514 code_gen_buffer_size = (512 * 1024 * 1024);
515 #elif defined(__arm__)
516 /* Map the buffer below 32M, so we can use direct calls and branches */
517 flags |= MAP_FIXED;
518 start = (void *) 0x01000000UL;
519 if (code_gen_buffer_size > 16 * 1024 * 1024)
520 code_gen_buffer_size = 16 * 1024 * 1024;
521 #elif defined(__s390x__)
522 /* Map the buffer so that we can use direct calls and branches. */
523 /* We have a +- 4GB range on the branches; leave some slop. */
524 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
525 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
527 start = (void *)0x90000000UL;
528 #endif
529 code_gen_buffer = mmap(start, code_gen_buffer_size,
530 PROT_WRITE | PROT_READ | PROT_EXEC,
531 flags, -1, 0);
532 if (code_gen_buffer == MAP_FAILED) {
533 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
534 exit(1);
537 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
538 || defined(__DragonFly__) || defined(__OpenBSD__) \
539 || defined(__NetBSD__)
541 int flags;
542 void *addr = NULL;
543 flags = MAP_PRIVATE | MAP_ANONYMOUS;
544 #if defined(__x86_64__)
545 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
546 * 0x40000000 is free */
547 flags |= MAP_FIXED;
548 addr = (void *)0x40000000;
549 /* Cannot map more than that */
550 if (code_gen_buffer_size > (800 * 1024 * 1024))
551 code_gen_buffer_size = (800 * 1024 * 1024);
552 #elif defined(__sparc_v9__)
553 // Map the buffer below 2G, so we can use direct calls and branches
554 flags |= MAP_FIXED;
555 addr = (void *) 0x60000000UL;
556 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
557 code_gen_buffer_size = (512 * 1024 * 1024);
559 #endif
560 code_gen_buffer = mmap(addr, code_gen_buffer_size,
561 PROT_WRITE | PROT_READ | PROT_EXEC,
562 flags, -1, 0);
563 if (code_gen_buffer == MAP_FAILED) {
564 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
565 exit(1);
568 #else
569 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
570 map_exec(code_gen_buffer, code_gen_buffer_size);
571 #endif
572 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
573 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
574 code_gen_buffer_max_size = code_gen_buffer_size -
575 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
576 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
577 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
580 /* Must be called before using the QEMU cpus. 'tb_size' is the size
581 (in bytes) allocated to the translation buffer. Zero means default
582 size. */
583 void tcg_exec_init(unsigned long tb_size)
585 cpu_gen_init();
586 code_gen_alloc(tb_size);
587 code_gen_ptr = code_gen_buffer;
588 page_init();
589 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
590 /* There's no guest base to take into account, so go ahead and
591 initialize the prologue now. */
592 tcg_prologue_init(&tcg_ctx);
593 #endif
596 bool tcg_enabled(void)
598 return code_gen_buffer != NULL;
601 void cpu_exec_init_all(void)
603 #if !defined(CONFIG_USER_ONLY)
604 memory_map_init();
605 io_mem_init();
606 #endif
609 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
611 static int cpu_common_post_load(void *opaque, int version_id)
613 CPUState *env = opaque;
615 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
616 version_id is increased. */
617 env->interrupt_request &= ~0x01;
618 tlb_flush(env, 1);
620 return 0;
623 static const VMStateDescription vmstate_cpu_common = {
624 .name = "cpu_common",
625 .version_id = 1,
626 .minimum_version_id = 1,
627 .minimum_version_id_old = 1,
628 .post_load = cpu_common_post_load,
629 .fields = (VMStateField []) {
630 VMSTATE_UINT32(halted, CPUState),
631 VMSTATE_UINT32(interrupt_request, CPUState),
632 VMSTATE_END_OF_LIST()
635 #endif
637 CPUState *qemu_get_cpu(int cpu)
639 CPUState *env = first_cpu;
641 while (env) {
642 if (env->cpu_index == cpu)
643 break;
644 env = env->next_cpu;
647 return env;
650 void cpu_exec_init(CPUState *env)
652 CPUState **penv;
653 int cpu_index;
655 #if defined(CONFIG_USER_ONLY)
656 cpu_list_lock();
657 #endif
658 env->next_cpu = NULL;
659 penv = &first_cpu;
660 cpu_index = 0;
661 while (*penv != NULL) {
662 penv = &(*penv)->next_cpu;
663 cpu_index++;
665 env->cpu_index = cpu_index;
666 env->numa_node = 0;
667 QTAILQ_INIT(&env->breakpoints);
668 QTAILQ_INIT(&env->watchpoints);
669 #ifndef CONFIG_USER_ONLY
670 env->thread_id = qemu_get_thread_id();
671 #endif
672 *penv = env;
673 #if defined(CONFIG_USER_ONLY)
674 cpu_list_unlock();
675 #endif
676 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
677 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
678 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
679 cpu_save, cpu_load, env);
680 #endif
683 /* Allocate a new translation block. Flush the translation buffer if
684 too many translation blocks or too much generated code. */
685 static TranslationBlock *tb_alloc(target_ulong pc)
687 TranslationBlock *tb;
689 if (nb_tbs >= code_gen_max_blocks ||
690 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
691 return NULL;
692 tb = &tbs[nb_tbs++];
693 tb->pc = pc;
694 tb->cflags = 0;
695 return tb;
698 void tb_free(TranslationBlock *tb)
700 /* In practice this is mostly used for single-use temporary TBs.
701 Ignore the hard cases and just back up if this TB happens to
702 be the last one generated. */
703 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
704 code_gen_ptr = tb->tc_ptr;
705 nb_tbs--;
709 static inline void invalidate_page_bitmap(PageDesc *p)
711 if (p->code_bitmap) {
712 qemu_free(p->code_bitmap);
713 p->code_bitmap = NULL;
715 p->code_write_count = 0;
718 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
720 static void page_flush_tb_1 (int level, void **lp)
722 int i;
724 if (*lp == NULL) {
725 return;
727 if (level == 0) {
728 PageDesc *pd = *lp;
729 for (i = 0; i < L2_SIZE; ++i) {
730 pd[i].first_tb = NULL;
731 invalidate_page_bitmap(pd + i);
733 } else {
734 void **pp = *lp;
735 for (i = 0; i < L2_SIZE; ++i) {
736 page_flush_tb_1 (level - 1, pp + i);
741 static void page_flush_tb(void)
743 int i;
744 for (i = 0; i < V_L1_SIZE; i++) {
745 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
749 /* flush all the translation blocks */
750 /* XXX: tb_flush is currently not thread safe */
751 void tb_flush(CPUState *env1)
753 CPUState *env;
754 #if defined(DEBUG_FLUSH)
755 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
756 (unsigned long)(code_gen_ptr - code_gen_buffer),
757 nb_tbs, nb_tbs > 0 ?
758 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
759 #endif
760 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
761 cpu_abort(env1, "Internal error: code buffer overflow\n");
763 nb_tbs = 0;
765 for(env = first_cpu; env != NULL; env = env->next_cpu) {
766 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
769 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
770 page_flush_tb();
772 code_gen_ptr = code_gen_buffer;
773 /* XXX: flush processor icache at this point if cache flush is
774 expensive */
775 tb_flush_count++;
778 #ifdef DEBUG_TB_CHECK
780 static void tb_invalidate_check(target_ulong address)
782 TranslationBlock *tb;
783 int i;
784 address &= TARGET_PAGE_MASK;
785 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
786 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
787 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
788 address >= tb->pc + tb->size)) {
789 printf("ERROR invalidate: address=" TARGET_FMT_lx
790 " PC=%08lx size=%04x\n",
791 address, (long)tb->pc, tb->size);
797 /* verify that all the pages have correct rights for code */
798 static void tb_page_check(void)
800 TranslationBlock *tb;
801 int i, flags1, flags2;
803 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
804 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
805 flags1 = page_get_flags(tb->pc);
806 flags2 = page_get_flags(tb->pc + tb->size - 1);
807 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
808 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
809 (long)tb->pc, tb->size, flags1, flags2);
815 #endif
817 /* invalidate one TB */
818 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
819 int next_offset)
821 TranslationBlock *tb1;
822 for(;;) {
823 tb1 = *ptb;
824 if (tb1 == tb) {
825 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
826 break;
828 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
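/* Note on the lists handled below: a TranslationBlock pointer stored in
   page_next[] or in the jmp lists carries a tag in its two low bits:
   0 or 1 is the page/jump slot number, 2 marks the jmp_first list head
   (see tb_alloc_page() and tb_link_page()).  The "& 3" / "& ~3" pairs
   decode that tag. */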
832 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
834 TranslationBlock *tb1;
835 unsigned int n1;
837 for(;;) {
838 tb1 = *ptb;
839 n1 = (long)tb1 & 3;
840 tb1 = (TranslationBlock *)((long)tb1 & ~3);
841 if (tb1 == tb) {
842 *ptb = tb1->page_next[n1];
843 break;
845 ptb = &tb1->page_next[n1];
849 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
851 TranslationBlock *tb1, **ptb;
852 unsigned int n1;
854 ptb = &tb->jmp_next[n];
855 tb1 = *ptb;
856 if (tb1) {
857 /* find tb(n) in circular list */
858 for(;;) {
859 tb1 = *ptb;
860 n1 = (long)tb1 & 3;
861 tb1 = (TranslationBlock *)((long)tb1 & ~3);
862 if (n1 == n && tb1 == tb)
863 break;
864 if (n1 == 2) {
865 ptb = &tb1->jmp_first;
866 } else {
867 ptb = &tb1->jmp_next[n1];
870 /* now we can suppress tb(n) from the list */
871 *ptb = tb->jmp_next[n];
873 tb->jmp_next[n] = NULL;
877 /* reset the jump entry 'n' of a TB so that it is not chained to
878 another TB */
879 static inline void tb_reset_jump(TranslationBlock *tb, int n)
881 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
884 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
886 CPUState *env;
887 PageDesc *p;
888 unsigned int h, n1;
889 tb_page_addr_t phys_pc;
890 TranslationBlock *tb1, *tb2;
892 /* remove the TB from the hash list */
893 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
894 h = tb_phys_hash_func(phys_pc);
895 tb_remove(&tb_phys_hash[h], tb,
896 offsetof(TranslationBlock, phys_hash_next));
898 /* remove the TB from the page list */
899 if (tb->page_addr[0] != page_addr) {
900 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
901 tb_page_remove(&p->first_tb, tb);
902 invalidate_page_bitmap(p);
904 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
905 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
906 tb_page_remove(&p->first_tb, tb);
907 invalidate_page_bitmap(p);
910 tb_invalidated_flag = 1;
912 /* remove the TB from the hash list */
913 h = tb_jmp_cache_hash_func(tb->pc);
914 for(env = first_cpu; env != NULL; env = env->next_cpu) {
915 if (env->tb_jmp_cache[h] == tb)
916 env->tb_jmp_cache[h] = NULL;
919 /* suppress this TB from the two jump lists */
920 tb_jmp_remove(tb, 0);
921 tb_jmp_remove(tb, 1);
923 /* suppress any remaining jumps to this TB */
924 tb1 = tb->jmp_first;
925 for(;;) {
926 n1 = (long)tb1 & 3;
927 if (n1 == 2)
928 break;
929 tb1 = (TranslationBlock *)((long)tb1 & ~3);
930 tb2 = tb1->jmp_next[n1];
931 tb_reset_jump(tb1, n1);
932 tb1->jmp_next[n1] = NULL;
933 tb1 = tb2;
935 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
937 tb_phys_invalidate_count++;
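/* Set bits [start, start + len) in the bitmap 'tab'.  Used by
   build_page_bitmap() below to mark, one bit per byte, which parts of a
   page are covered by translated code. */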
940 static inline void set_bits(uint8_t *tab, int start, int len)
942 int end, mask, end1;
944 end = start + len;
945 tab += start >> 3;
946 mask = 0xff << (start & 7);
947 if ((start & ~7) == (end & ~7)) {
948 if (start < end) {
949 mask &= ~(0xff << (end & 7));
950 *tab |= mask;
952 } else {
953 *tab++ |= mask;
954 start = (start + 8) & ~7;
955 end1 = end & ~7;
956 while (start < end1) {
957 *tab++ = 0xff;
958 start += 8;
960 if (start < end) {
961 mask = ~(0xff << (end & 7));
962 *tab |= mask;
967 static void build_page_bitmap(PageDesc *p)
969 int n, tb_start, tb_end;
970 TranslationBlock *tb;
972 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
974 tb = p->first_tb;
975 while (tb != NULL) {
976 n = (long)tb & 3;
977 tb = (TranslationBlock *)((long)tb & ~3);
978 /* NOTE: this is subtle as a TB may span two physical pages */
979 if (n == 0) {
980 /* NOTE: tb_end may be after the end of the page, but
981 it is not a problem */
982 tb_start = tb->pc & ~TARGET_PAGE_MASK;
983 tb_end = tb_start + tb->size;
984 if (tb_end > TARGET_PAGE_SIZE)
985 tb_end = TARGET_PAGE_SIZE;
986 } else {
987 tb_start = 0;
988 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
990 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
991 tb = tb->page_next[n];
995 TranslationBlock *tb_gen_code(CPUState *env,
996 target_ulong pc, target_ulong cs_base,
997 int flags, int cflags)
999 TranslationBlock *tb;
1000 uint8_t *tc_ptr;
1001 tb_page_addr_t phys_pc, phys_page2;
1002 target_ulong virt_page2;
1003 int code_gen_size;
1005 phys_pc = get_page_addr_code(env, pc);
1006 tb = tb_alloc(pc);
1007 if (!tb) {
1008 /* flush must be done */
1009 tb_flush(env);
1010 /* cannot fail at this point */
1011 tb = tb_alloc(pc);
1012 /* Don't forget to invalidate previous TB info. */
1013 tb_invalidated_flag = 1;
1015 tc_ptr = code_gen_ptr;
1016 tb->tc_ptr = tc_ptr;
1017 tb->cs_base = cs_base;
1018 tb->flags = flags;
1019 tb->cflags = cflags;
1020 cpu_gen_code(env, tb, &code_gen_size);
1021 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1023 /* check next page if needed */
1024 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1025 phys_page2 = -1;
1026 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1027 phys_page2 = get_page_addr_code(env, virt_page2);
1029 tb_link_page(tb, phys_pc, phys_page2);
1030 return tb;
1033 /* invalidate all TBs which intersect with the target physical page
1034 starting in range [start;end[. NOTE: start and end must refer to
1035 the same physical page. 'is_cpu_write_access' should be true if called
1036 from a real cpu write access: the virtual CPU will exit the current
1037 TB if code is modified inside this TB. */
1038 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1039 int is_cpu_write_access)
1041 TranslationBlock *tb, *tb_next, *saved_tb;
1042 CPUState *env = cpu_single_env;
1043 tb_page_addr_t tb_start, tb_end;
1044 PageDesc *p;
1045 int n;
1046 #ifdef TARGET_HAS_PRECISE_SMC
1047 int current_tb_not_found = is_cpu_write_access;
1048 TranslationBlock *current_tb = NULL;
1049 int current_tb_modified = 0;
1050 target_ulong current_pc = 0;
1051 target_ulong current_cs_base = 0;
1052 int current_flags = 0;
1053 #endif /* TARGET_HAS_PRECISE_SMC */
1055 p = page_find(start >> TARGET_PAGE_BITS);
1056 if (!p)
1057 return;
1058 if (!p->code_bitmap &&
1059 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1060 is_cpu_write_access) {
1061 /* build code bitmap */
1062 build_page_bitmap(p);
1065 /* we remove all the TBs in the range [start, end[ */
1066 /* XXX: see if in some cases it could be faster to invalidate all the code */
1067 tb = p->first_tb;
1068 while (tb != NULL) {
1069 n = (long)tb & 3;
1070 tb = (TranslationBlock *)((long)tb & ~3);
1071 tb_next = tb->page_next[n];
1072 /* NOTE: this is subtle as a TB may span two physical pages */
1073 if (n == 0) {
1074 /* NOTE: tb_end may be after the end of the page, but
1075 it is not a problem */
1076 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1077 tb_end = tb_start + tb->size;
1078 } else {
1079 tb_start = tb->page_addr[1];
1080 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1082 if (!(tb_end <= start || tb_start >= end)) {
1083 #ifdef TARGET_HAS_PRECISE_SMC
1084 if (current_tb_not_found) {
1085 current_tb_not_found = 0;
1086 current_tb = NULL;
1087 if (env->mem_io_pc) {
1088 /* now we have a real cpu fault */
1089 current_tb = tb_find_pc(env->mem_io_pc);
1092 if (current_tb == tb &&
1093 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1094 /* If we are modifying the current TB, we must stop
1095 its execution. We could be more precise by checking
1096 that the modification is after the current PC, but it
1097 would require a specialized function to partially
1098 restore the CPU state */
1100 current_tb_modified = 1;
1101 cpu_restore_state(current_tb, env, env->mem_io_pc);
1102 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1103 &current_flags);
1105 #endif /* TARGET_HAS_PRECISE_SMC */
1106 /* we need to do that to handle the case where a signal
1107 occurs while doing tb_phys_invalidate() */
1108 saved_tb = NULL;
1109 if (env) {
1110 saved_tb = env->current_tb;
1111 env->current_tb = NULL;
1113 tb_phys_invalidate(tb, -1);
1114 if (env) {
1115 env->current_tb = saved_tb;
1116 if (env->interrupt_request && env->current_tb)
1117 cpu_interrupt(env, env->interrupt_request);
1120 tb = tb_next;
1122 #if !defined(CONFIG_USER_ONLY)
1123 /* if no code remaining, no need to continue to use slow writes */
1124 if (!p->first_tb) {
1125 invalidate_page_bitmap(p);
1126 if (is_cpu_write_access) {
1127 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1130 #endif
1131 #ifdef TARGET_HAS_PRECISE_SMC
1132 if (current_tb_modified) {
1133 /* we generate a block containing just the instruction
1134 modifying the memory. It will ensure that it cannot modify
1135 itself */
1136 env->current_tb = NULL;
1137 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1138 cpu_resume_from_signal(env, NULL);
1140 #endif
1143 /* len must be <= 8 and start must be a multiple of len */
1144 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1146 PageDesc *p;
1147 int offset, b;
1148 #if 0
1149 if (1) {
1150 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1151 cpu_single_env->mem_io_vaddr, len,
1152 cpu_single_env->eip,
1153 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1155 #endif
1156 p = page_find(start >> TARGET_PAGE_BITS);
1157 if (!p)
1158 return;
1159 if (p->code_bitmap) {
1160 offset = start & ~TARGET_PAGE_MASK;
1161 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1162 if (b & ((1 << len) - 1))
1163 goto do_invalidate;
1164 } else {
1165 do_invalidate:
1166 tb_invalidate_phys_page_range(start, start + len, 1);
1170 #if !defined(CONFIG_SOFTMMU)
1171 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1172 unsigned long pc, void *puc)
1174 TranslationBlock *tb;
1175 PageDesc *p;
1176 int n;
1177 #ifdef TARGET_HAS_PRECISE_SMC
1178 TranslationBlock *current_tb = NULL;
1179 CPUState *env = cpu_single_env;
1180 int current_tb_modified = 0;
1181 target_ulong current_pc = 0;
1182 target_ulong current_cs_base = 0;
1183 int current_flags = 0;
1184 #endif
1186 addr &= TARGET_PAGE_MASK;
1187 p = page_find(addr >> TARGET_PAGE_BITS);
1188 if (!p)
1189 return;
1190 tb = p->first_tb;
1191 #ifdef TARGET_HAS_PRECISE_SMC
1192 if (tb && pc != 0) {
1193 current_tb = tb_find_pc(pc);
1195 #endif
1196 while (tb != NULL) {
1197 n = (long)tb & 3;
1198 tb = (TranslationBlock *)((long)tb & ~3);
1199 #ifdef TARGET_HAS_PRECISE_SMC
1200 if (current_tb == tb &&
1201 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1202 /* If we are modifying the current TB, we must stop
1203 its execution. We could be more precise by checking
1204 that the modification is after the current PC, but it
1205 would require a specialized function to partially
1206 restore the CPU state */
1208 current_tb_modified = 1;
1209 cpu_restore_state(current_tb, env, pc);
1210 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1211 &current_flags);
1213 #endif /* TARGET_HAS_PRECISE_SMC */
1214 tb_phys_invalidate(tb, addr);
1215 tb = tb->page_next[n];
1217 p->first_tb = NULL;
1218 #ifdef TARGET_HAS_PRECISE_SMC
1219 if (current_tb_modified) {
1220 /* we generate a block containing just the instruction
1221 modifying the memory. It will ensure that it cannot modify
1222 itself */
1223 env->current_tb = NULL;
1224 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1225 cpu_resume_from_signal(env, puc);
1227 #endif
1229 #endif
1231 /* add the tb in the target page and protect it if necessary */
1232 static inline void tb_alloc_page(TranslationBlock *tb,
1233 unsigned int n, tb_page_addr_t page_addr)
1235 PageDesc *p;
1236 #ifndef CONFIG_USER_ONLY
1237 bool page_already_protected;
1238 #endif
1240 tb->page_addr[n] = page_addr;
1241 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1242 tb->page_next[n] = p->first_tb;
1243 #ifndef CONFIG_USER_ONLY
1244 page_already_protected = p->first_tb != NULL;
1245 #endif
1246 p->first_tb = (TranslationBlock *)((long)tb | n);
1247 invalidate_page_bitmap(p);
1249 #if defined(TARGET_HAS_SMC) || 1
1251 #if defined(CONFIG_USER_ONLY)
1252 if (p->flags & PAGE_WRITE) {
1253 target_ulong addr;
1254 PageDesc *p2;
1255 int prot;
1257 /* force the host page as non-writable (writes will have a
1258 page fault + mprotect overhead) */
1259 page_addr &= qemu_host_page_mask;
1260 prot = 0;
1261 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1262 addr += TARGET_PAGE_SIZE) {
1264 p2 = page_find (addr >> TARGET_PAGE_BITS);
1265 if (!p2)
1266 continue;
1267 prot |= p2->flags;
1268 p2->flags &= ~PAGE_WRITE;
1270 mprotect(g2h(page_addr), qemu_host_page_size,
1271 (prot & PAGE_BITS) & ~PAGE_WRITE);
1272 #ifdef DEBUG_TB_INVALIDATE
1273 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1274 page_addr);
1275 #endif
1277 #else
1278 /* if some code is already present, then the pages are already
1279 protected. So we handle the case where only the first TB is
1280 allocated in a physical page */
1281 if (!page_already_protected) {
1282 tlb_protect_code(page_addr);
1284 #endif
1286 #endif /* TARGET_HAS_SMC */
1289 /* add a new TB and link it to the physical page tables. phys_page2 is
1290 (-1) to indicate that only one page contains the TB. */
1291 void tb_link_page(TranslationBlock *tb,
1292 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1294 unsigned int h;
1295 TranslationBlock **ptb;
1297 /* Grab the mmap lock to stop another thread invalidating this TB
1298 before we are done. */
1299 mmap_lock();
1300 /* add in the physical hash table */
1301 h = tb_phys_hash_func(phys_pc);
1302 ptb = &tb_phys_hash[h];
1303 tb->phys_hash_next = *ptb;
1304 *ptb = tb;
1306 /* add in the page list */
1307 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1308 if (phys_page2 != -1)
1309 tb_alloc_page(tb, 1, phys_page2);
1310 else
1311 tb->page_addr[1] = -1;
1313 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1314 tb->jmp_next[0] = NULL;
1315 tb->jmp_next[1] = NULL;
1317 /* init original jump addresses */
1318 if (tb->tb_next_offset[0] != 0xffff)
1319 tb_reset_jump(tb, 0);
1320 if (tb->tb_next_offset[1] != 0xffff)
1321 tb_reset_jump(tb, 1);
1323 #ifdef DEBUG_TB_CHECK
1324 tb_page_check();
1325 #endif
1326 mmap_unlock();
1329 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1330 tb[1].tc_ptr. Return NULL if not found */
1331 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1333 int m_min, m_max, m;
1334 unsigned long v;
1335 TranslationBlock *tb;
1337 if (nb_tbs <= 0)
1338 return NULL;
1339 if (tc_ptr < (unsigned long)code_gen_buffer ||
1340 tc_ptr >= (unsigned long)code_gen_ptr)
1341 return NULL;
1342 /* binary search (cf Knuth) */
1343 m_min = 0;
1344 m_max = nb_tbs - 1;
1345 while (m_min <= m_max) {
1346 m = (m_min + m_max) >> 1;
1347 tb = &tbs[m];
1348 v = (unsigned long)tb->tc_ptr;
1349 if (v == tc_ptr)
1350 return tb;
1351 else if (tc_ptr < v) {
1352 m_max = m - 1;
1353 } else {
1354 m_min = m + 1;
1357 return &tbs[m_max];
1360 static void tb_reset_jump_recursive(TranslationBlock *tb);
1362 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1364 TranslationBlock *tb1, *tb_next, **ptb;
1365 unsigned int n1;
1367 tb1 = tb->jmp_next[n];
1368 if (tb1 != NULL) {
1369 /* find head of list */
1370 for(;;) {
1371 n1 = (long)tb1 & 3;
1372 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1373 if (n1 == 2)
1374 break;
1375 tb1 = tb1->jmp_next[n1];
1377 /* we are now sure that tb jumps to tb1 */
1378 tb_next = tb1;
1380 /* remove tb from the jmp_first list */
1381 ptb = &tb_next->jmp_first;
1382 for(;;) {
1383 tb1 = *ptb;
1384 n1 = (long)tb1 & 3;
1385 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1386 if (n1 == n && tb1 == tb)
1387 break;
1388 ptb = &tb1->jmp_next[n1];
1390 *ptb = tb->jmp_next[n];
1391 tb->jmp_next[n] = NULL;
1393 /* suppress the jump to next tb in generated code */
1394 tb_reset_jump(tb, n);
1396 /* suppress jumps in the tb on which we could have jumped */
1397 tb_reset_jump_recursive(tb_next);
1401 static void tb_reset_jump_recursive(TranslationBlock *tb)
1403 tb_reset_jump_recursive2(tb, 0);
1404 tb_reset_jump_recursive2(tb, 1);
1407 #if defined(TARGET_HAS_ICE)
1408 #if defined(CONFIG_USER_ONLY)
1409 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1411 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1413 #else
1414 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1416 target_phys_addr_t addr;
1417 target_ulong pd;
1418 ram_addr_t ram_addr;
1419 PhysPageDesc *p;
1421 addr = cpu_get_phys_page_debug(env, pc);
1422 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1423 if (!p) {
1424 pd = IO_MEM_UNASSIGNED;
1425 } else {
1426 pd = p->phys_offset;
1428 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1429 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1431 #endif
1432 #endif /* TARGET_HAS_ICE */
1434 #if defined(CONFIG_USER_ONLY)
1435 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1440 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1441 int flags, CPUWatchpoint **watchpoint)
1443 return -ENOSYS;
1445 #else
1446 /* Add a watchpoint. */
1447 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1448 int flags, CPUWatchpoint **watchpoint)
1450 target_ulong len_mask = ~(len - 1);
1451 CPUWatchpoint *wp;
1453 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1454 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1455 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1456 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1457 return -EINVAL;
1459 wp = qemu_malloc(sizeof(*wp));
1461 wp->vaddr = addr;
1462 wp->len_mask = len_mask;
1463 wp->flags = flags;
1465 /* keep all GDB-injected watchpoints in front */
1466 if (flags & BP_GDB)
1467 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1468 else
1469 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1471 tlb_flush_page(env, addr);
1473 if (watchpoint)
1474 *watchpoint = wp;
1475 return 0;
1478 /* Remove a specific watchpoint. */
1479 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1480 int flags)
1482 target_ulong len_mask = ~(len - 1);
1483 CPUWatchpoint *wp;
1485 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1486 if (addr == wp->vaddr && len_mask == wp->len_mask
1487 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1488 cpu_watchpoint_remove_by_ref(env, wp);
1489 return 0;
1492 return -ENOENT;
1495 /* Remove a specific watchpoint by reference. */
1496 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1498 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1500 tlb_flush_page(env, watchpoint->vaddr);
1502 qemu_free(watchpoint);
1505 /* Remove all matching watchpoints. */
1506 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1508 CPUWatchpoint *wp, *next;
1510 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1511 if (wp->flags & mask)
1512 cpu_watchpoint_remove_by_ref(env, wp);
1515 #endif
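/* Usage sketch (illustrative only, kept out of the build): installing a
   4-byte write watchpoint with the helpers above and removing it by
   reference.  The wrapper function is made up for the example; BP_MEM_WRITE
   comes from the CPU headers. */
#if 0
static void watchpoint_example(CPUState *env, target_ulong addr)
{
    CPUWatchpoint *wp;

    if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE, &wp) == 0) {
        /* ... let the guest run; a hit sets BP_WATCHPOINT_HIT ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif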
1517 /* Add a breakpoint. */
1518 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1519 CPUBreakpoint **breakpoint)
1521 #if defined(TARGET_HAS_ICE)
1522 CPUBreakpoint *bp;
1524 bp = qemu_malloc(sizeof(*bp));
1526 bp->pc = pc;
1527 bp->flags = flags;
1529 /* keep all GDB-injected breakpoints in front */
1530 if (flags & BP_GDB)
1531 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1532 else
1533 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1535 breakpoint_invalidate(env, pc);
1537 if (breakpoint)
1538 *breakpoint = bp;
1539 return 0;
1540 #else
1541 return -ENOSYS;
1542 #endif
1545 /* Remove a specific breakpoint. */
1546 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1548 #if defined(TARGET_HAS_ICE)
1549 CPUBreakpoint *bp;
1551 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1552 if (bp->pc == pc && bp->flags == flags) {
1553 cpu_breakpoint_remove_by_ref(env, bp);
1554 return 0;
1557 return -ENOENT;
1558 #else
1559 return -ENOSYS;
1560 #endif
1563 /* Remove a specific breakpoint by reference. */
1564 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1566 #if defined(TARGET_HAS_ICE)
1567 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1569 breakpoint_invalidate(env, breakpoint->pc);
1571 qemu_free(breakpoint);
1572 #endif
1575 /* Remove all matching breakpoints. */
1576 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1578 #if defined(TARGET_HAS_ICE)
1579 CPUBreakpoint *bp, *next;
1581 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1582 if (bp->flags & mask)
1583 cpu_breakpoint_remove_by_ref(env, bp);
1585 #endif
1588 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1589 CPU loop after each instruction */
1590 void cpu_single_step(CPUState *env, int enabled)
1592 #if defined(TARGET_HAS_ICE)
1593 if (env->singlestep_enabled != enabled) {
1594 env->singlestep_enabled = enabled;
1595 if (kvm_enabled())
1596 kvm_update_guest_debug(env, 0);
1597 else {
1598 /* must flush all the translated code to avoid inconsistencies */
1599 /* XXX: only flush what is necessary */
1600 tb_flush(env);
1603 #endif
1606 /* enable or disable low levels log */
1607 void cpu_set_log(int log_flags)
1609 loglevel = log_flags;
1610 if (loglevel && !logfile) {
1611 logfile = fopen(logfilename, log_append ? "a" : "w");
1612 if (!logfile) {
1613 perror(logfilename);
1614 _exit(1);
1616 #if !defined(CONFIG_SOFTMMU)
1617 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1619 static char logfile_buf[4096];
1620 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1622 #elif !defined(_WIN32)
1623 /* Win32 doesn't support line-buffering and requires size >= 2 */
1624 setvbuf(logfile, NULL, _IOLBF, 0);
1625 #endif
1626 log_append = 1;
1628 if (!loglevel && logfile) {
1629 fclose(logfile);
1630 logfile = NULL;
1634 void cpu_set_log_filename(const char *filename)
1636 logfilename = strdup(filename);
1637 if (logfile) {
1638 fclose(logfile);
1639 logfile = NULL;
1641 cpu_set_log(loglevel);
1644 static void cpu_unlink_tb(CPUState *env)
1646 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1647 problem and hope the cpu will stop of its own accord. For userspace
1648 emulation this often isn't actually as bad as it sounds. Often
1649 signals are used primarily to interrupt blocking syscalls. */
1650 TranslationBlock *tb;
1651 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1653 spin_lock(&interrupt_lock);
1654 tb = env->current_tb;
1655 /* if the cpu is currently executing code, we must unlink it and
1656 all the potentially executing TB */
1657 if (tb) {
1658 env->current_tb = NULL;
1659 tb_reset_jump_recursive(tb);
1661 spin_unlock(&interrupt_lock);
1664 #ifndef CONFIG_USER_ONLY
1665 /* mask must never be zero, except for A20 change call */
1666 static void tcg_handle_interrupt(CPUState *env, int mask)
1668 int old_mask;
1670 old_mask = env->interrupt_request;
1671 env->interrupt_request |= mask;
1674 * If called from iothread context, wake the target cpu in
1675 * case it's halted.
1677 if (!qemu_cpu_is_self(env)) {
1678 qemu_cpu_kick(env);
1679 return;
1682 if (use_icount) {
1683 env->icount_decr.u16.high = 0xffff;
1684 if (!can_do_io(env)
1685 && (mask & ~old_mask) != 0) {
1686 cpu_abort(env, "Raised interrupt while not in I/O function");
1688 } else {
1689 cpu_unlink_tb(env);
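/* Hook so that an accelerator (kvm) can install its own interrupt
   handler in place of the TCG one. */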
1693 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1695 #else /* CONFIG_USER_ONLY */
1697 void cpu_interrupt(CPUState *env, int mask)
1699 env->interrupt_request |= mask;
1700 cpu_unlink_tb(env);
1702 #endif /* CONFIG_USER_ONLY */
1704 void cpu_reset_interrupt(CPUState *env, int mask)
1706 env->interrupt_request &= ~mask;
1709 void cpu_exit(CPUState *env)
1711 env->exit_request = 1;
1712 cpu_unlink_tb(env);
1715 const CPULogItem cpu_log_items[] = {
1716 { CPU_LOG_TB_OUT_ASM, "out_asm",
1717 "show generated host assembly code for each compiled TB" },
1718 { CPU_LOG_TB_IN_ASM, "in_asm",
1719 "show target assembly code for each compiled TB" },
1720 { CPU_LOG_TB_OP, "op",
1721 "show micro ops for each compiled TB" },
1722 { CPU_LOG_TB_OP_OPT, "op_opt",
1723 "show micro ops "
1724 #ifdef TARGET_I386
1725 "before eflags optimization and "
1726 #endif
1727 "after liveness analysis" },
1728 { CPU_LOG_INT, "int",
1729 "show interrupts/exceptions in short format" },
1730 { CPU_LOG_EXEC, "exec",
1731 "show trace before each executed TB (lots of logs)" },
1732 { CPU_LOG_TB_CPU, "cpu",
1733 "show CPU state before block translation" },
1734 #ifdef TARGET_I386
1735 { CPU_LOG_PCALL, "pcall",
1736 "show protected mode far calls/returns/exceptions" },
1737 { CPU_LOG_RESET, "cpu_reset",
1738 "show CPU state before CPU resets" },
1739 #endif
1740 #ifdef DEBUG_IOPORT
1741 { CPU_LOG_IOPORT, "ioport",
1742 "show all i/o ports accesses" },
1743 #endif
1744 { 0, NULL, NULL },
1747 #ifndef CONFIG_USER_ONLY
1748 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1749 = QLIST_HEAD_INITIALIZER(memory_client_list);
1751 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1752 ram_addr_t size,
1753 ram_addr_t phys_offset,
1754 bool log_dirty)
1756 CPUPhysMemoryClient *client;
1757 QLIST_FOREACH(client, &memory_client_list, list) {
1758 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1762 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1763 target_phys_addr_t end)
1765 CPUPhysMemoryClient *client;
1766 QLIST_FOREACH(client, &memory_client_list, list) {
1767 int r = client->sync_dirty_bitmap(client, start, end);
1768 if (r < 0)
1769 return r;
1771 return 0;
1774 static int cpu_notify_migration_log(int enable)
1776 CPUPhysMemoryClient *client;
1777 QLIST_FOREACH(client, &memory_client_list, list) {
1778 int r = client->migration_log(client, enable);
1779 if (r < 0)
1780 return r;
1782 return 0;
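/* Accumulator used by phys_page_for_each_1() below to merge physically
   contiguous pages into a single set_memory() callback. */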
1785 struct last_map {
1786 target_phys_addr_t start_addr;
1787 ram_addr_t size;
1788 ram_addr_t phys_offset;
1791 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1792 * address. Each intermediate table provides the next L2_BITs of guest
1793 physical address space. The number of levels varies based on host and
1794 * guest configuration, making it efficient to build the final guest
1795 * physical address by seeding the L1 offset and shifting and adding in
1796 * each L2 offset as we recurse through them. */
1797 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1798 void **lp, target_phys_addr_t addr,
1799 struct last_map *map)
1801 int i;
1803 if (*lp == NULL) {
1804 return;
1806 if (level == 0) {
1807 PhysPageDesc *pd = *lp;
1808 addr <<= L2_BITS + TARGET_PAGE_BITS;
1809 for (i = 0; i < L2_SIZE; ++i) {
1810 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1811 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1813 if (map->size &&
1814 start_addr == map->start_addr + map->size &&
1815 pd[i].phys_offset == map->phys_offset + map->size) {
1817 map->size += TARGET_PAGE_SIZE;
1818 continue;
1819 } else if (map->size) {
1820 client->set_memory(client, map->start_addr,
1821 map->size, map->phys_offset, false);
1824 map->start_addr = start_addr;
1825 map->size = TARGET_PAGE_SIZE;
1826 map->phys_offset = pd[i].phys_offset;
1829 } else {
1830 void **pp = *lp;
1831 for (i = 0; i < L2_SIZE; ++i) {
1832 phys_page_for_each_1(client, level - 1, pp + i,
1833 (addr << L2_BITS) | i, map);
1838 static void phys_page_for_each(CPUPhysMemoryClient *client)
1840 int i;
1841 struct last_map map = { };
1843 for (i = 0; i < P_L1_SIZE; ++i) {
1844 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1845 l1_phys_map + i, i, &map);
1847 if (map.size) {
1848 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1849 false);
1853 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1855 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1856 phys_page_for_each(client);
1859 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1861 QLIST_REMOVE(client, list);
1863 #endif
1865 static int cmp1(const char *s1, int n, const char *s2)
1867 if (strlen(s2) != n)
1868 return 0;
1869 return memcmp(s1, s2, n) == 0;
1872 /* takes a comma separated list of log masks. Return 0 if error. */
1873 int cpu_str_to_log_mask(const char *str)
1875 const CPULogItem *item;
1876 int mask;
1877 const char *p, *p1;
1879 p = str;
1880 mask = 0;
1881 for(;;) {
1882 p1 = strchr(p, ',');
1883 if (!p1)
1884 p1 = p + strlen(p);
1885 if(cmp1(p,p1-p,"all")) {
1886 for(item = cpu_log_items; item->mask != 0; item++) {
1887 mask |= item->mask;
1889 } else {
1890 for(item = cpu_log_items; item->mask != 0; item++) {
1891 if (cmp1(p, p1 - p, item->name))
1892 goto found;
1894 return 0;
1896 found:
1897 mask |= item->mask;
1898 if (*p1 != ',')
1899 break;
1900 p = p1 + 1;
1902 return mask;
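/* Usage sketch (illustrative, kept out of the build): this is the parser
   behind log item lists such as those given to -d; a return value of 0
   means an unknown item was given, otherwise the mask can be fed to
   cpu_set_log().  The wrapper below is made up for the example. */
#if 0
static int set_log_from_string(const char *items)
{
    int mask = cpu_str_to_log_mask(items);
    if (mask == 0) {
        return -1;          /* unknown log item */
    }
    cpu_set_log(mask);      /* opens the log file on first use */
    return 0;
}
#endif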
1905 void cpu_abort(CPUState *env, const char *fmt, ...)
1907 va_list ap;
1908 va_list ap2;
1910 va_start(ap, fmt);
1911 va_copy(ap2, ap);
1912 fprintf(stderr, "qemu: fatal: ");
1913 vfprintf(stderr, fmt, ap);
1914 fprintf(stderr, "\n");
1915 #ifdef TARGET_I386
1916 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1917 #else
1918 cpu_dump_state(env, stderr, fprintf, 0);
1919 #endif
1920 if (qemu_log_enabled()) {
1921 qemu_log("qemu: fatal: ");
1922 qemu_log_vprintf(fmt, ap2);
1923 qemu_log("\n");
1924 #ifdef TARGET_I386
1925 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1926 #else
1927 log_cpu_state(env, 0);
1928 #endif
1929 qemu_log_flush();
1930 qemu_log_close();
1932 va_end(ap2);
1933 va_end(ap);
1934 #if defined(CONFIG_USER_ONLY)
1936 struct sigaction act;
1937 sigfillset(&act.sa_mask);
1938 act.sa_handler = SIG_DFL;
1939 sigaction(SIGABRT, &act, NULL);
1941 #endif
1942 abort();
1945 CPUState *cpu_copy(CPUState *env)
1947 CPUState *new_env = cpu_init(env->cpu_model_str);
1948 CPUState *next_cpu = new_env->next_cpu;
1949 int cpu_index = new_env->cpu_index;
1950 #if defined(TARGET_HAS_ICE)
1951 CPUBreakpoint *bp;
1952 CPUWatchpoint *wp;
1953 #endif
1955 memcpy(new_env, env, sizeof(CPUState));
1957 /* Preserve chaining and index. */
1958 new_env->next_cpu = next_cpu;
1959 new_env->cpu_index = cpu_index;
1961 /* Clone all break/watchpoints.
1962 Note: Once we support ptrace with hw-debug register access, make sure
1963 BP_CPU break/watchpoints are handled correctly on clone. */
1964 QTAILQ_INIT(&new_env->breakpoints);
1965 QTAILQ_INIT(&new_env->watchpoints);
1966 #if defined(TARGET_HAS_ICE)
1967 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1968 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1970 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1971 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1972 wp->flags, NULL);
1974 #endif
1976 return new_env;
1979 #if !defined(CONFIG_USER_ONLY)
1981 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1983 unsigned int i;
1985 /* Discard jump cache entries for any tb which might potentially
1986 overlap the flushed page. */
1987 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1988 memset (&env->tb_jmp_cache[i], 0,
1989 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1991 i = tb_jmp_cache_hash_page(addr);
1992 memset (&env->tb_jmp_cache[i], 0,
1993 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1996 static CPUTLBEntry s_cputlb_empty_entry = {
1997 .addr_read = -1,
1998 .addr_write = -1,
1999 .addr_code = -1,
2000 .addend = -1,
2003 /* NOTE: if flush_global is true, also flush global entries (not
2004 implemented yet) */
2005 void tlb_flush(CPUState *env, int flush_global)
2007 int i;
2009 #if defined(DEBUG_TLB)
2010 printf("tlb_flush:\n");
2011 #endif
2012 /* must reset current TB so that interrupts cannot modify the
2013 links while we are modifying them */
2014 env->current_tb = NULL;
2016 for(i = 0; i < CPU_TLB_SIZE; i++) {
2017 int mmu_idx;
2018 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2019 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2023 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2025 env->tlb_flush_addr = -1;
2026 env->tlb_flush_mask = 0;
2027 tlb_flush_count++;
2030 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2032 if (addr == (tlb_entry->addr_read &
2033 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2034 addr == (tlb_entry->addr_write &
2035 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2036 addr == (tlb_entry->addr_code &
2037 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2038 *tlb_entry = s_cputlb_empty_entry;
2042 void tlb_flush_page(CPUState *env, target_ulong addr)
2044 int i;
2045 int mmu_idx;
2047 #if defined(DEBUG_TLB)
2048 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2049 #endif
2050 /* Check if we need to flush due to large pages. */
2051 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2052 #if defined(DEBUG_TLB)
2053 printf("tlb_flush_page: forced full flush ("
2054 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2055 env->tlb_flush_addr, env->tlb_flush_mask);
2056 #endif
2057 tlb_flush(env, 1);
2058 return;
2060 /* must reset current TB so that interrupts cannot modify the
2061 links while we are modifying them */
2062 env->current_tb = NULL;
2064 addr &= TARGET_PAGE_MASK;
2065 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2066 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2067 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2069 tlb_flush_jmp_cache(env, addr);
2072 /* update the TLBs so that writes to code in the virtual page 'addr'
2073 can be detected */
2074 static void tlb_protect_code(ram_addr_t ram_addr)
2076 cpu_physical_memory_reset_dirty(ram_addr,
2077 ram_addr + TARGET_PAGE_SIZE,
2078 CODE_DIRTY_FLAG);
2081 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2082 tested for self modifying code */
2083 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2084 target_ulong vaddr)
2086 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2089 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2090 unsigned long start, unsigned long length)
2092 unsigned long addr;
2093 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2094 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2095 if ((addr - start) < length) {
2096 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2101 /* Note: start and end must be within the same ram block. */
2102 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2103 int dirty_flags)
2105 CPUState *env;
2106 unsigned long length, start1;
2107 int i;
2109 start &= TARGET_PAGE_MASK;
2110 end = TARGET_PAGE_ALIGN(end);
2112 length = end - start;
2113 if (length == 0)
2114 return;
2115 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2117 /* we modify the TLB cache so that the dirty bit will be set again
2118 when accessing the range */
2119 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2120 /* Check that we don't span multiple blocks - this breaks the
2121 address comparisons below. */
2122 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2123 != (end - 1) - start) {
2124 abort();
2127 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2128 int mmu_idx;
2129 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2130 for(i = 0; i < CPU_TLB_SIZE; i++)
2131 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2132 start1, length);
2137 int cpu_physical_memory_set_dirty_tracking(int enable)
2139 int ret = 0;
2140 in_migration = enable;
2141 ret = cpu_notify_migration_log(!!enable);
2142 return ret;
2145 int cpu_physical_memory_get_dirty_tracking(void)
2147 return in_migration;
2150 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2151 target_phys_addr_t end_addr)
2153 int ret;
2155 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2156 return ret;
2159 int cpu_physical_log_start(target_phys_addr_t start_addr,
2160 ram_addr_t size)
2162 CPUPhysMemoryClient *client;
2163 QLIST_FOREACH(client, &memory_client_list, list) {
2164 if (client->log_start) {
2165 int r = client->log_start(client, start_addr, size);
2166 if (r < 0) {
2167 return r;
2171 return 0;
2174 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2175 ram_addr_t size)
2177 CPUPhysMemoryClient *client;
2178 QLIST_FOREACH(client, &memory_client_list, list) {
2179 if (client->log_stop) {
2180 int r = client->log_stop(client, start_addr, size);
2181 if (r < 0) {
2182 return r;
2186 return 0;
2189 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2191 ram_addr_t ram_addr;
2192 void *p;
2194 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2195 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2196 + tlb_entry->addend);
2197 ram_addr = qemu_ram_addr_from_host_nofail(p);
2198 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2199 tlb_entry->addr_write |= TLB_NOTDIRTY;
2204 /* update the TLB according to the current state of the dirty bits */
2205 void cpu_tlb_update_dirty(CPUState *env)
2207 int i;
2208 int mmu_idx;
2209 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2210 for(i = 0; i < CPU_TLB_SIZE; i++)
2211 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2215 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2217 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2218 tlb_entry->addr_write = vaddr;
2221 /* update the TLB corresponding to virtual page vaddr
2222 so that it is no longer dirty */
2223 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2225 int i;
2226 int mmu_idx;
2228 vaddr &= TARGET_PAGE_MASK;
2229 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2230 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2231 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2234 /* Our TLB does not support large pages, so remember the area covered by
2235 large pages and trigger a full TLB flush if these are invalidated. */
2236 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2237 target_ulong size)
2239 target_ulong mask = ~(size - 1);
2241 if (env->tlb_flush_addr == (target_ulong)-1) {
2242 env->tlb_flush_addr = vaddr & mask;
2243 env->tlb_flush_mask = mask;
2244 return;
2246 /* Extend the existing region to include the new page.
2247 This is a compromise between unnecessary flushes and the cost
2248 of maintaining a full variable size TLB. */
2249 mask &= env->tlb_flush_mask;
2250 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2251 mask <<= 1;
2253 env->tlb_flush_addr &= mask;
2254 env->tlb_flush_mask = mask;
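/* Worked example (illustrative only, hypothetical addresses; not part of the
 * original source): after tlb_add_large_page(env, 0x40200000, 2MB) the
 * tracked region is addr=0x40200000, mask=0xFFE00000.  A second call with
 * vaddr=0x40800000 and the same size widens the mask until both addresses
 * agree under it: 0xFFE00000 -> 0xFFC00000 -> 0xFF800000 -> 0xFF000000,
 * leaving addr=0x40000000, mask=0xFF000000, i.e. one 16MB region covering
 * both 2MB pages.  A later tlb_flush_page() for any address inside that
 * region then falls back to a full TLB flush. */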
2257 /* Add a new TLB entry. At most one entry for a given virtual address
2258 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2259 supplied size is only used by tlb_flush_page. */
2260 void tlb_set_page(CPUState *env, target_ulong vaddr,
2261 target_phys_addr_t paddr, int prot,
2262 int mmu_idx, target_ulong size)
2264 PhysPageDesc *p;
2265 unsigned long pd;
2266 unsigned int index;
2267 target_ulong address;
2268 target_ulong code_address;
2269 unsigned long addend;
2270 CPUTLBEntry *te;
2271 CPUWatchpoint *wp;
2272 target_phys_addr_t iotlb;
2274 assert(size >= TARGET_PAGE_SIZE);
2275 if (size != TARGET_PAGE_SIZE) {
2276 tlb_add_large_page(env, vaddr, size);
2278 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2279 if (!p) {
2280 pd = IO_MEM_UNASSIGNED;
2281 } else {
2282 pd = p->phys_offset;
2284 #if defined(DEBUG_TLB)
2285 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2286 " prot=%x idx=%d pd=0x%08lx\n",
2287 vaddr, paddr, prot, mmu_idx, pd);
2288 #endif
2290 address = vaddr;
2291 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2292 /* IO memory case (romd handled later) */
2293 address |= TLB_MMIO;
2295 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2296 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2297 /* Normal RAM. */
2298 iotlb = pd & TARGET_PAGE_MASK;
2299 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2300 iotlb |= IO_MEM_NOTDIRTY;
2301 else
2302 iotlb |= IO_MEM_ROM;
2303 } else {
2304 /* IO handlers are currently passed a physical address.
2305 It would be nice to pass an offset from the base address
2306 of that region. This would avoid having to special case RAM,
2307 and avoid full address decoding in every device.
2308 We can't use the high bits of pd for this because
2309 IO_MEM_ROMD uses these as a ram address. */
2310 iotlb = (pd & ~TARGET_PAGE_MASK);
2311 if (p) {
2312 iotlb += p->region_offset;
2313 } else {
2314 iotlb += paddr;
2318 code_address = address;
2319 /* Make accesses to pages with watchpoints go via the
2320 watchpoint trap routines. */
2321 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2322 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2323 /* Avoid trapping reads of pages with a write breakpoint. */
2324 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2325 iotlb = io_mem_watch + paddr;
2326 address |= TLB_MMIO;
2327 break;
2332 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2333 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2334 te = &env->tlb_table[mmu_idx][index];
2335 te->addend = addend - vaddr;
2336 if (prot & PAGE_READ) {
2337 te->addr_read = address;
2338 } else {
2339 te->addr_read = -1;
2342 if (prot & PAGE_EXEC) {
2343 te->addr_code = code_address;
2344 } else {
2345 te->addr_code = -1;
2347 if (prot & PAGE_WRITE) {
2348 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2349 (pd & IO_MEM_ROMD)) {
2350 /* Write access calls the I/O callback. */
2351 te->addr_write = address | TLB_MMIO;
2352 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2353 !cpu_physical_memory_is_dirty(pd)) {
2354 te->addr_write = address | TLB_NOTDIRTY;
2355 } else {
2356 te->addr_write = address;
2358 } else {
2359 te->addr_write = -1;
2363 #else
2365 void tlb_flush(CPUState *env, int flush_global)
2369 void tlb_flush_page(CPUState *env, target_ulong addr)
2374 * Walks guest process memory "regions" one by one
2375 * and calls callback function 'fn' for each region.
2378 struct walk_memory_regions_data
2380 walk_memory_regions_fn fn;
2381 void *priv;
2382 unsigned long start;
2383 int prot;
2386 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2387 abi_ulong end, int new_prot)
2389 if (data->start != -1ul) {
2390 int rc = data->fn(data->priv, data->start, end, data->prot);
2391 if (rc != 0) {
2392 return rc;
2396 data->start = (new_prot ? end : -1ul);
2397 data->prot = new_prot;
2399 return 0;
2402 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2403 abi_ulong base, int level, void **lp)
2405 abi_ulong pa;
2406 int i, rc;
2408 if (*lp == NULL) {
2409 return walk_memory_regions_end(data, base, 0);
2412 if (level == 0) {
2413 PageDesc *pd = *lp;
2414 for (i = 0; i < L2_SIZE; ++i) {
2415 int prot = pd[i].flags;
2417 pa = base | (i << TARGET_PAGE_BITS);
2418 if (prot != data->prot) {
2419 rc = walk_memory_regions_end(data, pa, prot);
2420 if (rc != 0) {
2421 return rc;
2425 } else {
2426 void **pp = *lp;
2427 for (i = 0; i < L2_SIZE; ++i) {
2428 pa = base | ((abi_ulong)i <<
2429 (TARGET_PAGE_BITS + L2_BITS * level));
2430 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2431 if (rc != 0) {
2432 return rc;
2437 return 0;
2440 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2442 struct walk_memory_regions_data data;
2443 unsigned long i;
2445 data.fn = fn;
2446 data.priv = priv;
2447 data.start = -1ul;
2448 data.prot = 0;
2450 for (i = 0; i < V_L1_SIZE; i++) {
2451 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2452 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2453 if (rc != 0) {
2454 return rc;
2458 return walk_memory_regions_end(&data, 0, 0);
2461 static int dump_region(void *priv, abi_ulong start,
2462 abi_ulong end, unsigned long prot)
2464 FILE *f = (FILE *)priv;
2466 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2467 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2468 start, end, end - start,
2469 ((prot & PAGE_READ) ? 'r' : '-'),
2470 ((prot & PAGE_WRITE) ? 'w' : '-'),
2471 ((prot & PAGE_EXEC) ? 'x' : '-'));
2473 return (0);
2476 /* dump memory mappings */
2477 void page_dump(FILE *f)
2479 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2480 "start", "end", "size", "prot");
2481 walk_memory_regions(f, dump_region);
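/* Minimal usage sketch (not part of the original source; count_exec_bytes is
 * a hypothetical name): a callback with the same shape as dump_region()
 * above can tally how many bytes are currently mapped executable.
 *
 *     static int count_exec_bytes(void *priv, abi_ulong start,
 *                                 abi_ulong end, unsigned long prot)
 *     {
 *         abi_ulong *total = priv;
 *
 *         if (prot & PAGE_EXEC) {
 *             *total += end - start;   // region is [start, end)
 *         }
 *         return 0;                    // non-zero would abort the walk
 *     }
 *
 *     abi_ulong exec_bytes = 0;
 *     walk_memory_regions(&exec_bytes, count_exec_bytes);
 */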
2484 int page_get_flags(target_ulong address)
2486 PageDesc *p;
2488 p = page_find(address >> TARGET_PAGE_BITS);
2489 if (!p)
2490 return 0;
2491 return p->flags;
2494 /* Modify the flags of a page and invalidate the code if necessary.
2495 The flag PAGE_WRITE_ORG is positioned automatically depending
2496 on PAGE_WRITE. The mmap_lock should already be held. */
2497 void page_set_flags(target_ulong start, target_ulong end, int flags)
2499 target_ulong addr, len;
2501 /* This function should never be called with addresses outside the
2502 guest address space. If this assert fires, it probably indicates
2503 a missing call to h2g_valid. */
2504 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2505 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2506 #endif
2507 assert(start < end);
2509 start = start & TARGET_PAGE_MASK;
2510 end = TARGET_PAGE_ALIGN(end);
2512 if (flags & PAGE_WRITE) {
2513 flags |= PAGE_WRITE_ORG;
2516 for (addr = start, len = end - start;
2517 len != 0;
2518 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2519 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2521 /* If the write protection bit is set, then we invalidate
2522 the code inside. */
2523 if (!(p->flags & PAGE_WRITE) &&
2524 (flags & PAGE_WRITE) &&
2525 p->first_tb) {
2526 tb_invalidate_phys_page(addr, 0, NULL);
2528 p->flags = flags;
2532 int page_check_range(target_ulong start, target_ulong len, int flags)
2534 PageDesc *p;
2535 target_ulong end;
2536 target_ulong addr;
2538 /* This function should never be called with addresses outside the
2539 guest address space. If this assert fires, it probably indicates
2540 a missing call to h2g_valid. */
2541 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2542 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2543 #endif
2545 if (len == 0) {
2546 return 0;
2548 if (start + len - 1 < start) {
2549 /* We've wrapped around. */
2550 return -1;
2553 end = TARGET_PAGE_ALIGN(start+len); /* must do this before we lose bits in the next step */
2554 start = start & TARGET_PAGE_MASK;
2556 for (addr = start, len = end - start;
2557 len != 0;
2558 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2559 p = page_find(addr >> TARGET_PAGE_BITS);
2560 if (!p)
2561 return -1;
2562 if (!(p->flags & PAGE_VALID))
2563 return -1;
2565 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2566 return -1;
2567 if (flags & PAGE_WRITE) {
2568 if (!(p->flags & PAGE_WRITE_ORG))
2569 return -1;
2570 /* unprotect the page if it was put read-only because it
2571 contains translated code */
2572 if (!(p->flags & PAGE_WRITE)) {
2573 if (!page_unprotect(addr, 0, NULL))
2574 return -1;
2576 return 0;
2579 return 0;
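/* Usage sketch (illustrative, not from the original source): user-mode
 * syscall helpers typically validate a guest buffer before touching it,
 * along the lines of
 *
 *     if (page_check_range(guest_addr, len, PAGE_READ | PAGE_WRITE) < 0) {
 *         return -TARGET_EFAULT;   // TARGET_EFAULT comes from linux-user code
 *     }
 *
 * page_check_range() returns 0 on success and -1 on failure, and as a side
 * effect unprotects pages that were made read-only only because they
 * contain translated code. */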
2582 /* called from signal handler: invalidate the code and unprotect the
2583 page. Return TRUE if the fault was successfully handled. */
2584 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2586 unsigned int prot;
2587 PageDesc *p;
2588 target_ulong host_start, host_end, addr;
2590 /* Technically this isn't safe inside a signal handler. However we
2591 know this only ever happens in a synchronous SEGV handler, so in
2592 practice it seems to be ok. */
2593 mmap_lock();
2595 p = page_find(address >> TARGET_PAGE_BITS);
2596 if (!p) {
2597 mmap_unlock();
2598 return 0;
2601 /* if the page was really writable, then we change its
2602 protection back to writable */
2603 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2604 host_start = address & qemu_host_page_mask;
2605 host_end = host_start + qemu_host_page_size;
2607 prot = 0;
2608 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2609 p = page_find(addr >> TARGET_PAGE_BITS);
2610 p->flags |= PAGE_WRITE;
2611 prot |= p->flags;
2613 /* and since the content will be modified, we must invalidate
2614 the corresponding translated code. */
2615 tb_invalidate_phys_page(addr, pc, puc);
2616 #ifdef DEBUG_TB_CHECK
2617 tb_invalidate_check(addr);
2618 #endif
2620 mprotect((void *)g2h(host_start), qemu_host_page_size,
2621 prot & PAGE_BITS);
2623 mmap_unlock();
2624 return 1;
2626 mmap_unlock();
2627 return 0;
2630 static inline void tlb_set_dirty(CPUState *env,
2631 unsigned long addr, target_ulong vaddr)
2634 #endif /* defined(CONFIG_USER_ONLY) */
2636 #if !defined(CONFIG_USER_ONLY)
2638 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2639 typedef struct subpage_t {
2640 target_phys_addr_t base;
2641 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2642 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2643 } subpage_t;
2645 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2646 ram_addr_t memory, ram_addr_t region_offset);
2647 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2648 ram_addr_t orig_memory,
2649 ram_addr_t region_offset);
2650 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2651 need_subpage) \
2652 do { \
2653 if (addr > start_addr) \
2654 start_addr2 = 0; \
2655 else { \
2656 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2657 if (start_addr2 > 0) \
2658 need_subpage = 1; \
2661 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2662 end_addr2 = TARGET_PAGE_SIZE - 1; \
2663 else { \
2664 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2665 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2666 need_subpage = 1; \
2668 } while (0)
2670 /* register physical memory.
2671 For RAM, 'size' must be a multiple of the target page size.
2672 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2673 io memory page. The address used when calling the IO function is
2674 the offset from the start of the region, plus region_offset. Both
2675 start_addr and region_offset are rounded down to a page boundary
2676 before calculating this offset. This should not be a problem unless
2677 the low bits of start_addr and region_offset differ. */
2678 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2679 ram_addr_t size,
2680 ram_addr_t phys_offset,
2681 ram_addr_t region_offset,
2682 bool log_dirty)
2684 target_phys_addr_t addr, end_addr;
2685 PhysPageDesc *p;
2686 CPUState *env;
2687 ram_addr_t orig_size = size;
2688 subpage_t *subpage;
2690 assert(size);
2691 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2693 if (phys_offset == IO_MEM_UNASSIGNED) {
2694 region_offset = start_addr;
2696 region_offset &= TARGET_PAGE_MASK;
2697 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2698 end_addr = start_addr + (target_phys_addr_t)size;
2700 addr = start_addr;
2701 do {
2702 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2703 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2704 ram_addr_t orig_memory = p->phys_offset;
2705 target_phys_addr_t start_addr2, end_addr2;
2706 int need_subpage = 0;
2708 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2709 need_subpage);
2710 if (need_subpage) {
2711 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2712 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2713 &p->phys_offset, orig_memory,
2714 p->region_offset);
2715 } else {
2716 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2717 >> IO_MEM_SHIFT];
2719 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2720 region_offset);
2721 p->region_offset = 0;
2722 } else {
2723 p->phys_offset = phys_offset;
2724 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2725 (phys_offset & IO_MEM_ROMD))
2726 phys_offset += TARGET_PAGE_SIZE;
2728 } else {
2729 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2730 p->phys_offset = phys_offset;
2731 p->region_offset = region_offset;
2732 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2733 (phys_offset & IO_MEM_ROMD)) {
2734 phys_offset += TARGET_PAGE_SIZE;
2735 } else {
2736 target_phys_addr_t start_addr2, end_addr2;
2737 int need_subpage = 0;
2739 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2740 end_addr2, need_subpage);
2742 if (need_subpage) {
2743 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2744 &p->phys_offset, IO_MEM_UNASSIGNED,
2745 addr & TARGET_PAGE_MASK);
2746 subpage_register(subpage, start_addr2, end_addr2,
2747 phys_offset, region_offset);
2748 p->region_offset = 0;
2752 region_offset += TARGET_PAGE_SIZE;
2753 addr += TARGET_PAGE_SIZE;
2754 } while (addr != end_addr);
2756 /* since each CPU stores ram addresses in its TLB cache, we must
2757 reset the modified entries */
2758 /* XXX: slow ! */
2759 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2760 tlb_flush(env, 1);
2764 /* XXX: temporary until new memory mapping API */
2765 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2767 PhysPageDesc *p;
2769 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2770 if (!p)
2771 return IO_MEM_UNASSIGNED;
2772 return p->phys_offset;
2775 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2777 if (kvm_enabled())
2778 kvm_coalesce_mmio_region(addr, size);
2781 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2783 if (kvm_enabled())
2784 kvm_uncoalesce_mmio_region(addr, size);
2787 void qemu_flush_coalesced_mmio_buffer(void)
2789 if (kvm_enabled())
2790 kvm_flush_coalesced_mmio_buffer();
2793 #if defined(__linux__) && !defined(TARGET_S390X)
2795 #include <sys/vfs.h>
2797 #define HUGETLBFS_MAGIC 0x958458f6
2799 static long gethugepagesize(const char *path)
2801 struct statfs fs;
2802 int ret;
2804 do {
2805 ret = statfs(path, &fs);
2806 } while (ret != 0 && errno == EINTR);
2808 if (ret != 0) {
2809 perror(path);
2810 return 0;
2813 if (fs.f_type != HUGETLBFS_MAGIC)
2814 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2816 return fs.f_bsize;
2819 static void *file_ram_alloc(RAMBlock *block,
2820 ram_addr_t memory,
2821 const char *path)
2823 char *filename;
2824 void *area;
2825 int fd;
2826 #ifdef MAP_POPULATE
2827 int flags;
2828 #endif
2829 unsigned long hpagesize;
2831 hpagesize = gethugepagesize(path);
2832 if (!hpagesize) {
2833 return NULL;
2836 if (memory < hpagesize) {
2837 return NULL;
2840 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2841 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2842 return NULL;
2845 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2846 return NULL;
2849 fd = mkstemp(filename);
2850 if (fd < 0) {
2851 perror("unable to create backing store for hugepages");
2852 free(filename);
2853 return NULL;
2855 unlink(filename);
2856 free(filename);
2858 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2861 * ftruncate is not supported by hugetlbfs in older
2862 * hosts, so don't bother bailing out on errors.
2863 * If anything goes wrong with it under other filesystems,
2864 * mmap will fail.
2866 if (ftruncate(fd, memory))
2867 perror("ftruncate");
2869 #ifdef MAP_POPULATE
2870 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2871 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2872 * to sidestep this quirk.
2874 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2875 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2876 #else
2877 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2878 #endif
2879 if (area == MAP_FAILED) {
2880 perror("file_ram_alloc: can't mmap RAM pages");
2881 close(fd);
2882 return (NULL);
2884 block->fd = fd;
2885 return area;
2887 #endif
2889 static ram_addr_t find_ram_offset(ram_addr_t size)
2891 RAMBlock *block, *next_block;
2892 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2894 if (QLIST_EMPTY(&ram_list.blocks))
2895 return 0;
2897 QLIST_FOREACH(block, &ram_list.blocks, next) {
2898 ram_addr_t end, next = RAM_ADDR_MAX;
2900 end = block->offset + block->length;
2902 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2903 if (next_block->offset >= end) {
2904 next = MIN(next, next_block->offset);
2907 if (next - end >= size && next - end < mingap) {
2908 offset = end;
2909 mingap = next - end;
2912 return offset;
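/* Worked example (hypothetical offsets, not part of the original source):
 * with existing blocks covering [0x000000, 0x100000) and
 * [0x300000, 0x400000), a request for size 0x100000 sees two candidate
 * gaps: 0x200000 bytes starting at 0x100000, and the open-ended space
 * after 0x400000.  The smaller gap that still fits wins, so
 * find_ram_offset() returns 0x100000; this best-fit choice keeps the
 * ram_addr_t space from fragmenting needlessly. */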
2915 static ram_addr_t last_ram_offset(void)
2917 RAMBlock *block;
2918 ram_addr_t last = 0;
2920 QLIST_FOREACH(block, &ram_list.blocks, next)
2921 last = MAX(last, block->offset + block->length);
2923 return last;
2926 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2927 ram_addr_t size, void *host)
2929 RAMBlock *new_block, *block;
2931 size = TARGET_PAGE_ALIGN(size);
2932 new_block = qemu_mallocz(sizeof(*new_block));
2934 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2935 char *id = dev->parent_bus->info->get_dev_path(dev);
2936 if (id) {
2937 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2938 qemu_free(id);
2941 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2943 QLIST_FOREACH(block, &ram_list.blocks, next) {
2944 if (!strcmp(block->idstr, new_block->idstr)) {
2945 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2946 new_block->idstr);
2947 abort();
2951 new_block->offset = find_ram_offset(size);
2952 if (host) {
2953 new_block->host = host;
2954 new_block->flags |= RAM_PREALLOC_MASK;
2955 } else {
2956 if (mem_path) {
2957 #if defined (__linux__) && !defined(TARGET_S390X)
2958 new_block->host = file_ram_alloc(new_block, size, mem_path);
2959 if (!new_block->host) {
2960 new_block->host = qemu_vmalloc(size);
2961 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2963 #else
2964 fprintf(stderr, "-mem-path option unsupported\n");
2965 exit(1);
2966 #endif
2967 } else {
2968 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2969 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2970 a system-defined value, which is at least 256GB. Larger systems
2971 have larger values. We put the guest between the end of the data
2972 segment (system break) and this value. We use 32GB as a base to
2973 have enough room for the system break to grow. */
2974 new_block->host = mmap((void*)0x800000000, size,
2975 PROT_EXEC|PROT_READ|PROT_WRITE,
2976 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2977 if (new_block->host == MAP_FAILED) {
2978 fprintf(stderr, "Allocating RAM failed\n");
2979 abort();
2981 #else
2982 if (xen_enabled()) {
2983 xen_ram_alloc(new_block->offset, size);
2984 } else {
2985 new_block->host = qemu_vmalloc(size);
2987 #endif
2988 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2991 new_block->length = size;
2993 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2995 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2996 last_ram_offset() >> TARGET_PAGE_BITS);
2997 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2998 0xff, size >> TARGET_PAGE_BITS);
3000 if (kvm_enabled())
3001 kvm_setup_guest_memory(new_block->host, size);
3003 return new_block->offset;
3006 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
3008 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
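/* Usage sketch (illustrative, not part of the original source; the block
 * name and sizes are made up): a device allocating backing RAM, e.g. video
 * memory, would do something like
 *
 *     ram_addr_t vram_offset = qemu_ram_alloc(dev, "mydev.vram", vram_size);
 *     uint8_t *vram = qemu_get_ram_ptr(vram_offset);
 *
 * The returned ram_addr_t names the block inside ram_list and is what the
 * dirty tracking and physical-memory registration code operate on; the host
 * pointer is only meant for memory the device itself owns (see the comment
 * above qemu_get_ram_ptr() further down). */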
3011 void qemu_ram_free_from_ptr(ram_addr_t addr)
3013 RAMBlock *block;
3015 QLIST_FOREACH(block, &ram_list.blocks, next) {
3016 if (addr == block->offset) {
3017 QLIST_REMOVE(block, next);
3018 qemu_free(block);
3019 return;
3024 void qemu_ram_free(ram_addr_t addr)
3026 RAMBlock *block;
3028 QLIST_FOREACH(block, &ram_list.blocks, next) {
3029 if (addr == block->offset) {
3030 QLIST_REMOVE(block, next);
3031 if (block->flags & RAM_PREALLOC_MASK) {
3033 } else if (mem_path) {
3034 #if defined (__linux__) && !defined(TARGET_S390X)
3035 if (block->fd) {
3036 munmap(block->host, block->length);
3037 close(block->fd);
3038 } else {
3039 qemu_vfree(block->host);
3041 #else
3042 abort();
3043 #endif
3044 } else {
3045 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3046 munmap(block->host, block->length);
3047 #else
3048 if (xen_enabled()) {
3049 xen_invalidate_map_cache_entry(block->host);
3050 } else {
3051 qemu_vfree(block->host);
3053 #endif
3055 qemu_free(block);
3056 return;
3062 #ifndef _WIN32
3063 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3065 RAMBlock *block;
3066 ram_addr_t offset;
3067 int flags;
3068 void *area, *vaddr;
3070 QLIST_FOREACH(block, &ram_list.blocks, next) {
3071 offset = addr - block->offset;
3072 if (offset < block->length) {
3073 vaddr = block->host + offset;
3074 if (block->flags & RAM_PREALLOC_MASK) {
3076 } else {
3077 flags = MAP_FIXED;
3078 munmap(vaddr, length);
3079 if (mem_path) {
3080 #if defined(__linux__) && !defined(TARGET_S390X)
3081 if (block->fd) {
3082 #ifdef MAP_POPULATE
3083 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3084 MAP_PRIVATE;
3085 #else
3086 flags |= MAP_PRIVATE;
3087 #endif
3088 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3089 flags, block->fd, offset);
3090 } else {
3091 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3092 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3093 flags, -1, 0);
3095 #else
3096 abort();
3097 #endif
3098 } else {
3099 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3100 flags |= MAP_SHARED | MAP_ANONYMOUS;
3101 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3102 flags, -1, 0);
3103 #else
3104 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3105 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3106 flags, -1, 0);
3107 #endif
3109 if (area != vaddr) {
3110 fprintf(stderr, "Could not remap addr: "
3111 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3112 length, addr);
3113 exit(1);
3115 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3117 return;
3121 #endif /* !_WIN32 */
3123 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3124 With the exception of the softmmu code in this file, this should
3125 only be used for local memory (e.g. video ram) that the device owns,
3126 and knows it isn't going to access beyond the end of the block.
3128 It should not be used for general purpose DMA.
3129 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3131 void *qemu_get_ram_ptr(ram_addr_t addr)
3133 RAMBlock *block;
3135 QLIST_FOREACH(block, &ram_list.blocks, next) {
3136 if (addr - block->offset < block->length) {
3137 /* Move this entry to the start of the list. */
3138 if (block != QLIST_FIRST(&ram_list.blocks)) {
3139 QLIST_REMOVE(block, next);
3140 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3142 if (xen_enabled()) {
3143 /* We need to check whether the requested address is in RAM
3144 * because we don't want to map the entire memory in QEMU.
3145 * In that case just map up to the end of the page.
3147 if (block->offset == 0) {
3148 return xen_map_cache(addr, 0, 0);
3149 } else if (block->host == NULL) {
3150 block->host =
3151 xen_map_cache(block->offset, block->length, 1);
3154 return block->host + (addr - block->offset);
3158 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3159 abort();
3161 return NULL;
3164 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3165 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3167 void *qemu_safe_ram_ptr(ram_addr_t addr)
3169 RAMBlock *block;
3171 QLIST_FOREACH(block, &ram_list.blocks, next) {
3172 if (addr - block->offset < block->length) {
3173 if (xen_enabled()) {
3174 /* We need to check whether the requested address is in RAM
3175 * because we don't want to map the entire memory in QEMU.
3176 * In that case just map up to the end of the page.
3178 if (block->offset == 0) {
3179 return xen_map_cache(addr, 0, 0);
3180 } else if (block->host == NULL) {
3181 block->host =
3182 xen_map_cache(block->offset, block->length, 1);
3185 return block->host + (addr - block->offset);
3189 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3190 abort();
3192 return NULL;
3195 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3196 * but takes a size argument */
3197 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3199 if (*size == 0) {
3200 return NULL;
3202 if (xen_enabled()) {
3203 return xen_map_cache(addr, *size, 1);
3204 } else {
3205 RAMBlock *block;
3207 QLIST_FOREACH(block, &ram_list.blocks, next) {
3208 if (addr - block->offset < block->length) {
3209 if (addr - block->offset + *size > block->length)
3210 *size = block->length - addr + block->offset;
3211 return block->host + (addr - block->offset);
3215 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3216 abort();
3220 void qemu_put_ram_ptr(void *addr)
3222 trace_qemu_put_ram_ptr(addr);
3225 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3227 RAMBlock *block;
3228 uint8_t *host = ptr;
3230 if (xen_enabled()) {
3231 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3232 return 0;
3235 QLIST_FOREACH(block, &ram_list.blocks, next) {
3236 /* This case happens when the block is not mapped. */
3237 if (block->host == NULL) {
3238 continue;
3240 if (host - block->host < block->length) {
3241 *ram_addr = block->offset + (host - block->host);
3242 return 0;
3246 return -1;
3249 /* Some of the softmmu routines need to translate from a host pointer
3250 (typically a TLB entry) back to a ram offset. */
3251 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3253 ram_addr_t ram_addr;
3255 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3256 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3257 abort();
3259 return ram_addr;
3262 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3264 #ifdef DEBUG_UNASSIGNED
3265 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3266 #endif
3267 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3268 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3269 #endif
3270 return 0;
3273 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3275 #ifdef DEBUG_UNASSIGNED
3276 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3277 #endif
3278 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3279 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3280 #endif
3281 return 0;
3284 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3286 #ifdef DEBUG_UNASSIGNED
3287 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3288 #endif
3289 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3290 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3291 #endif
3292 return 0;
3295 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3297 #ifdef DEBUG_UNASSIGNED
3298 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3299 #endif
3300 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3301 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3302 #endif
3305 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3307 #ifdef DEBUG_UNASSIGNED
3308 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3309 #endif
3310 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3311 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3312 #endif
3315 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3317 #ifdef DEBUG_UNASSIGNED
3318 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3319 #endif
3320 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3321 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3322 #endif
3325 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3326 unassigned_mem_readb,
3327 unassigned_mem_readw,
3328 unassigned_mem_readl,
3331 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3332 unassigned_mem_writeb,
3333 unassigned_mem_writew,
3334 unassigned_mem_writel,
3337 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3338 uint32_t val)
3340 int dirty_flags;
3341 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3342 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3343 #if !defined(CONFIG_USER_ONLY)
3344 tb_invalidate_phys_page_fast(ram_addr, 1);
3345 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3346 #endif
3348 stb_p(qemu_get_ram_ptr(ram_addr), val);
3349 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3350 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3351 /* we remove the notdirty callback only if the code has been
3352 flushed */
3353 if (dirty_flags == 0xff)
3354 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3357 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3358 uint32_t val)
3360 int dirty_flags;
3361 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3362 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3363 #if !defined(CONFIG_USER_ONLY)
3364 tb_invalidate_phys_page_fast(ram_addr, 2);
3365 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3366 #endif
3368 stw_p(qemu_get_ram_ptr(ram_addr), val);
3369 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3370 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3371 /* we remove the notdirty callback only if the code has been
3372 flushed */
3373 if (dirty_flags == 0xff)
3374 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3377 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3378 uint32_t val)
3380 int dirty_flags;
3381 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3382 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3383 #if !defined(CONFIG_USER_ONLY)
3384 tb_invalidate_phys_page_fast(ram_addr, 4);
3385 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3386 #endif
3388 stl_p(qemu_get_ram_ptr(ram_addr), val);
3389 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3390 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3391 /* we remove the notdirty callback only if the code has been
3392 flushed */
3393 if (dirty_flags == 0xff)
3394 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3397 static CPUReadMemoryFunc * const error_mem_read[3] = {
3398 NULL, /* never used */
3399 NULL, /* never used */
3400 NULL, /* never used */
3403 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3404 notdirty_mem_writeb,
3405 notdirty_mem_writew,
3406 notdirty_mem_writel,
3409 /* Generate a debug exception if a watchpoint has been hit. */
3410 static void check_watchpoint(int offset, int len_mask, int flags)
3412 CPUState *env = cpu_single_env;
3413 target_ulong pc, cs_base;
3414 TranslationBlock *tb;
3415 target_ulong vaddr;
3416 CPUWatchpoint *wp;
3417 int cpu_flags;
3419 if (env->watchpoint_hit) {
3420 /* We re-entered the check after replacing the TB. Now raise
3421 * the debug interrupt so that it will trigger after the
3422 * current instruction. */
3423 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3424 return;
3426 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3427 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3428 if ((vaddr == (wp->vaddr & len_mask) ||
3429 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3430 wp->flags |= BP_WATCHPOINT_HIT;
3431 if (!env->watchpoint_hit) {
3432 env->watchpoint_hit = wp;
3433 tb = tb_find_pc(env->mem_io_pc);
3434 if (!tb) {
3435 cpu_abort(env, "check_watchpoint: could not find TB for "
3436 "pc=%p", (void *)env->mem_io_pc);
3438 cpu_restore_state(tb, env, env->mem_io_pc);
3439 tb_phys_invalidate(tb, -1);
3440 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3441 env->exception_index = EXCP_DEBUG;
3442 } else {
3443 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3444 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3446 cpu_resume_from_signal(env, NULL);
3448 } else {
3449 wp->flags &= ~BP_WATCHPOINT_HIT;
3454 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3455 so these check for a hit then pass through to the normal out-of-line
3456 phys routines. */
3457 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3459 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3460 return ldub_phys(addr);
3463 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3465 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3466 return lduw_phys(addr);
3469 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3471 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3472 return ldl_phys(addr);
3475 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3476 uint32_t val)
3478 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3479 stb_phys(addr, val);
3482 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3483 uint32_t val)
3485 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3486 stw_phys(addr, val);
3489 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3490 uint32_t val)
3492 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3493 stl_phys(addr, val);
3496 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3497 watch_mem_readb,
3498 watch_mem_readw,
3499 watch_mem_readl,
3502 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3503 watch_mem_writeb,
3504 watch_mem_writew,
3505 watch_mem_writel,
3508 static inline uint32_t subpage_readlen (subpage_t *mmio,
3509 target_phys_addr_t addr,
3510 unsigned int len)
3512 unsigned int idx = SUBPAGE_IDX(addr);
3513 #if defined(DEBUG_SUBPAGE)
3514 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3515 mmio, len, addr, idx);
3516 #endif
3518 addr += mmio->region_offset[idx];
3519 idx = mmio->sub_io_index[idx];
3520 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3523 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3524 uint32_t value, unsigned int len)
3526 unsigned int idx = SUBPAGE_IDX(addr);
3527 #if defined(DEBUG_SUBPAGE)
3528 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3529 __func__, mmio, len, addr, idx, value);
3530 #endif
3532 addr += mmio->region_offset[idx];
3533 idx = mmio->sub_io_index[idx];
3534 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3537 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3539 return subpage_readlen(opaque, addr, 0);
3542 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3543 uint32_t value)
3545 subpage_writelen(opaque, addr, value, 0);
3548 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3550 return subpage_readlen(opaque, addr, 1);
3553 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3554 uint32_t value)
3556 subpage_writelen(opaque, addr, value, 1);
3559 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3561 return subpage_readlen(opaque, addr, 2);
3564 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3565 uint32_t value)
3567 subpage_writelen(opaque, addr, value, 2);
3570 static CPUReadMemoryFunc * const subpage_read[] = {
3571 &subpage_readb,
3572 &subpage_readw,
3573 &subpage_readl,
3576 static CPUWriteMemoryFunc * const subpage_write[] = {
3577 &subpage_writeb,
3578 &subpage_writew,
3579 &subpage_writel,
3582 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3583 ram_addr_t memory, ram_addr_t region_offset)
3585 int idx, eidx;
3587 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3588 return -1;
3589 idx = SUBPAGE_IDX(start);
3590 eidx = SUBPAGE_IDX(end);
3591 #if defined(DEBUG_SUBPAGE)
3592 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3593 mmio, start, end, idx, eidx, memory);
3594 #endif
3595 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3596 memory = IO_MEM_UNASSIGNED;
3597 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3598 for (; idx <= eidx; idx++) {
3599 mmio->sub_io_index[idx] = memory;
3600 mmio->region_offset[idx] = region_offset;
3603 return 0;
3606 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3607 ram_addr_t orig_memory,
3608 ram_addr_t region_offset)
3610 subpage_t *mmio;
3611 int subpage_memory;
3613 mmio = qemu_mallocz(sizeof(subpage_t));
3615 mmio->base = base;
3616 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3617 DEVICE_NATIVE_ENDIAN);
3618 #if defined(DEBUG_SUBPAGE)
3619 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3620 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3621 #endif
3622 *phys = subpage_memory | IO_MEM_SUBPAGE;
3623 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3625 return mmio;
3628 static int get_free_io_mem_idx(void)
3630 int i;
3632 for (i = 0; i < IO_MEM_NB_ENTRIES; i++)
3633 if (!io_mem_used[i]) {
3634 io_mem_used[i] = 1;
3635 return i;
3637 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3638 return -1;
3642 * Usually, devices operate in little endian mode. There are devices out
3643 * there that operate in big endian too. Each device gets byte swapped
3644 * mmio if plugged onto a CPU that does the other endianness.
3646 * CPU Device swap?
3648 * little little no
3649 * little big yes
3650 * big little yes
3651 * big big no
3654 typedef struct SwapEndianContainer {
3655 CPUReadMemoryFunc *read[3];
3656 CPUWriteMemoryFunc *write[3];
3657 void *opaque;
3658 } SwapEndianContainer;
3660 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3662 uint32_t val;
3663 SwapEndianContainer *c = opaque;
3664 val = c->read[0](c->opaque, addr);
3665 return val;
3668 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3670 uint32_t val;
3671 SwapEndianContainer *c = opaque;
3672 val = bswap16(c->read[1](c->opaque, addr));
3673 return val;
3676 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3678 uint32_t val;
3679 SwapEndianContainer *c = opaque;
3680 val = bswap32(c->read[2](c->opaque, addr));
3681 return val;
3684 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3685 swapendian_mem_readb,
3686 swapendian_mem_readw,
3687 swapendian_mem_readl
3690 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3691 uint32_t val)
3693 SwapEndianContainer *c = opaque;
3694 c->write[0](c->opaque, addr, val);
3697 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3698 uint32_t val)
3700 SwapEndianContainer *c = opaque;
3701 c->write[1](c->opaque, addr, bswap16(val));
3704 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3705 uint32_t val)
3707 SwapEndianContainer *c = opaque;
3708 c->write[2](c->opaque, addr, bswap32(val));
3711 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3712 swapendian_mem_writeb,
3713 swapendian_mem_writew,
3714 swapendian_mem_writel
3717 static void swapendian_init(int io_index)
3719 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3720 int i;
3722 /* Swap mmio for big endian targets */
3723 c->opaque = io_mem_opaque[io_index];
3724 for (i = 0; i < 3; i++) {
3725 c->read[i] = io_mem_read[io_index][i];
3726 c->write[i] = io_mem_write[io_index][i];
3728 io_mem_read[io_index][i] = swapendian_readfn[i];
3729 io_mem_write[io_index][i] = swapendian_writefn[i];
3731 io_mem_opaque[io_index] = c;
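/* Illustrative example (not from the original source): on a target built
 * with TARGET_WORDS_BIGENDIAN, registering a region as DEVICE_LITTLE_ENDIAN
 * wraps it with swapendian_init(), so a value of 0x12345678 returned by the
 * device's readl callback is passed through bswap32() and the memory core
 * sees 0x78563412 -- the "big CPU / little device => swap" row of the table
 * above.  Writes are swapped symmetrically before reaching the device. */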
3734 static void swapendian_del(int io_index)
3736 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3737 qemu_free(io_mem_opaque[io_index]);
3741 /* mem_read and mem_write are arrays of functions containing the
3742 functions to access byte (index 0), word (index 1) and dword (index
3743 2). Functions can be omitted with a NULL function pointer.
3744 If io_index is non-zero, the corresponding io zone is
3745 modified. If it is zero, a new io zone is allocated. The return
3746 value can be used with cpu_register_physical_memory(). (-1) is
3747 returned on error. */
3748 static int cpu_register_io_memory_fixed(int io_index,
3749 CPUReadMemoryFunc * const *mem_read,
3750 CPUWriteMemoryFunc * const *mem_write,
3751 void *opaque, enum device_endian endian)
3753 int i;
3755 if (io_index <= 0) {
3756 io_index = get_free_io_mem_idx();
3757 if (io_index == -1)
3758 return io_index;
3759 } else {
3760 io_index >>= IO_MEM_SHIFT;
3761 if (io_index >= IO_MEM_NB_ENTRIES)
3762 return -1;
3765 for (i = 0; i < 3; ++i) {
3766 io_mem_read[io_index][i]
3767 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3769 for (i = 0; i < 3; ++i) {
3770 io_mem_write[io_index][i]
3771 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3773 io_mem_opaque[io_index] = opaque;
3775 switch (endian) {
3776 case DEVICE_BIG_ENDIAN:
3777 #ifndef TARGET_WORDS_BIGENDIAN
3778 swapendian_init(io_index);
3779 #endif
3780 break;
3781 case DEVICE_LITTLE_ENDIAN:
3782 #ifdef TARGET_WORDS_BIGENDIAN
3783 swapendian_init(io_index);
3784 #endif
3785 break;
3786 case DEVICE_NATIVE_ENDIAN:
3787 default:
3788 break;
3791 return (io_index << IO_MEM_SHIFT);
3794 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3795 CPUWriteMemoryFunc * const *mem_write,
3796 void *opaque, enum device_endian endian)
3798 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
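/* Usage sketch (illustrative, not part of the original source; the mydev_*
 * names are hypothetical, and cpu_register_physical_memory() is the wrapper
 * declared outside this file): a device supplies byte/word/long callbacks,
 * registers them, and maps the returned token over a physical range:
 *
 *     static CPUReadMemoryFunc * const mydev_read[3] = {
 *         mydev_readb, mydev_readw, mydev_readl,
 *     };
 *     static CPUWriteMemoryFunc * const mydev_write[3] = {
 *         mydev_writeb, mydev_writew, mydev_writel,
 *     };
 *
 *     int io = cpu_register_io_memory(mydev_read, mydev_write, s,
 *                                     DEVICE_NATIVE_ENDIAN);
 *     cpu_register_physical_memory(base, 0x1000, io);
 *
 * cpu_unregister_io_memory(io) releases the slot again. */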
3801 void cpu_unregister_io_memory(int io_table_address)
3803 int i;
3804 int io_index = io_table_address >> IO_MEM_SHIFT;
3806 swapendian_del(io_index);
3808 for (i = 0; i < 3; i++) {
3809 io_mem_read[io_index][i] = unassigned_mem_read[i];
3810 io_mem_write[io_index][i] = unassigned_mem_write[i];
3812 io_mem_opaque[io_index] = NULL;
3813 io_mem_used[io_index] = 0;
3816 static void io_mem_init(void)
3818 int i;
3820 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3821 unassigned_mem_write, NULL,
3822 DEVICE_NATIVE_ENDIAN);
3823 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3824 unassigned_mem_write, NULL,
3825 DEVICE_NATIVE_ENDIAN);
3826 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3827 notdirty_mem_write, NULL,
3828 DEVICE_NATIVE_ENDIAN);
3829 for (i = 0; i < 5; i++)
3830 io_mem_used[i] = 1;
3832 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3833 watch_mem_write, NULL,
3834 DEVICE_NATIVE_ENDIAN);
3837 static void memory_map_init(void)
3839 system_memory = qemu_malloc(sizeof(*system_memory));
3840 memory_region_init(system_memory, "system", INT64_MAX);
3841 set_system_memory_map(system_memory);
3843 system_io = qemu_malloc(sizeof(*system_io));
3844 memory_region_init(system_io, "io", 65536);
3845 set_system_io_map(system_io);
3848 MemoryRegion *get_system_memory(void)
3850 return system_memory;
3853 MemoryRegion *get_system_io(void)
3855 return system_io;
3858 #endif /* !defined(CONFIG_USER_ONLY) */
3860 /* physical memory access (slow version, mainly for debug) */
3861 #if defined(CONFIG_USER_ONLY)
3862 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3863 uint8_t *buf, int len, int is_write)
3865 int l, flags;
3866 target_ulong page;
3867 void * p;
3869 while (len > 0) {
3870 page = addr & TARGET_PAGE_MASK;
3871 l = (page + TARGET_PAGE_SIZE) - addr;
3872 if (l > len)
3873 l = len;
3874 flags = page_get_flags(page);
3875 if (!(flags & PAGE_VALID))
3876 return -1;
3877 if (is_write) {
3878 if (!(flags & PAGE_WRITE))
3879 return -1;
3880 /* XXX: this code should not depend on lock_user */
3881 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3882 return -1;
3883 memcpy(p, buf, l);
3884 unlock_user(p, addr, l);
3885 } else {
3886 if (!(flags & PAGE_READ))
3887 return -1;
3888 /* XXX: this code should not depend on lock_user */
3889 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3890 return -1;
3891 memcpy(buf, p, l);
3892 unlock_user(p, addr, 0);
3894 len -= l;
3895 buf += l;
3896 addr += l;
3898 return 0;
3901 #else
3902 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3903 int len, int is_write)
3905 int l, io_index;
3906 uint8_t *ptr;
3907 uint32_t val;
3908 target_phys_addr_t page;
3909 ram_addr_t pd;
3910 PhysPageDesc *p;
3912 while (len > 0) {
3913 page = addr & TARGET_PAGE_MASK;
3914 l = (page + TARGET_PAGE_SIZE) - addr;
3915 if (l > len)
3916 l = len;
3917 p = phys_page_find(page >> TARGET_PAGE_BITS);
3918 if (!p) {
3919 pd = IO_MEM_UNASSIGNED;
3920 } else {
3921 pd = p->phys_offset;
3924 if (is_write) {
3925 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3926 target_phys_addr_t addr1 = addr;
3927 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3928 if (p)
3929 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3930 /* XXX: could force cpu_single_env to NULL to avoid
3931 potential bugs */
3932 if (l >= 4 && ((addr1 & 3) == 0)) {
3933 /* 32 bit write access */
3934 val = ldl_p(buf);
3935 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3936 l = 4;
3937 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3938 /* 16 bit write access */
3939 val = lduw_p(buf);
3940 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3941 l = 2;
3942 } else {
3943 /* 8 bit write access */
3944 val = ldub_p(buf);
3945 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3946 l = 1;
3948 } else {
3949 ram_addr_t addr1;
3950 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3951 /* RAM case */
3952 ptr = qemu_get_ram_ptr(addr1);
3953 memcpy(ptr, buf, l);
3954 if (!cpu_physical_memory_is_dirty(addr1)) {
3955 /* invalidate code */
3956 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3957 /* set dirty bit */
3958 cpu_physical_memory_set_dirty_flags(
3959 addr1, (0xff & ~CODE_DIRTY_FLAG));
3961 /* qemu doesn't execute guest code directly, but kvm does,
3962 so flush the instruction caches */
3963 if (kvm_enabled())
3964 flush_icache_range((unsigned long)ptr,
3965 ((unsigned long)ptr)+l);
3966 qemu_put_ram_ptr(ptr);
3968 } else {
3969 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3970 !(pd & IO_MEM_ROMD)) {
3971 target_phys_addr_t addr1 = addr;
3972 /* I/O case */
3973 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3974 if (p)
3975 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3976 if (l >= 4 && ((addr1 & 3) == 0)) {
3977 /* 32 bit read access */
3978 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3979 stl_p(buf, val);
3980 l = 4;
3981 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3982 /* 16 bit read access */
3983 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3984 stw_p(buf, val);
3985 l = 2;
3986 } else {
3987 /* 8 bit read access */
3988 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3989 stb_p(buf, val);
3990 l = 1;
3992 } else {
3993 /* RAM case */
3994 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3995 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3996 qemu_put_ram_ptr(ptr);
3999 len -= l;
4000 buf += l;
4001 addr += l;
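/* Usage sketch (illustrative, not part of the original source): most callers
 * use the convenience wrappers built on this routine, e.g.
 *
 *     uint8_t buf[4];
 *     cpu_physical_memory_read(gpa, buf, sizeof(buf));     // is_write = 0
 *     cpu_physical_memory_write(gpa, buf, sizeof(buf));    // is_write = 1
 *
 * where gpa is a guest-physical address.  RAM-backed pages are copied
 * directly through qemu_get_ram_ptr(); everything else is routed to the
 * registered io_mem_read/io_mem_write callbacks in 4/2/1-byte chunks. */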
4005 /* used for ROM loading : can write in RAM and ROM */
4006 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
4007 const uint8_t *buf, int len)
4009 int l;
4010 uint8_t *ptr;
4011 target_phys_addr_t page;
4012 unsigned long pd;
4013 PhysPageDesc *p;
4015 while (len > 0) {
4016 page = addr & TARGET_PAGE_MASK;
4017 l = (page + TARGET_PAGE_SIZE) - addr;
4018 if (l > len)
4019 l = len;
4020 p = phys_page_find(page >> TARGET_PAGE_BITS);
4021 if (!p) {
4022 pd = IO_MEM_UNASSIGNED;
4023 } else {
4024 pd = p->phys_offset;
4027 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4028 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4029 !(pd & IO_MEM_ROMD)) {
4030 /* do nothing */
4031 } else {
4032 unsigned long addr1;
4033 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4034 /* ROM/RAM case */
4035 ptr = qemu_get_ram_ptr(addr1);
4036 memcpy(ptr, buf, l);
4037 qemu_put_ram_ptr(ptr);
4039 len -= l;
4040 buf += l;
4041 addr += l;
4045 typedef struct {
4046 void *buffer;
4047 target_phys_addr_t addr;
4048 target_phys_addr_t len;
4049 } BounceBuffer;
4051 static BounceBuffer bounce;
4053 typedef struct MapClient {
4054 void *opaque;
4055 void (*callback)(void *opaque);
4056 QLIST_ENTRY(MapClient) link;
4057 } MapClient;
4059 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4060 = QLIST_HEAD_INITIALIZER(map_client_list);
4062 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4064 MapClient *client = qemu_malloc(sizeof(*client));
4066 client->opaque = opaque;
4067 client->callback = callback;
4068 QLIST_INSERT_HEAD(&map_client_list, client, link);
4069 return client;
4072 void cpu_unregister_map_client(void *_client)
4074 MapClient *client = (MapClient *)_client;
4076 QLIST_REMOVE(client, link);
4077 qemu_free(client);
4080 static void cpu_notify_map_clients(void)
4082 MapClient *client;
4084 while (!QLIST_EMPTY(&map_client_list)) {
4085 client = QLIST_FIRST(&map_client_list);
4086 client->callback(client->opaque);
4087 cpu_unregister_map_client(client);
4091 /* Map a physical memory region into a host virtual address.
4092 * May map a subset of the requested range, given by and returned in *plen.
4093 * May return NULL if resources needed to perform the mapping are exhausted.
4094 * Use only for reads OR writes - not for read-modify-write operations.
4095 * Use cpu_register_map_client() to know when retrying the map operation is
4096 * likely to succeed.
4098 void *cpu_physical_memory_map(target_phys_addr_t addr,
4099 target_phys_addr_t *plen,
4100 int is_write)
4102 target_phys_addr_t len = *plen;
4103 target_phys_addr_t todo = 0;
4104 int l;
4105 target_phys_addr_t page;
4106 unsigned long pd;
4107 PhysPageDesc *p;
4108 ram_addr_t raddr = RAM_ADDR_MAX;
4109 ram_addr_t rlen;
4110 void *ret;
4112 while (len > 0) {
4113 page = addr & TARGET_PAGE_MASK;
4114 l = (page + TARGET_PAGE_SIZE) - addr;
4115 if (l > len)
4116 l = len;
4117 p = phys_page_find(page >> TARGET_PAGE_BITS);
4118 if (!p) {
4119 pd = IO_MEM_UNASSIGNED;
4120 } else {
4121 pd = p->phys_offset;
4124 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4125 if (todo || bounce.buffer) {
4126 break;
4128 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4129 bounce.addr = addr;
4130 bounce.len = l;
4131 if (!is_write) {
4132 cpu_physical_memory_read(addr, bounce.buffer, l);
4135 *plen = l;
4136 return bounce.buffer;
4138 if (!todo) {
4139 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4142 len -= l;
4143 addr += l;
4144 todo += l;
4146 rlen = todo;
4147 ret = qemu_ram_ptr_length(raddr, &rlen);
4148 *plen = rlen;
4149 return ret;
4152 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4153 * Will also mark the memory as dirty if is_write == 1. access_len gives
4154 * the amount of memory that was actually read or written by the caller.
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    unsigned long flush_len = (unsigned long)access_len;

    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
            dma_flush_range((unsigned long)buffer,
                            (unsigned long)buffer + flush_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
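
/* Example usage (illustrative sketch only; "dev", "sg_addr", "sg_len" and
 * "dma_retry_cb" are hypothetical names, not part of this file):
 *
 *     target_phys_addr_t len = dev->sg_len;
 *     uint8_t *host = cpu_physical_memory_map(dev->sg_addr, &len, 1);
 *     if (!host) {
 *         // Bounce buffer busy: ask to be called back when a retry may work.
 *         cpu_register_map_client(dev, dma_retry_cb);
 *         return;
 *     }
 *     // ... produce up to "len" bytes into host[] ...
 *     cpu_physical_memory_unmap(host, len, 1, len);
 */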

/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint32_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
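
/* ldl/ldq/lduw/ldub and the stl/stq/stw/stb counterparts below form the
   physical-memory accessor family: each _internal helper dispatches between
   MMIO (io_mem_read/io_mem_write) and RAM, while the _le/_be/native wrappers
   choose the device byte order to apply relative to the target's. */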

/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;

        /* XXX This is broken when device endian != cpu endian.
           Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
#else
        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
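
/* On the MMIO path a 64-bit load is split into two 32-bit io_mem_read calls
   whose order depends only on TARGET_WORDS_BIGENDIAN; as the XXX above notes,
   the requested device endianness is not yet honoured there. */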

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}

/* warning: addr must be aligned. The ram page is not marked as dirty
   and the code inside is not invalidated. It is useful if the dirty
   bits are used to track modified PTEs */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
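
/* The in_migration check is the one exception to the "notdirty" contract:
   while RAM dirty logging is feeding a live migration, even these stores must
   invalidate TBs and set the dirty flags so the update is not missed. */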

void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
#else
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
#endif
    } else {
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
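
/* Example usage (illustrative only; "desc_addr" and the status value are
 * hypothetical): a device model completing a request could publish a
 * little-endian status word straight into guest RAM:
 *
 *     stl_le_phys(desc_addr + 8, 0x1);   // mark the descriptor as done
 *
 * The store goes through stl_phys_internal(), so the page's dirty flags are
 * updated and any translated code on that page is invalidated. */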

/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
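
/* The 64-bit stores have no dedicated fast path (hence the XXX above): they
   byte-swap the value into the requested order and then go through the
   generic cpu_physical_memory_write() byte path. */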

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
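
/* Example usage (illustrative only; "env", "guest_va" and "buf" are
 * placeholders): a debugger stub can peek at guest virtual memory without
 * faulting the guest, page by page through the MMU's debug translation:
 *
 *     uint8_t buf[16];
 *     if (cpu_memory_rw_debug(env, guest_va, buf, sizeof(buf), 0) < 0) {
 *         // address not mapped in the guest's current address space
 *     }
 */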

#endif

/* in deterministic execution mode, instructions that perform device I/O
   must be at the end of the TB */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred. */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn. */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB. If this is not
       the first instruction in a TB then re-execute the preceding
       branch. */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen. */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception. In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB. */
    cpu_resume_from_signal(env, NULL);
}
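
/* Summary: when icount (deterministic execution) is enabled and an I/O access
   is hit in the middle of a TB, the TB is retranslated with CF_LAST_IO and an
   instruction budget that stops exactly at the I/O instruction, so the access
   is replayed as the final instruction of the new TB. */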

#if !defined(CONFIG_USER_ONLY)
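
/* dump_exec_info() reports translation-cache occupancy and TB statistics;
   it is typically reached through the monitor's "info jit" command. */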
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
#ifdef CONFIG_PROFILER
    tcg_dump_info(f, cpu_fprintf);
#endif
}
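
/* The definitions below instantiate softmmu_template.h four times (SHIFT 0-3,
   i.e. 1-, 2-, 4- and 8-byte accesses) with the _cmmu suffix and
   SOFTMMU_CODE_ACCESS set, producing the code-access load helpers used by the
   translator, bound to cpu_single_env. */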

#define MMUSUFFIX _cmmu
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif