1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "cache-utils.h"
31 #if !defined(TARGET_IA64)
32 #include "tcg.h"
33 #endif
35 #include "hw/hw.h"
36 #include "hw/qdev.h"
37 #include "osdep.h"
38 #include "kvm.h"
39 #include "hw/xen.h"
40 #include "qemu-timer.h"
41 #if defined(CONFIG_USER_ONLY)
42 #include <qemu.h>
43 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
44 #include <sys/param.h>
45 #if __FreeBSD_version >= 700104
46 #define HAVE_KINFO_GETVMMAP
47 #define sigqueue sigqueue_freebsd /* avoid redefinition */
48 #include <sys/time.h>
49 #include <sys/proc.h>
50 #include <machine/profile.h>
51 #define _KERNEL
52 #include <sys/user.h>
53 #undef _KERNEL
54 #undef sigqueue
55 #include <libutil.h>
56 #endif
57 #endif
58 #else /* !CONFIG_USER_ONLY */
59 #include "xen-mapcache.h"
60 #include "trace.h"
61 #endif
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
91 have limited branch ranges (possibly also PPC) so place it in a
92 section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
117 #endif
119 CPUState *first_cpu;
120 /* current CPU in the current thread. It is only valid inside
121 cpu_exec() */
122 CPUState *cpu_single_env;
123 /* 0 = Do not count executed instructions.
124 1 = Precise instruction counting.
125 2 = Adaptive rate instruction counting. */
126 int use_icount = 0;
127 /* Current instruction counter. While executing translated code this may
128 include some instructions that have not yet been executed. */
129 int64_t qemu_icount;
131 typedef struct PageDesc {
132 /* list of TBs intersecting this ram page */
133 TranslationBlock *first_tb;
134 /* in order to optimize self modifying code, we count the number
135 of lookups we do to a given page to use a bitmap */
136 unsigned int code_write_count;
137 uint8_t *code_bitmap;
138 #if defined(CONFIG_USER_ONLY)
139 unsigned long flags;
140 #endif
141 } PageDesc;
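/* Note: 'first_tb' chains every TranslationBlock that covers this guest
 * page.  'code_write_count' and 'code_bitmap' implement the self-modifying
 * code optimisation: once a page has taken SMC_BITMAP_USE_THRESHOLD CPU
 * write accesses, build_page_bitmap() records which bytes of the page
 * actually contain translated code, so later small writes that miss the
 * bitmap can skip TB invalidation entirely. */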
143 /* In system mode we want L1_MAP to be based on ram offsets,
144 while in user mode we want it to be based on virtual addresses. */
145 #if !defined(CONFIG_USER_ONLY)
146 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
147 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
148 #else
149 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
150 #endif
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
153 #endif
155 /* Size of the L2 (and L3, etc) page tables. */
156 #define L2_BITS 10
157 #define L2_SIZE (1 << L2_BITS)
159 /* The bits remaining after N lower levels of page tables. */
160 #define P_L1_BITS_REM \
161 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
162 #define V_L1_BITS_REM \
163 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165 /* Size of the L1 page table. Avoid silly small sizes. */
166 #if P_L1_BITS_REM < 4
167 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
168 #else
169 #define P_L1_BITS P_L1_BITS_REM
170 #endif
172 #if V_L1_BITS_REM < 4
173 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
174 #else
175 #define V_L1_BITS V_L1_BITS_REM
176 #endif
178 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
179 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
181 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
182 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
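/* Illustration (assuming L1_MAP_ADDR_SPACE_BITS == 32 and
 * TARGET_PAGE_BITS == 12 for concreteness): the page index then has
 * 32 - 12 = 20 bits; with L2_BITS == 10 this gives V_L1_BITS_REM == 0,
 * so V_L1_BITS is widened to 10 and V_L1_SHIFT == 10.  A page index is
 * split as
 *     l1 = (index >> V_L1_SHIFT) & (V_L1_SIZE - 1)    top 10 bits
 *     l2 = index & (L2_SIZE - 1)                      low 10 bits
 * which is exactly the walk performed by page_find_alloc() below. */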
184 unsigned long qemu_real_host_page_size;
185 unsigned long qemu_host_page_bits;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
206 /* io memory support */
207 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
208 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
209 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
210 static char io_mem_used[IO_MEM_NB_ENTRIES];
211 static int io_mem_watch;
212 #endif
214 /* log support */
215 #ifdef WIN32
216 static const char *logfilename = "qemu.log";
217 #else
218 static const char *logfilename = "/tmp/qemu.log";
219 #endif
220 FILE *logfile;
221 int loglevel;
222 static int log_append = 0;
224 /* statistics */
225 #if !defined(CONFIG_USER_ONLY)
226 static int tlb_flush_count;
227 #endif
228 static int tb_flush_count;
229 static int tb_phys_invalidate_count;
231 #ifdef _WIN32
232 static void map_exec(void *addr, long size)
234 DWORD old_protect;
235 VirtualProtect(addr, size,
236 PAGE_EXECUTE_READWRITE, &old_protect);
239 #else
240 static void map_exec(void *addr, long size)
242 unsigned long start, end, page_size;
244 page_size = getpagesize();
245 start = (unsigned long)addr;
246 start &= ~(page_size - 1);
248 end = (unsigned long)addr + size;
249 end += page_size - 1;
250 end &= ~(page_size - 1);
252 mprotect((void *)start, end - start,
253 PROT_READ | PROT_WRITE | PROT_EXEC);
255 #endif
257 static void page_init(void)
259 /* NOTE: we can always suppose that qemu_host_page_size >=
260 TARGET_PAGE_SIZE */
261 #ifdef _WIN32
263 SYSTEM_INFO system_info;
265 GetSystemInfo(&system_info);
266 qemu_real_host_page_size = system_info.dwPageSize;
268 #else
269 qemu_real_host_page_size = getpagesize();
270 #endif
271 if (qemu_host_page_size == 0)
272 qemu_host_page_size = qemu_real_host_page_size;
273 if (qemu_host_page_size < TARGET_PAGE_SIZE)
274 qemu_host_page_size = TARGET_PAGE_SIZE;
275 qemu_host_page_bits = 0;
276 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
277 qemu_host_page_bits++;
278 qemu_host_page_mask = ~(qemu_host_page_size - 1);
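/* Worked example: with 4 KB host pages, qemu_real_host_page_size == 4096;
 * unless TARGET_PAGE_SIZE is larger, qemu_host_page_size == 4096,
 * qemu_host_page_bits == 12 and qemu_host_page_mask == ~0xfffUL. */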
280 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
282 #ifdef HAVE_KINFO_GETVMMAP
283 struct kinfo_vmentry *freep;
284 int i, cnt;
286 freep = kinfo_getvmmap(getpid(), &cnt);
287 if (freep) {
288 mmap_lock();
289 for (i = 0; i < cnt; i++) {
290 unsigned long startaddr, endaddr;
292 startaddr = freep[i].kve_start;
293 endaddr = freep[i].kve_end;
294 if (h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 } else {
301 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
302 endaddr = ~0ul;
303 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
304 #endif
308 free(freep);
309 mmap_unlock();
311 #else
312 FILE *f;
314 last_brk = (unsigned long)sbrk(0);
316 f = fopen("/compat/linux/proc/self/maps", "r");
317 if (f) {
318 mmap_lock();
320 do {
321 unsigned long startaddr, endaddr;
322 int n;
324 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
326 if (n == 2 && h2g_valid(startaddr)) {
327 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
329 if (h2g_valid(endaddr)) {
330 endaddr = h2g(endaddr);
331 } else {
332 endaddr = ~0ul;
334 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
336 } while (!feof(f));
338 fclose(f);
339 mmap_unlock();
341 #endif
343 #endif
346 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
348 PageDesc *pd;
349 void **lp;
350 int i;
352 #if defined(CONFIG_USER_ONLY)
353 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
354 # define ALLOC(P, SIZE) \
355 do { \
356 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
357 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
358 } while (0)
359 #else
360 # define ALLOC(P, SIZE) \
361 do { P = qemu_mallocz(SIZE); } while (0)
362 #endif
364 /* Level 1. Always allocated. */
365 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
367 /* Level 2..N-1. */
368 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
369 void **p = *lp;
371 if (p == NULL) {
372 if (!alloc) {
373 return NULL;
375 ALLOC(p, sizeof(void *) * L2_SIZE);
376 *lp = p;
379 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
382 pd = *lp;
383 if (pd == NULL) {
384 if (!alloc) {
385 return NULL;
387 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
388 *lp = pd;
391 #undef ALLOC
393 return pd + (index & (L2_SIZE - 1));
396 static inline PageDesc *page_find(tb_page_addr_t index)
398 return page_find_alloc(index, 0);
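/* Typical use, as seen throughout this file: look up the descriptor of the
 * guest page containing 'addr' without allocating missing levels:
 *
 *     PageDesc *p = page_find(addr >> TARGET_PAGE_BITS);
 *     if (!p)
 *         return;    (NULL means no TB was ever placed on this page)
 */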
401 #if !defined(CONFIG_USER_ONLY)
402 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
404 PhysPageDesc *pd;
405 void **lp;
406 int i;
408 /* Level 1. Always allocated. */
409 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
411 /* Level 2..N-1. */
412 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
413 void **p = *lp;
414 if (p == NULL) {
415 if (!alloc) {
416 return NULL;
418 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
420 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
423 pd = *lp;
424 if (pd == NULL) {
425 int i;
427 if (!alloc) {
428 return NULL;
431 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
433 for (i = 0; i < L2_SIZE; i++) {
434 pd[i].phys_offset = IO_MEM_UNASSIGNED;
435 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
439 return pd + (index & (L2_SIZE - 1));
442 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
444 return phys_page_find_alloc(index, 0);
447 static void tlb_protect_code(ram_addr_t ram_addr);
448 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
449 target_ulong vaddr);
450 #define mmap_lock() do { } while(0)
451 #define mmap_unlock() do { } while(0)
452 #endif
454 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
456 #if defined(CONFIG_USER_ONLY)
457 /* Currently it is not recommended to allocate big chunks of data in
458 user mode. This will change once a dedicated libc is used. */
459 #define USE_STATIC_CODE_GEN_BUFFER
460 #endif
462 #ifdef USE_STATIC_CODE_GEN_BUFFER
463 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
464 __attribute__((aligned (CODE_GEN_ALIGN)));
465 #endif
467 static void code_gen_alloc(unsigned long tb_size)
469 if (kvm_enabled())
470 return;
472 #ifdef USE_STATIC_CODE_GEN_BUFFER
473 code_gen_buffer = static_code_gen_buffer;
474 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
475 map_exec(code_gen_buffer, code_gen_buffer_size);
476 #else
477 code_gen_buffer_size = tb_size;
478 if (code_gen_buffer_size == 0) {
479 #if defined(CONFIG_USER_ONLY)
480 /* in user mode, phys_ram_size is not meaningful */
481 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
482 #else
483 /* XXX: needs adjustments */
484 code_gen_buffer_size = (unsigned long)(ram_size / 4);
485 #endif
487 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
488 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
489 /* The code gen buffer location may have constraints depending on
490 the host cpu and OS */
491 #if defined(__linux__)
493 int flags;
494 void *start = NULL;
496 flags = MAP_PRIVATE | MAP_ANONYMOUS;
497 #if defined(__x86_64__)
498 flags |= MAP_32BIT;
499 /* Cannot map more than that */
500 if (code_gen_buffer_size > (800 * 1024 * 1024))
501 code_gen_buffer_size = (800 * 1024 * 1024);
502 #elif defined(__sparc_v9__)
503 // Map the buffer below 2G, so we can use direct calls and branches
504 flags |= MAP_FIXED;
505 start = (void *) 0x60000000UL;
506 if (code_gen_buffer_size > (512 * 1024 * 1024))
507 code_gen_buffer_size = (512 * 1024 * 1024);
508 #elif defined(__arm__)
509 /* Map the buffer below 32M, so we can use direct calls and branches */
510 flags |= MAP_FIXED;
511 start = (void *) 0x01000000UL;
512 if (code_gen_buffer_size > 16 * 1024 * 1024)
513 code_gen_buffer_size = 16 * 1024 * 1024;
514 #elif defined(__s390x__)
515 /* Map the buffer so that we can use direct calls and branches. */
516 /* We have a +- 4GB range on the branches; leave some slop. */
517 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
518 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
520 start = (void *)0x90000000UL;
521 #endif
522 code_gen_buffer = mmap(start, code_gen_buffer_size,
523 PROT_WRITE | PROT_READ | PROT_EXEC,
524 flags, -1, 0);
525 if (code_gen_buffer == MAP_FAILED) {
526 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
527 exit(1);
530 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
531 || defined(__DragonFly__) || defined(__OpenBSD__)
533 int flags;
534 void *addr = NULL;
535 flags = MAP_PRIVATE | MAP_ANONYMOUS;
536 #if defined(__x86_64__)
537 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
538 * 0x40000000 is free */
539 flags |= MAP_FIXED;
540 addr = (void *)0x40000000;
541 /* Cannot map more than that */
542 if (code_gen_buffer_size > (800 * 1024 * 1024))
543 code_gen_buffer_size = (800 * 1024 * 1024);
544 #elif defined(__sparc_v9__)
545 // Map the buffer below 2G, so we can use direct calls and branches
546 flags |= MAP_FIXED;
547 addr = (void *) 0x60000000UL;
548 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
549 code_gen_buffer_size = (512 * 1024 * 1024);
551 #endif
552 code_gen_buffer = mmap(addr, code_gen_buffer_size,
553 PROT_WRITE | PROT_READ | PROT_EXEC,
554 flags, -1, 0);
555 if (code_gen_buffer == MAP_FAILED) {
556 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
557 exit(1);
560 #else
561 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
562 map_exec(code_gen_buffer, code_gen_buffer_size);
563 #endif
564 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
565 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
566 code_gen_buffer_max_size = code_gen_buffer_size -
567 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
568 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
569 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
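/* Note on the sizing above: code_gen_buffer_max_size keeps
 * TCG_MAX_OP_SIZE * OPC_MAX_SIZE bytes in reserve at the end of the
 * buffer, so that whenever tb_alloc() hands out a TB there is still room
 * for one worst-case block of generated code before tb_flush() has to
 * recycle the whole buffer. */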
572 /* Must be called before using the QEMU cpus. 'tb_size' is the size
573 (in bytes) allocated to the translation buffer. Zero means default
574 size. */
575 void cpu_exec_init_all(unsigned long tb_size)
577 cpu_gen_init();
578 code_gen_alloc(tb_size);
579 code_gen_ptr = code_gen_buffer;
580 page_init();
581 #if !defined(CONFIG_USER_ONLY)
582 io_mem_init();
583 #endif
584 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
585 /* There's no guest base to take into account, so go ahead and
586 initialize the prologue now. */
587 tcg_prologue_init(&tcg_ctx);
588 #endif
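/* Expected call order (sketch): the front end calls cpu_exec_init_all()
 * exactly once, with 0 for the default buffer size or a size in bytes,
 * before any cpu_init()/cpu_exec_init() call, so that the code generation
 * buffer, the page tables and (in system mode) the I/O memory handlers
 * exist before the first CPU is created. */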
591 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
593 static int cpu_common_post_load(void *opaque, int version_id)
595 CPUState *env = opaque;
597 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
598 version_id is increased. */
599 env->interrupt_request &= ~0x01;
600 tlb_flush(env, 1);
602 return 0;
605 static const VMStateDescription vmstate_cpu_common = {
606 .name = "cpu_common",
607 .version_id = 1,
608 .minimum_version_id = 1,
609 .minimum_version_id_old = 1,
610 .post_load = cpu_common_post_load,
611 .fields = (VMStateField []) {
612 VMSTATE_UINT32(halted, CPUState),
613 VMSTATE_UINT32(interrupt_request, CPUState),
614 VMSTATE_END_OF_LIST()
617 #endif
619 CPUState *qemu_get_cpu(int cpu)
621 CPUState *env = first_cpu;
623 while (env) {
624 if (env->cpu_index == cpu)
625 break;
626 env = env->next_cpu;
629 return env;
632 void cpu_exec_init(CPUState *env)
634 CPUState **penv;
635 int cpu_index;
637 #if defined(CONFIG_USER_ONLY)
638 cpu_list_lock();
639 #endif
640 env->next_cpu = NULL;
641 penv = &first_cpu;
642 cpu_index = 0;
643 while (*penv != NULL) {
644 penv = &(*penv)->next_cpu;
645 cpu_index++;
647 env->cpu_index = cpu_index;
648 env->numa_node = 0;
649 QTAILQ_INIT(&env->breakpoints);
650 QTAILQ_INIT(&env->watchpoints);
651 #ifndef CONFIG_USER_ONLY
652 env->thread_id = qemu_get_thread_id();
653 #endif
654 *penv = env;
655 #if defined(CONFIG_USER_ONLY)
656 cpu_list_unlock();
657 #endif
658 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
659 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
660 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
661 cpu_save, cpu_load, env);
662 #endif
665 /* Allocate a new translation block. Flush the translation buffer if
666 too many translation blocks or too much generated code. */
667 static TranslationBlock *tb_alloc(target_ulong pc)
669 TranslationBlock *tb;
671 if (nb_tbs >= code_gen_max_blocks ||
672 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
673 return NULL;
674 tb = &tbs[nb_tbs++];
675 tb->pc = pc;
676 tb->cflags = 0;
677 return tb;
680 void tb_free(TranslationBlock *tb)
682 /* In practice this is mostly used for single-use temporary TBs.
683 Ignore the hard cases and just back up if this TB happens to
684 be the last one generated. */
685 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
686 code_gen_ptr = tb->tc_ptr;
687 nb_tbs--;
691 static inline void invalidate_page_bitmap(PageDesc *p)
693 if (p->code_bitmap) {
694 qemu_free(p->code_bitmap);
695 p->code_bitmap = NULL;
697 p->code_write_count = 0;
700 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
702 static void page_flush_tb_1 (int level, void **lp)
704 int i;
706 if (*lp == NULL) {
707 return;
709 if (level == 0) {
710 PageDesc *pd = *lp;
711 for (i = 0; i < L2_SIZE; ++i) {
712 pd[i].first_tb = NULL;
713 invalidate_page_bitmap(pd + i);
715 } else {
716 void **pp = *lp;
717 for (i = 0; i < L2_SIZE; ++i) {
718 page_flush_tb_1 (level - 1, pp + i);
723 static void page_flush_tb(void)
725 int i;
726 for (i = 0; i < V_L1_SIZE; i++) {
727 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
731 /* flush all the translation blocks */
732 /* XXX: tb_flush is currently not thread safe */
733 void tb_flush(CPUState *env1)
735 CPUState *env;
736 #if defined(DEBUG_FLUSH)
737 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
738 (unsigned long)(code_gen_ptr - code_gen_buffer),
739 nb_tbs, nb_tbs > 0 ?
740 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
741 #endif
742 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
743 cpu_abort(env1, "Internal error: code buffer overflow\n");
745 nb_tbs = 0;
747 for(env = first_cpu; env != NULL; env = env->next_cpu) {
748 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
751 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
752 page_flush_tb();
754 code_gen_ptr = code_gen_buffer;
755 /* XXX: flush processor icache at this point if cache flush is
756 expensive */
757 tb_flush_count++;
760 #ifdef DEBUG_TB_CHECK
762 static void tb_invalidate_check(target_ulong address)
764 TranslationBlock *tb;
765 int i;
766 address &= TARGET_PAGE_MASK;
767 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
768 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
769 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
770 address >= tb->pc + tb->size)) {
771 printf("ERROR invalidate: address=" TARGET_FMT_lx
772 " PC=%08lx size=%04x\n",
773 address, (long)tb->pc, tb->size);
779 /* verify that all the pages have correct rights for code */
780 static void tb_page_check(void)
782 TranslationBlock *tb;
783 int i, flags1, flags2;
785 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
786 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
787 flags1 = page_get_flags(tb->pc);
788 flags2 = page_get_flags(tb->pc + tb->size - 1);
789 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
790 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
791 (long)tb->pc, tb->size, flags1, flags2);
797 #endif
799 /* invalidate one TB */
800 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
801 int next_offset)
803 TranslationBlock *tb1;
804 for(;;) {
805 tb1 = *ptb;
806 if (tb1 == tb) {
807 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
808 break;
810 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
814 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
816 TranslationBlock *tb1;
817 unsigned int n1;
819 for(;;) {
820 tb1 = *ptb;
821 n1 = (long)tb1 & 3;
822 tb1 = (TranslationBlock *)((long)tb1 & ~3);
823 if (tb1 == tb) {
824 *ptb = tb1->page_next[n1];
825 break;
827 ptb = &tb1->page_next[n1];
831 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
833 TranslationBlock *tb1, **ptb;
834 unsigned int n1;
836 ptb = &tb->jmp_next[n];
837 tb1 = *ptb;
838 if (tb1) {
839 /* find tb(n) in circular list */
840 for(;;) {
841 tb1 = *ptb;
842 n1 = (long)tb1 & 3;
843 tb1 = (TranslationBlock *)((long)tb1 & ~3);
844 if (n1 == n && tb1 == tb)
845 break;
846 if (n1 == 2) {
847 ptb = &tb1->jmp_first;
848 } else {
849 ptb = &tb1->jmp_next[n1];
852 /* now we can suppress tb(n) from the list */
853 *ptb = tb->jmp_next[n];
855 tb->jmp_next[n] = NULL;
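/* Encoding used by the jmp_first/jmp_next lists handled above: every
 * pointer in the circular list carries a 2-bit tag in its low bits.
 * Tag 0 or 1 means "this entry lives in jmp_next[0]/jmp_next[1] of the
 * pointed-to TB"; tag 2 marks the list head stored in jmp_first (the
 * owning TB itself, stored as 'tb | 2' in tb_phys_invalidate() and
 * tb_link_page() below). */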
859 /* reset the jump entry 'n' of a TB so that it is not chained to
860 another TB */
861 static inline void tb_reset_jump(TranslationBlock *tb, int n)
863 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
866 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
868 CPUState *env;
869 PageDesc *p;
870 unsigned int h, n1;
871 tb_page_addr_t phys_pc;
872 TranslationBlock *tb1, *tb2;
874 /* remove the TB from the hash list */
875 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
876 h = tb_phys_hash_func(phys_pc);
877 tb_remove(&tb_phys_hash[h], tb,
878 offsetof(TranslationBlock, phys_hash_next));
880 /* remove the TB from the page list */
881 if (tb->page_addr[0] != page_addr) {
882 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
883 tb_page_remove(&p->first_tb, tb);
884 invalidate_page_bitmap(p);
886 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
887 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
888 tb_page_remove(&p->first_tb, tb);
889 invalidate_page_bitmap(p);
892 tb_invalidated_flag = 1;
894 /* remove the TB from the hash list */
895 h = tb_jmp_cache_hash_func(tb->pc);
896 for(env = first_cpu; env != NULL; env = env->next_cpu) {
897 if (env->tb_jmp_cache[h] == tb)
898 env->tb_jmp_cache[h] = NULL;
901 /* suppress this TB from the two jump lists */
902 tb_jmp_remove(tb, 0);
903 tb_jmp_remove(tb, 1);
905 /* suppress any remaining jumps to this TB */
906 tb1 = tb->jmp_first;
907 for(;;) {
908 n1 = (long)tb1 & 3;
909 if (n1 == 2)
910 break;
911 tb1 = (TranslationBlock *)((long)tb1 & ~3);
912 tb2 = tb1->jmp_next[n1];
913 tb_reset_jump(tb1, n1);
914 tb1->jmp_next[n1] = NULL;
915 tb1 = tb2;
917 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
919 tb_phys_invalidate_count++;
922 static inline void set_bits(uint8_t *tab, int start, int len)
924 int end, mask, end1;
926 end = start + len;
927 tab += start >> 3;
928 mask = 0xff << (start & 7);
929 if ((start & ~7) == (end & ~7)) {
930 if (start < end) {
931 mask &= ~(0xff << (end & 7));
932 *tab |= mask;
934 } else {
935 *tab++ |= mask;
936 start = (start + 8) & ~7;
937 end1 = end & ~7;
938 while (start < end1) {
939 *tab++ = 0xff;
940 start += 8;
942 if (start < end) {
943 mask = ~(0xff << (end & 7));
944 *tab |= mask;
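/* Worked example: set_bits(tab, 5, 10) marks bits 5..14, i.e. it ORs
 * 0xe0 into tab[0] (bits 5..7) and 0x7f into tab[1] (bits 8..14),
 * matching the byte/bit layout that the code_bitmap users below rely on. */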
949 static void build_page_bitmap(PageDesc *p)
951 int n, tb_start, tb_end;
952 TranslationBlock *tb;
954 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
956 tb = p->first_tb;
957 while (tb != NULL) {
958 n = (long)tb & 3;
959 tb = (TranslationBlock *)((long)tb & ~3);
960 /* NOTE: this is subtle as a TB may span two physical pages */
961 if (n == 0) {
962 /* NOTE: tb_end may be after the end of the page, but
963 it is not a problem */
964 tb_start = tb->pc & ~TARGET_PAGE_MASK;
965 tb_end = tb_start + tb->size;
966 if (tb_end > TARGET_PAGE_SIZE)
967 tb_end = TARGET_PAGE_SIZE;
968 } else {
969 tb_start = 0;
970 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
972 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
973 tb = tb->page_next[n];
977 TranslationBlock *tb_gen_code(CPUState *env,
978 target_ulong pc, target_ulong cs_base,
979 int flags, int cflags)
981 TranslationBlock *tb;
982 uint8_t *tc_ptr;
983 tb_page_addr_t phys_pc, phys_page2;
984 target_ulong virt_page2;
985 int code_gen_size;
987 phys_pc = get_page_addr_code(env, pc);
988 tb = tb_alloc(pc);
989 if (!tb) {
990 /* flush must be done */
991 tb_flush(env);
992 /* cannot fail at this point */
993 tb = tb_alloc(pc);
994 /* Don't forget to invalidate previous TB info. */
995 tb_invalidated_flag = 1;
997 tc_ptr = code_gen_ptr;
998 tb->tc_ptr = tc_ptr;
999 tb->cs_base = cs_base;
1000 tb->flags = flags;
1001 tb->cflags = cflags;
1002 cpu_gen_code(env, tb, &code_gen_size);
1003 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1005 /* check next page if needed */
1006 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1007 phys_page2 = -1;
1008 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1009 phys_page2 = get_page_addr_code(env, virt_page2);
1011 tb_link_page(tb, phys_pc, phys_page2);
1012 return tb;
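/* Usage sketch: when the execution loop (cpu-exec.c) finds no cached TB
 * for the current pc/cs_base/flags triple, it does roughly
 *     tb = tb_gen_code(env, pc, cs_base, flags, 0);
 * and relies on tb_gen_code() to transparently tb_flush() and retry the
 * allocation whenever the code buffer or the TB array is full. */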
1015 /* invalidate all TBs which intersect with the target physical address
1016 range [start, end). NOTE: start and end must refer to
1017 the same physical page. 'is_cpu_write_access' should be true if called
1018 from a real cpu write access: the virtual CPU will exit the current
1019 TB if code is modified inside this TB. */
1020 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1021 int is_cpu_write_access)
1023 TranslationBlock *tb, *tb_next, *saved_tb;
1024 CPUState *env = cpu_single_env;
1025 tb_page_addr_t tb_start, tb_end;
1026 PageDesc *p;
1027 int n;
1028 #ifdef TARGET_HAS_PRECISE_SMC
1029 int current_tb_not_found = is_cpu_write_access;
1030 TranslationBlock *current_tb = NULL;
1031 int current_tb_modified = 0;
1032 target_ulong current_pc = 0;
1033 target_ulong current_cs_base = 0;
1034 int current_flags = 0;
1035 #endif /* TARGET_HAS_PRECISE_SMC */
1037 p = page_find(start >> TARGET_PAGE_BITS);
1038 if (!p)
1039 return;
1040 if (!p->code_bitmap &&
1041 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1042 is_cpu_write_access) {
1043 /* build code bitmap */
1044 build_page_bitmap(p);
1047 /* we remove all the TBs in the range [start, end) */
1048 /* XXX: see if in some cases it could be faster to invalidate all the code */
1049 tb = p->first_tb;
1050 while (tb != NULL) {
1051 n = (long)tb & 3;
1052 tb = (TranslationBlock *)((long)tb & ~3);
1053 tb_next = tb->page_next[n];
1054 /* NOTE: this is subtle as a TB may span two physical pages */
1055 if (n == 0) {
1056 /* NOTE: tb_end may be after the end of the page, but
1057 it is not a problem */
1058 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1059 tb_end = tb_start + tb->size;
1060 } else {
1061 tb_start = tb->page_addr[1];
1062 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1064 if (!(tb_end <= start || tb_start >= end)) {
1065 #ifdef TARGET_HAS_PRECISE_SMC
1066 if (current_tb_not_found) {
1067 current_tb_not_found = 0;
1068 current_tb = NULL;
1069 if (env->mem_io_pc) {
1070 /* now we have a real cpu fault */
1071 current_tb = tb_find_pc(env->mem_io_pc);
1074 if (current_tb == tb &&
1075 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1076 /* If we are modifying the current TB, we must stop
1077 its execution. We could be more precise by checking
1078 that the modification is after the current PC, but it
1079 would require a specialized function to partially
1080 restore the CPU state */
1082 current_tb_modified = 1;
1083 cpu_restore_state(current_tb, env, env->mem_io_pc);
1084 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1085 &current_flags);
1087 #endif /* TARGET_HAS_PRECISE_SMC */
1088 /* we need to do that to handle the case where a signal
1089 occurs while doing tb_phys_invalidate() */
1090 saved_tb = NULL;
1091 if (env) {
1092 saved_tb = env->current_tb;
1093 env->current_tb = NULL;
1095 tb_phys_invalidate(tb, -1);
1096 if (env) {
1097 env->current_tb = saved_tb;
1098 if (env->interrupt_request && env->current_tb)
1099 cpu_interrupt(env, env->interrupt_request);
1102 tb = tb_next;
1104 #if !defined(CONFIG_USER_ONLY)
1105 /* if no code remaining, no need to continue to use slow writes */
1106 if (!p->first_tb) {
1107 invalidate_page_bitmap(p);
1108 if (is_cpu_write_access) {
1109 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1112 #endif
1113 #ifdef TARGET_HAS_PRECISE_SMC
1114 if (current_tb_modified) {
1115 /* we generate a block containing just the instruction
1116 modifying the memory. It will ensure that it cannot modify
1117 itself */
1118 env->current_tb = NULL;
1119 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1120 cpu_resume_from_signal(env, NULL);
1122 #endif
1125 /* len must be <= 8 and start must be a multiple of len */
1126 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1128 PageDesc *p;
1129 int offset, b;
1130 #if 0
1131 if (1) {
1132 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1133 cpu_single_env->mem_io_vaddr, len,
1134 cpu_single_env->eip,
1135 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1137 #endif
1138 p = page_find(start >> TARGET_PAGE_BITS);
1139 if (!p)
1140 return;
1141 if (p->code_bitmap) {
1142 offset = start & ~TARGET_PAGE_MASK;
1143 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1144 if (b & ((1 << len) - 1))
1145 goto do_invalidate;
1146 } else {
1147 do_invalidate:
1148 tb_invalidate_phys_page_range(start, start + len, 1);
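/* Fast-path illustration: for a 4-byte write at offset 0x123 within the
 * page, offset >> 3 == 0x24 selects the bitmap byte and offset & 7 == 3
 * the starting bit; only if one of the 4 extracted bits is set (i.e.
 * translated code lives in those bytes) do we fall through to the full
 * tb_invalidate_phys_page_range() above. */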
1152 #if !defined(CONFIG_SOFTMMU)
1153 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1154 unsigned long pc, void *puc)
1156 TranslationBlock *tb;
1157 PageDesc *p;
1158 int n;
1159 #ifdef TARGET_HAS_PRECISE_SMC
1160 TranslationBlock *current_tb = NULL;
1161 CPUState *env = cpu_single_env;
1162 int current_tb_modified = 0;
1163 target_ulong current_pc = 0;
1164 target_ulong current_cs_base = 0;
1165 int current_flags = 0;
1166 #endif
1168 addr &= TARGET_PAGE_MASK;
1169 p = page_find(addr >> TARGET_PAGE_BITS);
1170 if (!p)
1171 return;
1172 tb = p->first_tb;
1173 #ifdef TARGET_HAS_PRECISE_SMC
1174 if (tb && pc != 0) {
1175 current_tb = tb_find_pc(pc);
1177 #endif
1178 while (tb != NULL) {
1179 n = (long)tb & 3;
1180 tb = (TranslationBlock *)((long)tb & ~3);
1181 #ifdef TARGET_HAS_PRECISE_SMC
1182 if (current_tb == tb &&
1183 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1184 /* If we are modifying the current TB, we must stop
1185 its execution. We could be more precise by checking
1186 that the modification is after the current PC, but it
1187 would require a specialized function to partially
1188 restore the CPU state */
1190 current_tb_modified = 1;
1191 cpu_restore_state(current_tb, env, pc);
1192 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1193 &current_flags);
1195 #endif /* TARGET_HAS_PRECISE_SMC */
1196 tb_phys_invalidate(tb, addr);
1197 tb = tb->page_next[n];
1199 p->first_tb = NULL;
1200 #ifdef TARGET_HAS_PRECISE_SMC
1201 if (current_tb_modified) {
1202 /* we generate a block containing just the instruction
1203 modifying the memory. It will ensure that it cannot modify
1204 itself */
1205 env->current_tb = NULL;
1206 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1207 cpu_resume_from_signal(env, puc);
1209 #endif
1211 #endif
1213 /* add the tb to the target page and protect it if necessary */
1214 static inline void tb_alloc_page(TranslationBlock *tb,
1215 unsigned int n, tb_page_addr_t page_addr)
1217 PageDesc *p;
1218 #ifndef CONFIG_USER_ONLY
1219 bool page_already_protected;
1220 #endif
1222 tb->page_addr[n] = page_addr;
1223 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1224 tb->page_next[n] = p->first_tb;
1225 #ifndef CONFIG_USER_ONLY
1226 page_already_protected = p->first_tb != NULL;
1227 #endif
1228 p->first_tb = (TranslationBlock *)((long)tb | n);
1229 invalidate_page_bitmap(p);
1231 #if defined(TARGET_HAS_SMC) || 1
1233 #if defined(CONFIG_USER_ONLY)
1234 if (p->flags & PAGE_WRITE) {
1235 target_ulong addr;
1236 PageDesc *p2;
1237 int prot;
1239 /* force the host page to be non-writable (writes will have a
1240 page fault + mprotect overhead) */
1241 page_addr &= qemu_host_page_mask;
1242 prot = 0;
1243 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1244 addr += TARGET_PAGE_SIZE) {
1246 p2 = page_find (addr >> TARGET_PAGE_BITS);
1247 if (!p2)
1248 continue;
1249 prot |= p2->flags;
1250 p2->flags &= ~PAGE_WRITE;
1252 mprotect(g2h(page_addr), qemu_host_page_size,
1253 (prot & PAGE_BITS) & ~PAGE_WRITE);
1254 #ifdef DEBUG_TB_INVALIDATE
1255 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1256 page_addr);
1257 #endif
1259 #else
1260 /* if some code is already present, then the pages are already
1261 protected. So we handle the case where only the first TB is
1262 allocated in a physical page */
1263 if (!page_already_protected) {
1264 tlb_protect_code(page_addr);
1266 #endif
1268 #endif /* TARGET_HAS_SMC */
1271 /* add a new TB and link it to the physical page tables. phys_page2 is
1272 (-1) to indicate that only one page contains the TB. */
1273 void tb_link_page(TranslationBlock *tb,
1274 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1276 unsigned int h;
1277 TranslationBlock **ptb;
1279 /* Grab the mmap lock to stop another thread invalidating this TB
1280 before we are done. */
1281 mmap_lock();
1282 /* add in the physical hash table */
1283 h = tb_phys_hash_func(phys_pc);
1284 ptb = &tb_phys_hash[h];
1285 tb->phys_hash_next = *ptb;
1286 *ptb = tb;
1288 /* add in the page list */
1289 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1290 if (phys_page2 != -1)
1291 tb_alloc_page(tb, 1, phys_page2);
1292 else
1293 tb->page_addr[1] = -1;
1295 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1296 tb->jmp_next[0] = NULL;
1297 tb->jmp_next[1] = NULL;
1299 /* init original jump addresses */
1300 if (tb->tb_next_offset[0] != 0xffff)
1301 tb_reset_jump(tb, 0);
1302 if (tb->tb_next_offset[1] != 0xffff)
1303 tb_reset_jump(tb, 1);
1305 #ifdef DEBUG_TB_CHECK
1306 tb_page_check();
1307 #endif
1308 mmap_unlock();
1311 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < tb[1].tc_ptr,
1312 i.e. the TB whose generated code contains tc_ptr. Return NULL if not found */
1313 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1315 int m_min, m_max, m;
1316 unsigned long v;
1317 TranslationBlock *tb;
1319 if (nb_tbs <= 0)
1320 return NULL;
1321 if (tc_ptr < (unsigned long)code_gen_buffer ||
1322 tc_ptr >= (unsigned long)code_gen_ptr)
1323 return NULL;
1324 /* binary search (cf Knuth) */
1325 m_min = 0;
1326 m_max = nb_tbs - 1;
1327 while (m_min <= m_max) {
1328 m = (m_min + m_max) >> 1;
1329 tb = &tbs[m];
1330 v = (unsigned long)tb->tc_ptr;
1331 if (v == tc_ptr)
1332 return tb;
1333 else if (tc_ptr < v) {
1334 m_max = m - 1;
1335 } else {
1336 m_min = m + 1;
1339 return &tbs[m_max];
1342 static void tb_reset_jump_recursive(TranslationBlock *tb);
1344 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1346 TranslationBlock *tb1, *tb_next, **ptb;
1347 unsigned int n1;
1349 tb1 = tb->jmp_next[n];
1350 if (tb1 != NULL) {
1351 /* find head of list */
1352 for(;;) {
1353 n1 = (long)tb1 & 3;
1354 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1355 if (n1 == 2)
1356 break;
1357 tb1 = tb1->jmp_next[n1];
1359 /* we are now sure that tb jumps to tb1 */
1360 tb_next = tb1;
1362 /* remove tb from the jmp_first list */
1363 ptb = &tb_next->jmp_first;
1364 for(;;) {
1365 tb1 = *ptb;
1366 n1 = (long)tb1 & 3;
1367 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1368 if (n1 == n && tb1 == tb)
1369 break;
1370 ptb = &tb1->jmp_next[n1];
1372 *ptb = tb->jmp_next[n];
1373 tb->jmp_next[n] = NULL;
1375 /* suppress the jump to next tb in generated code */
1376 tb_reset_jump(tb, n);
1378 /* recursively reset the jumps of the tb we used to jump to */
1379 tb_reset_jump_recursive(tb_next);
1383 static void tb_reset_jump_recursive(TranslationBlock *tb)
1385 tb_reset_jump_recursive2(tb, 0);
1386 tb_reset_jump_recursive2(tb, 1);
1389 #if defined(TARGET_HAS_ICE)
1390 #if defined(CONFIG_USER_ONLY)
1391 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1393 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1395 #else
1396 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1398 target_phys_addr_t addr;
1399 target_ulong pd;
1400 ram_addr_t ram_addr;
1401 PhysPageDesc *p;
1403 addr = cpu_get_phys_page_debug(env, pc);
1404 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1405 if (!p) {
1406 pd = IO_MEM_UNASSIGNED;
1407 } else {
1408 pd = p->phys_offset;
1410 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1411 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1413 #endif
1414 #endif /* TARGET_HAS_ICE */
1416 #if defined(CONFIG_USER_ONLY)
1417 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1422 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1423 int flags, CPUWatchpoint **watchpoint)
1425 return -ENOSYS;
1427 #else
1428 /* Add a watchpoint. */
1429 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1430 int flags, CPUWatchpoint **watchpoint)
1432 target_ulong len_mask = ~(len - 1);
1433 CPUWatchpoint *wp;
1435 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1436 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1437 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1438 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1439 return -EINVAL;
1441 wp = qemu_malloc(sizeof(*wp));
1443 wp->vaddr = addr;
1444 wp->len_mask = len_mask;
1445 wp->flags = flags;
1447 /* keep all GDB-injected watchpoints in front */
1448 if (flags & BP_GDB)
1449 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1450 else
1451 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1453 tlb_flush_page(env, addr);
1455 if (watchpoint)
1456 *watchpoint = wp;
1457 return 0;
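/* Usage sketch (flags as defined in cpu-all.h): install a 4-byte,
 * GDB-style write watchpoint and keep a handle to it:
 *
 *     CPUWatchpoint *wp;
 *     cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
 *
 * The length must be a power of two and 'addr' aligned to it, otherwise
 * -EINVAL is returned by the sanity check above. */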
1460 /* Remove a specific watchpoint. */
1461 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1462 int flags)
1464 target_ulong len_mask = ~(len - 1);
1465 CPUWatchpoint *wp;
1467 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1468 if (addr == wp->vaddr && len_mask == wp->len_mask
1469 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1470 cpu_watchpoint_remove_by_ref(env, wp);
1471 return 0;
1474 return -ENOENT;
1477 /* Remove a specific watchpoint by reference. */
1478 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1480 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1482 tlb_flush_page(env, watchpoint->vaddr);
1484 qemu_free(watchpoint);
1487 /* Remove all matching watchpoints. */
1488 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1490 CPUWatchpoint *wp, *next;
1492 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1493 if (wp->flags & mask)
1494 cpu_watchpoint_remove_by_ref(env, wp);
1497 #endif
1499 /* Add a breakpoint. */
1500 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1501 CPUBreakpoint **breakpoint)
1503 #if defined(TARGET_HAS_ICE)
1504 CPUBreakpoint *bp;
1506 bp = qemu_malloc(sizeof(*bp));
1508 bp->pc = pc;
1509 bp->flags = flags;
1511 /* keep all GDB-injected breakpoints in front */
1512 if (flags & BP_GDB)
1513 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1514 else
1515 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1517 breakpoint_invalidate(env, pc);
1519 if (breakpoint)
1520 *breakpoint = bp;
1521 return 0;
1522 #else
1523 return -ENOSYS;
1524 #endif
1527 /* Remove a specific breakpoint. */
1528 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1530 #if defined(TARGET_HAS_ICE)
1531 CPUBreakpoint *bp;
1533 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1534 if (bp->pc == pc && bp->flags == flags) {
1535 cpu_breakpoint_remove_by_ref(env, bp);
1536 return 0;
1539 return -ENOENT;
1540 #else
1541 return -ENOSYS;
1542 #endif
1545 /* Remove a specific breakpoint by reference. */
1546 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1548 #if defined(TARGET_HAS_ICE)
1549 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1551 breakpoint_invalidate(env, breakpoint->pc);
1553 qemu_free(breakpoint);
1554 #endif
1557 /* Remove all matching breakpoints. */
1558 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1560 #if defined(TARGET_HAS_ICE)
1561 CPUBreakpoint *bp, *next;
1563 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1564 if (bp->flags & mask)
1565 cpu_breakpoint_remove_by_ref(env, bp);
1567 #endif
1570 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1571 CPU loop after each instruction */
1572 void cpu_single_step(CPUState *env, int enabled)
1574 #if defined(TARGET_HAS_ICE)
1575 if (env->singlestep_enabled != enabled) {
1576 env->singlestep_enabled = enabled;
1577 if (kvm_enabled())
1578 kvm_update_guest_debug(env, 0);
1579 else {
1580 /* must flush all the translated code to avoid inconsistencies */
1581 /* XXX: only flush what is necessary */
1582 tb_flush(env);
1585 #endif
1588 /* enable or disable low-level logging */
1589 void cpu_set_log(int log_flags)
1591 loglevel = log_flags;
1592 if (loglevel && !logfile) {
1593 logfile = fopen(logfilename, log_append ? "a" : "w");
1594 if (!logfile) {
1595 perror(logfilename);
1596 _exit(1);
1598 #if !defined(CONFIG_SOFTMMU)
1599 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1601 static char logfile_buf[4096];
1602 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1604 #elif !defined(_WIN32)
1605 /* Win32 doesn't support line-buffering and requires size >= 2 */
1606 setvbuf(logfile, NULL, _IOLBF, 0);
1607 #endif
1608 log_append = 1;
1610 if (!loglevel && logfile) {
1611 fclose(logfile);
1612 logfile = NULL;
1616 void cpu_set_log_filename(const char *filename)
1618 logfilename = strdup(filename);
1619 if (logfile) {
1620 fclose(logfile);
1621 logfile = NULL;
1623 cpu_set_log(loglevel);
1626 static void cpu_unlink_tb(CPUState *env)
1628 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1629 problem and hope the cpu will stop of its own accord. For userspace
1630 emulation this often isn't actually as bad as it sounds. Often
1631 signals are used primarily to interrupt blocking syscalls. */
1632 TranslationBlock *tb;
1633 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1635 spin_lock(&interrupt_lock);
1636 tb = env->current_tb;
1637 /* if the cpu is currently executing code, we must unlink it and
1638 all the potentially executing TB */
1639 if (tb) {
1640 env->current_tb = NULL;
1641 tb_reset_jump_recursive(tb);
1643 spin_unlock(&interrupt_lock);
1646 #ifndef CONFIG_USER_ONLY
1647 /* mask must never be zero, except for A20 change call */
1648 static void tcg_handle_interrupt(CPUState *env, int mask)
1650 int old_mask;
1652 old_mask = env->interrupt_request;
1653 env->interrupt_request |= mask;
1656 * If called from iothread context, wake the target cpu in
1657 * case it's halted.
1659 if (!qemu_cpu_is_self(env)) {
1660 qemu_cpu_kick(env);
1661 return;
1664 if (use_icount) {
1665 env->icount_decr.u16.high = 0xffff;
1666 if (!can_do_io(env)
1667 && (mask & ~old_mask) != 0) {
1668 cpu_abort(env, "Raised interrupt while not in I/O function");
1670 } else {
1671 cpu_unlink_tb(env);
1675 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1677 #else /* CONFIG_USER_ONLY */
1679 void cpu_interrupt(CPUState *env, int mask)
1681 env->interrupt_request |= mask;
1682 cpu_unlink_tb(env);
1684 #endif /* CONFIG_USER_ONLY */
1686 void cpu_reset_interrupt(CPUState *env, int mask)
1688 env->interrupt_request &= ~mask;
1691 void cpu_exit(CPUState *env)
1693 env->exit_request = 1;
1694 cpu_unlink_tb(env);
1697 const CPULogItem cpu_log_items[] = {
1698 { CPU_LOG_TB_OUT_ASM, "out_asm",
1699 "show generated host assembly code for each compiled TB" },
1700 { CPU_LOG_TB_IN_ASM, "in_asm",
1701 "show target assembly code for each compiled TB" },
1702 { CPU_LOG_TB_OP, "op",
1703 "show micro ops for each compiled TB" },
1704 { CPU_LOG_TB_OP_OPT, "op_opt",
1705 "show micro ops "
1706 #ifdef TARGET_I386
1707 "before eflags optimization and "
1708 #endif
1709 "after liveness analysis" },
1710 { CPU_LOG_INT, "int",
1711 "show interrupts/exceptions in short format" },
1712 { CPU_LOG_EXEC, "exec",
1713 "show trace before each executed TB (lots of logs)" },
1714 { CPU_LOG_TB_CPU, "cpu",
1715 "show CPU state before block translation" },
1716 #ifdef TARGET_I386
1717 { CPU_LOG_PCALL, "pcall",
1718 "show protected mode far calls/returns/exceptions" },
1719 { CPU_LOG_RESET, "cpu_reset",
1720 "show CPU state before CPU resets" },
1721 #endif
1722 #ifdef DEBUG_IOPORT
1723 { CPU_LOG_IOPORT, "ioport",
1724 "show all i/o ports accesses" },
1725 #endif
1726 { 0, NULL, NULL },
1729 #ifndef CONFIG_USER_ONLY
1730 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1731 = QLIST_HEAD_INITIALIZER(memory_client_list);
1733 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1734 ram_addr_t size,
1735 ram_addr_t phys_offset,
1736 bool log_dirty)
1738 CPUPhysMemoryClient *client;
1739 QLIST_FOREACH(client, &memory_client_list, list) {
1740 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1744 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1745 target_phys_addr_t end)
1747 CPUPhysMemoryClient *client;
1748 QLIST_FOREACH(client, &memory_client_list, list) {
1749 int r = client->sync_dirty_bitmap(client, start, end);
1750 if (r < 0)
1751 return r;
1753 return 0;
1756 static int cpu_notify_migration_log(int enable)
1758 CPUPhysMemoryClient *client;
1759 QLIST_FOREACH(client, &memory_client_list, list) {
1760 int r = client->migration_log(client, enable);
1761 if (r < 0)
1762 return r;
1764 return 0;
1767 struct last_map {
1768 target_phys_addr_t start_addr;
1769 ram_addr_t size;
1770 ram_addr_t phys_offset;
1773 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1774 * address. Each intermediate table provides the next L2_BITs of guest
1775 physical address space. The number of levels varies based on host and
1776 * guest configuration, making it efficient to build the final guest
1777 * physical address by seeding the L1 offset and shifting and adding in
1778 * each L2 offset as we recurse through them. */
1779 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1780 void **lp, target_phys_addr_t addr,
1781 struct last_map *map)
1783 int i;
1785 if (*lp == NULL) {
1786 return;
1788 if (level == 0) {
1789 PhysPageDesc *pd = *lp;
1790 addr <<= L2_BITS + TARGET_PAGE_BITS;
1791 for (i = 0; i < L2_SIZE; ++i) {
1792 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1793 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1795 if (map->size &&
1796 start_addr == map->start_addr + map->size &&
1797 pd[i].phys_offset == map->phys_offset + map->size) {
1799 map->size += TARGET_PAGE_SIZE;
1800 continue;
1801 } else if (map->size) {
1802 client->set_memory(client, map->start_addr,
1803 map->size, map->phys_offset, false);
1806 map->start_addr = start_addr;
1807 map->size = TARGET_PAGE_SIZE;
1808 map->phys_offset = pd[i].phys_offset;
1811 } else {
1812 void **pp = *lp;
1813 for (i = 0; i < L2_SIZE; ++i) {
1814 phys_page_for_each_1(client, level - 1, pp + i,
1815 (addr << L2_BITS) | i, map);
1820 static void phys_page_for_each(CPUPhysMemoryClient *client)
1822 int i;
1823 struct last_map map = { };
1825 for (i = 0; i < P_L1_SIZE; ++i) {
1826 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1827 l1_phys_map + i, i, &map);
1829 if (map.size) {
1830 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1831 false);
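/* Worked example for phys_page_for_each_1() above, assuming the simplest
 * two-level layout where P_L1_SHIFT == L2_BITS: phys_page_for_each() seeds
 * 'addr' with the L1 index l1; the level-0 call shifts it left by
 * L2_BITS + TARGET_PAGE_BITS and ORs in 'i << TARGET_PAGE_BITS' for each
 * populated PhysPageDesc, so
 *     start_addr = ((l1 << L2_BITS) | i) << TARGET_PAGE_BITS
 * which is the guest physical address of that page. */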
1835 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1837 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1838 phys_page_for_each(client);
1841 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1843 QLIST_REMOVE(client, list);
1845 #endif
1847 static int cmp1(const char *s1, int n, const char *s2)
1849 if (strlen(s2) != n)
1850 return 0;
1851 return memcmp(s1, s2, n) == 0;
1854 /* Takes a comma-separated list of log masks. Returns 0 on error. */
1855 int cpu_str_to_log_mask(const char *str)
1857 const CPULogItem *item;
1858 int mask;
1859 const char *p, *p1;
1861 p = str;
1862 mask = 0;
1863 for(;;) {
1864 p1 = strchr(p, ',');
1865 if (!p1)
1866 p1 = p + strlen(p);
1867 if(cmp1(p,p1-p,"all")) {
1868 for(item = cpu_log_items; item->mask != 0; item++) {
1869 mask |= item->mask;
1871 } else {
1872 for(item = cpu_log_items; item->mask != 0; item++) {
1873 if (cmp1(p, p1 - p, item->name))
1874 goto found;
1876 return 0;
1878 found:
1879 mask |= item->mask;
1880 if (*p1 != ',')
1881 break;
1882 p = p1 + 1;
1884 return mask;
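/* Example: cpu_str_to_log_mask("in_asm,op") returns
 * CPU_LOG_TB_IN_ASM | CPU_LOG_TB_OP, "all" returns the union of every
 * mask in cpu_log_items[], and any unknown name makes the whole call
 * return 0. */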
1887 void cpu_abort(CPUState *env, const char *fmt, ...)
1889 va_list ap;
1890 va_list ap2;
1892 va_start(ap, fmt);
1893 va_copy(ap2, ap);
1894 fprintf(stderr, "qemu: fatal: ");
1895 vfprintf(stderr, fmt, ap);
1896 fprintf(stderr, "\n");
1897 #ifdef TARGET_I386
1898 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1899 #else
1900 cpu_dump_state(env, stderr, fprintf, 0);
1901 #endif
1902 if (qemu_log_enabled()) {
1903 qemu_log("qemu: fatal: ");
1904 qemu_log_vprintf(fmt, ap2);
1905 qemu_log("\n");
1906 #ifdef TARGET_I386
1907 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1908 #else
1909 log_cpu_state(env, 0);
1910 #endif
1911 qemu_log_flush();
1912 qemu_log_close();
1914 va_end(ap2);
1915 va_end(ap);
1916 #if defined(CONFIG_USER_ONLY)
1918 struct sigaction act;
1919 sigfillset(&act.sa_mask);
1920 act.sa_handler = SIG_DFL;
1921 sigaction(SIGABRT, &act, NULL);
1923 #endif
1924 abort();
1927 CPUState *cpu_copy(CPUState *env)
1929 CPUState *new_env = cpu_init(env->cpu_model_str);
1930 CPUState *next_cpu = new_env->next_cpu;
1931 int cpu_index = new_env->cpu_index;
1932 #if defined(TARGET_HAS_ICE)
1933 CPUBreakpoint *bp;
1934 CPUWatchpoint *wp;
1935 #endif
1937 memcpy(new_env, env, sizeof(CPUState));
1939 /* Preserve chaining and index. */
1940 new_env->next_cpu = next_cpu;
1941 new_env->cpu_index = cpu_index;
1943 /* Clone all break/watchpoints.
1944 Note: Once we support ptrace with hw-debug register access, make sure
1945 BP_CPU break/watchpoints are handled correctly on clone. */
1946 QTAILQ_INIT(&new_env->breakpoints);    /* give the copy fresh list heads */
1947 QTAILQ_INIT(&new_env->watchpoints);
1948 #if defined(TARGET_HAS_ICE)
1949 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1950 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1952 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1953 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1954 wp->flags, NULL);
1956 #endif
1958 return new_env;
1961 #if !defined(CONFIG_USER_ONLY)
1963 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1965 unsigned int i;
1967 /* Discard jump cache entries for any tb which might potentially
1968 overlap the flushed page. */
1969 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1970 memset (&env->tb_jmp_cache[i], 0,
1971 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1973 i = tb_jmp_cache_hash_page(addr);
1974 memset (&env->tb_jmp_cache[i], 0,
1975 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1978 static CPUTLBEntry s_cputlb_empty_entry = {
1979 .addr_read = -1,
1980 .addr_write = -1,
1981 .addr_code = -1,
1982 .addend = -1,
1985 /* NOTE: if flush_global is true, also flush global entries (not
1986 implemented yet) */
1987 void tlb_flush(CPUState *env, int flush_global)
1989 int i;
1991 #if defined(DEBUG_TLB)
1992 printf("tlb_flush:\n");
1993 #endif
1994 /* must reset current TB so that interrupts cannot modify the
1995 links while we are modifying them */
1996 env->current_tb = NULL;
1998 for(i = 0; i < CPU_TLB_SIZE; i++) {
1999 int mmu_idx;
2000 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2001 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2005 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2007 env->tlb_flush_addr = -1;
2008 env->tlb_flush_mask = 0;
2009 tlb_flush_count++;
2012 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2014 if (addr == (tlb_entry->addr_read &
2015 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2016 addr == (tlb_entry->addr_write &
2017 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2018 addr == (tlb_entry->addr_code &
2019 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2020 *tlb_entry = s_cputlb_empty_entry;
2024 void tlb_flush_page(CPUState *env, target_ulong addr)
2026 int i;
2027 int mmu_idx;
2029 #if defined(DEBUG_TLB)
2030 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2031 #endif
2032 /* Check if we need to flush due to large pages. */
2033 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2034 #if defined(DEBUG_TLB)
2035 printf("tlb_flush_page: forced full flush ("
2036 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2037 env->tlb_flush_addr, env->tlb_flush_mask);
2038 #endif
2039 tlb_flush(env, 1);
2040 return;
2042 /* must reset current TB so that interrupts cannot modify the
2043 links while we are modifying them */
2044 env->current_tb = NULL;
2046 addr &= TARGET_PAGE_MASK;
2047 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2048 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2049 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2051 tlb_flush_jmp_cache(env, addr);
2054 /* update the TLBs so that writes to code in the virtual page 'addr'
2055 can be detected */
2056 static void tlb_protect_code(ram_addr_t ram_addr)
2058 cpu_physical_memory_reset_dirty(ram_addr,
2059 ram_addr + TARGET_PAGE_SIZE,
2060 CODE_DIRTY_FLAG);
2063 /* update the TLB so that writes to physical page 'phys_addr' are no longer
2064 tested for self-modifying code */
2065 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2066 target_ulong vaddr)
2068 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2071 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2072 unsigned long start, unsigned long length)
2074 unsigned long addr;
2075 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2076 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2077 if ((addr - start) < length) {
2078 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2083 /* Note: start and end must be within the same ram block. */
2084 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2085 int dirty_flags)
2087 CPUState *env;
2088 unsigned long length, start1;
2089 int i;
2091 start &= TARGET_PAGE_MASK;
2092 end = TARGET_PAGE_ALIGN(end);
2094 length = end - start;
2095 if (length == 0)
2096 return;
2097 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2099 /* we modify the TLB cache so that the dirty bit will be set again
2100 when accessing the range */
2101 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2102 /* Check that we don't span multiple blocks - this breaks the
2103 address comparisons below. */
2104 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2105 != (end - 1) - start) {
2106 abort();
2109 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2110 int mmu_idx;
2111 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2112 for(i = 0; i < CPU_TLB_SIZE; i++)
2113 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2114 start1, length);
2119 int cpu_physical_memory_set_dirty_tracking(int enable)
2121 int ret = 0;
2122 in_migration = enable;
2123 ret = cpu_notify_migration_log(!!enable);
2124 return ret;
2127 int cpu_physical_memory_get_dirty_tracking(void)
2129 return in_migration;
2132 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2133 target_phys_addr_t end_addr)
2135 int ret;
2137 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2138 return ret;
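/*
 * A minimal, hypothetical sketch (not from this file) of how a
 * dirty-logging client such as live migration might drive the helpers
 * above.  It assumes the MIGRATION_DIRTY_FLAG bit from cpu-all.h and a
 * single RAM block covering [start, start + size); the "example_" name
 * is a placeholder.
 */
#if 0
static void example_dirty_log_round(target_phys_addr_t start, ram_addr_t size)
{
    cpu_physical_memory_set_dirty_tracking(1);            /* enable logging */
    cpu_physical_sync_dirty_bitmap(start, start + size);  /* pull dirty bits in */
    /* Re-arm tracking; start and end must stay within one RAM block. */
    cpu_physical_memory_reset_dirty(start, start + size, MIGRATION_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_tracking(0);
}
#endif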
2141 int cpu_physical_log_start(target_phys_addr_t start_addr,
2142 ram_addr_t size)
2144 CPUPhysMemoryClient *client;
2145 QLIST_FOREACH(client, &memory_client_list, list) {
2146 if (client->log_start) {
2147 int r = client->log_start(client, start_addr, size);
2148 if (r < 0) {
2149 return r;
2153 return 0;
2156 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2157 ram_addr_t size)
2159 CPUPhysMemoryClient *client;
2160 QLIST_FOREACH(client, &memory_client_list, list) {
2161 if (client->log_stop) {
2162 int r = client->log_stop(client, start_addr, size);
2163 if (r < 0) {
2164 return r;
2168 return 0;
2171 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2173 ram_addr_t ram_addr;
2174 void *p;
2176 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2177 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2178 + tlb_entry->addend);
2179 ram_addr = qemu_ram_addr_from_host_nofail(p);
2180 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2181 tlb_entry->addr_write |= TLB_NOTDIRTY;
2186 /* update the TLB according to the current state of the dirty bits */
2187 void cpu_tlb_update_dirty(CPUState *env)
2189 int i;
2190 int mmu_idx;
2191 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2192 for(i = 0; i < CPU_TLB_SIZE; i++)
2193 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2197 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2199 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2200 tlb_entry->addr_write = vaddr;
2203 /* update the TLB corresponding to virtual page vaddr
2204 so that it is no longer dirty */
2205 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2207 int i;
2208 int mmu_idx;
2210 vaddr &= TARGET_PAGE_MASK;
2211 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2212 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2213 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2216 /* Our TLB does not support large pages, so remember the area covered by
2217 large pages and trigger a full TLB flush if these are invalidated. */
2218 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2219 target_ulong size)
2221 target_ulong mask = ~(size - 1);
2223 if (env->tlb_flush_addr == (target_ulong)-1) {
2224 env->tlb_flush_addr = vaddr & mask;
2225 env->tlb_flush_mask = mask;
2226 return;
2228 /* Extend the existing region to include the new page.
2229 This is a compromise between unnecessary flushes and the cost
2230 of maintaining a full variable size TLB. */
2231 mask &= env->tlb_flush_mask;
2232 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2233 mask <<= 1;
2235 env->tlb_flush_addr &= mask;
2236 env->tlb_flush_mask = mask;
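/*
 * Worked example (invented values, not from this file) of the
 * mask-widening loop above, assuming a 64-bit target_ulong: two 2MB
 * large pages at 0x40000000 and 0x80200000 end up tracked as one 4GB
 * region, trading extra full flushes for a single addr/mask pair.
 */
#if 0
static void example_large_page_merge(void)
{
    /* first large page is recorded as-is */
    target_ulong flush_addr = 0x40000000;
    target_ulong flush_mask = ~(target_ulong)(0x200000 - 1);

    /* second large page: widen the mask until both share one region */
    target_ulong vaddr = 0x80200000;
    target_ulong mask = ~(target_ulong)(0x200000 - 1) & flush_mask;
    while (((flush_addr ^ vaddr) & mask) != 0) {
        mask <<= 1;
    }
    flush_addr &= mask;   /* 0x0: region now spans 0x0 .. 0xffffffff */
    flush_mask = mask;    /* 0xffffffff00000000 */
}
#endif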
2239 /* Add a new TLB entry. At most one entry for a given virtual address
2240 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2241 supplied size is only used by tlb_flush_page. */
2242 void tlb_set_page(CPUState *env, target_ulong vaddr,
2243 target_phys_addr_t paddr, int prot,
2244 int mmu_idx, target_ulong size)
2246 PhysPageDesc *p;
2247 unsigned long pd;
2248 unsigned int index;
2249 target_ulong address;
2250 target_ulong code_address;
2251 unsigned long addend;
2252 CPUTLBEntry *te;
2253 CPUWatchpoint *wp;
2254 target_phys_addr_t iotlb;
2256 assert(size >= TARGET_PAGE_SIZE);
2257 if (size != TARGET_PAGE_SIZE) {
2258 tlb_add_large_page(env, vaddr, size);
2260 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2261 if (!p) {
2262 pd = IO_MEM_UNASSIGNED;
2263 } else {
2264 pd = p->phys_offset;
2266 #if defined(DEBUG_TLB)
2267 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2268 " prot=%x idx=%d pd=0x%08lx\n",
2269 vaddr, paddr, prot, mmu_idx, pd);
2270 #endif
2272 address = vaddr;
2273 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2274 /* IO memory case (romd handled later) */
2275 address |= TLB_MMIO;
2277 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2278 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2279 /* Normal RAM. */
2280 iotlb = pd & TARGET_PAGE_MASK;
2281 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2282 iotlb |= IO_MEM_NOTDIRTY;
2283 else
2284 iotlb |= IO_MEM_ROM;
2285 } else {
2286 /* IO handlers are currently passed a physical address.
2287 It would be nice to pass an offset from the base address
2288 of that region. This would avoid having to special case RAM,
2289 and avoid full address decoding in every device.
2290 We can't use the high bits of pd for this because
2291 IO_MEM_ROMD uses these as a ram address. */
2292 iotlb = (pd & ~TARGET_PAGE_MASK);
2293 if (p) {
2294 iotlb += p->region_offset;
2295 } else {
2296 iotlb += paddr;
2300 code_address = address;
2301 /* Make accesses to pages with watchpoints go via the
2302 watchpoint trap routines. */
2303 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2304 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2305 /* Avoid trapping reads of pages with a write breakpoint. */
2306 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2307 iotlb = io_mem_watch + paddr;
2308 address |= TLB_MMIO;
2309 break;
2314 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2315 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2316 te = &env->tlb_table[mmu_idx][index];
2317 te->addend = addend - vaddr;
2318 if (prot & PAGE_READ) {
2319 te->addr_read = address;
2320 } else {
2321 te->addr_read = -1;
2324 if (prot & PAGE_EXEC) {
2325 te->addr_code = code_address;
2326 } else {
2327 te->addr_code = -1;
2329 if (prot & PAGE_WRITE) {
2330 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2331 (pd & IO_MEM_ROMD)) {
2332 /* Write access calls the I/O callback. */
2333 te->addr_write = address | TLB_MMIO;
2334 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2335 !cpu_physical_memory_is_dirty(pd)) {
2336 te->addr_write = address | TLB_NOTDIRTY;
2337 } else {
2338 te->addr_write = address;
2340 } else {
2341 te->addr_write = -1;
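/*
 * A minimal, hypothetical sketch of the usual caller: a target's
 * tlb_fill()-style MMU handler installing a mapping after a successful
 * guest page-table walk.  The parameter names are placeholders only.
 */
#if 0
static void example_install_mapping(CPUState *env, target_ulong guest_vaddr,
                                    target_phys_addr_t guest_paddr,
                                    int page_prot, int mmu_idx)
{
    /* Only one TARGET_PAGE_SIZE entry is installed; a larger size merely
       records the large-page area consulted by tlb_flush_page(). */
    tlb_set_page(env, guest_vaddr & TARGET_PAGE_MASK,
                 guest_paddr & TARGET_PAGE_MASK,
                 page_prot, mmu_idx, TARGET_PAGE_SIZE);
}
#endif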
2345 #else
2347 void tlb_flush(CPUState *env, int flush_global)
2351 void tlb_flush_page(CPUState *env, target_ulong addr)
2356 * Walks guest process memory "regions" one by one
2357 * and calls callback function 'fn' for each region.
2360 struct walk_memory_regions_data
2362 walk_memory_regions_fn fn;
2363 void *priv;
2364 unsigned long start;
2365 int prot;
2368 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2369 abi_ulong end, int new_prot)
2371 if (data->start != -1ul) {
2372 int rc = data->fn(data->priv, data->start, end, data->prot);
2373 if (rc != 0) {
2374 return rc;
2378 data->start = (new_prot ? end : -1ul);
2379 data->prot = new_prot;
2381 return 0;
2384 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2385 abi_ulong base, int level, void **lp)
2387 abi_ulong pa;
2388 int i, rc;
2390 if (*lp == NULL) {
2391 return walk_memory_regions_end(data, base, 0);
2394 if (level == 0) {
2395 PageDesc *pd = *lp;
2396 for (i = 0; i < L2_SIZE; ++i) {
2397 int prot = pd[i].flags;
2399 pa = base | (i << TARGET_PAGE_BITS);
2400 if (prot != data->prot) {
2401 rc = walk_memory_regions_end(data, pa, prot);
2402 if (rc != 0) {
2403 return rc;
2407 } else {
2408 void **pp = *lp;
2409 for (i = 0; i < L2_SIZE; ++i) {
2410 pa = base | ((abi_ulong)i <<
2411 (TARGET_PAGE_BITS + L2_BITS * level));
2412 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2413 if (rc != 0) {
2414 return rc;
2419 return 0;
2422 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2424 struct walk_memory_regions_data data;
2425 unsigned long i;
2427 data.fn = fn;
2428 data.priv = priv;
2429 data.start = -1ul;
2430 data.prot = 0;
2432 for (i = 0; i < V_L1_SIZE; i++) {
2433 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2434 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2435 if (rc != 0) {
2436 return rc;
2440 return walk_memory_regions_end(&data, 0, 0);
2443 static int dump_region(void *priv, abi_ulong start,
2444 abi_ulong end, unsigned long prot)
2446 FILE *f = (FILE *)priv;
2448 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2449 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2450 start, end, end - start,
2451 ((prot & PAGE_READ) ? 'r' : '-'),
2452 ((prot & PAGE_WRITE) ? 'w' : '-'),
2453 ((prot & PAGE_EXEC) ? 'x' : '-'));
2455 return (0);
2458 /* dump memory mappings */
2459 void page_dump(FILE *f)
2461 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2462 "start", "end", "size", "prot");
2463 walk_memory_regions(f, dump_region);
2466 int page_get_flags(target_ulong address)
2468 PageDesc *p;
2470 p = page_find(address >> TARGET_PAGE_BITS);
2471 if (!p)
2472 return 0;
2473 return p->flags;
2476 /* Modify the flags of a page and invalidate the code if necessary.
2477 The flag PAGE_WRITE_ORG is positioned automatically depending
2478 on PAGE_WRITE. The mmap_lock should already be held. */
2479 void page_set_flags(target_ulong start, target_ulong end, int flags)
2481 target_ulong addr, len;
2483 /* This function should never be called with addresses outside the
2484 guest address space. If this assert fires, it probably indicates
2485 a missing call to h2g_valid. */
2486 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2487 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2488 #endif
2489 assert(start < end);
2491 start = start & TARGET_PAGE_MASK;
2492 end = TARGET_PAGE_ALIGN(end);
2494 if (flags & PAGE_WRITE) {
2495 flags |= PAGE_WRITE_ORG;
2498 for (addr = start, len = end - start;
2499 len != 0;
2500 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2501 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2503 /* If the write protection bit is set, then we invalidate
2504 the code inside. */
2505 if (!(p->flags & PAGE_WRITE) &&
2506 (flags & PAGE_WRITE) &&
2507 p->first_tb) {
2508 tb_invalidate_phys_page(addr, 0, NULL);
2510 p->flags = flags;
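/*
 * A small sketch (placeholder name, not part of this file) of how
 * user-mode mmap emulation typically records a fresh writable mapping
 * with the function above; PAGE_WRITE_ORG is added automatically and
 * translated code in pages that become writable is invalidated.
 */
#if 0
static void example_record_anonymous_mapping(target_ulong start, target_ulong len)
{
    page_set_flags(start, start + len,
                   PAGE_VALID | PAGE_READ | PAGE_WRITE);
}
#endif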
2514 int page_check_range(target_ulong start, target_ulong len, int flags)
2516 PageDesc *p;
2517 target_ulong end;
2518 target_ulong addr;
2520 /* This function should never be called with addresses outside the
2521 guest address space. If this assert fires, it probably indicates
2522 a missing call to h2g_valid. */
2523 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2524 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2525 #endif
2527 if (len == 0) {
2528 return 0;
2530 if (start + len - 1 < start) {
2531 /* We've wrapped around. */
2532 return -1;
2535 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2536 start = start & TARGET_PAGE_MASK;
2538 for (addr = start, len = end - start;
2539 len != 0;
2540 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2541 p = page_find(addr >> TARGET_PAGE_BITS);
2542 if (!p)
2543 return -1;
2544 if (!(p->flags & PAGE_VALID))
2545 return -1;
2547 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2548 return -1;
2549 if (flags & PAGE_WRITE) {
2550 if (!(p->flags & PAGE_WRITE_ORG))
2551 return -1;
2552 /* unprotect the page if it was put read-only because it
2553 contains translated code */
2554 if (!(p->flags & PAGE_WRITE)) {
2555 if (!page_unprotect(addr, 0, NULL))
2556 return -1;
2558 return 0;
2561 return 0;
2564 /* called from signal handler: invalidate the code and unprotect the
2565 page. Return TRUE if the fault was successfully handled. */
2566 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2568 unsigned int prot;
2569 PageDesc *p;
2570 target_ulong host_start, host_end, addr;
2572 /* Technically this isn't safe inside a signal handler. However we
2573 know this only ever happens in a synchronous SEGV handler, so in
2574 practice it seems to be ok. */
2575 mmap_lock();
2577 p = page_find(address >> TARGET_PAGE_BITS);
2578 if (!p) {
2579 mmap_unlock();
2580 return 0;
2583 /* if the page was really writable, then we change its
2584 protection back to writable */
2585 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2586 host_start = address & qemu_host_page_mask;
2587 host_end = host_start + qemu_host_page_size;
2589 prot = 0;
2590 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2591 p = page_find(addr >> TARGET_PAGE_BITS);
2592 p->flags |= PAGE_WRITE;
2593 prot |= p->flags;
2595 /* and since the content will be modified, we must invalidate
2596 the corresponding translated code. */
2597 tb_invalidate_phys_page(addr, pc, puc);
2598 #ifdef DEBUG_TB_CHECK
2599 tb_invalidate_check(addr);
2600 #endif
2602 mprotect((void *)g2h(host_start), qemu_host_page_size,
2603 prot & PAGE_BITS);
2605 mmap_unlock();
2606 return 1;
2608 mmap_unlock();
2609 return 0;
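/*
 * A hedged sketch of the expected caller: the host SEGV handler in the
 * user-mode emulation code.  The function below is illustrative only;
 * h2g() converts a host address back to a guest address in this
 * configuration.
 */
#if 0
static int example_handle_write_fault(unsigned long host_addr, unsigned long pc)
{
    /* If the write hit a page made read-only to guard translated code,
       page_unprotect() invalidates the TBs and restores PROT_WRITE, so
       the faulting instruction can simply be restarted. */
    return page_unprotect(h2g(host_addr), pc, NULL);
}
#endif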
2612 static inline void tlb_set_dirty(CPUState *env,
2613 unsigned long addr, target_ulong vaddr)
2616 #endif /* defined(CONFIG_USER_ONLY) */
2618 #if !defined(CONFIG_USER_ONLY)
2620 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2621 typedef struct subpage_t {
2622 target_phys_addr_t base;
2623 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2624 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2625 } subpage_t;
2627 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2628 ram_addr_t memory, ram_addr_t region_offset);
2629 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2630 ram_addr_t orig_memory,
2631 ram_addr_t region_offset);
2632 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2633 need_subpage) \
2634 do { \
2635 if (addr > start_addr) \
2636 start_addr2 = 0; \
2637 else { \
2638 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2639 if (start_addr2 > 0) \
2640 need_subpage = 1; \
2643 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2644 end_addr2 = TARGET_PAGE_SIZE - 1; \
2645 else { \
2646 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2647 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2648 need_subpage = 1; \
2650 } while (0)
2652 /* register physical memory.
2653 For RAM, 'size' must be a multiple of the target page size.
2654 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2655 io memory page. The address used when calling the IO function is
2656 the offset from the start of the region, plus region_offset. Both
2657 start_addr and region_offset are rounded down to a page boundary
2658 before calculating this offset. This should not be a problem unless
2659 the low bits of start_addr and region_offset differ. */
2660 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2661 ram_addr_t size,
2662 ram_addr_t phys_offset,
2663 ram_addr_t region_offset,
2664 bool log_dirty)
2666 target_phys_addr_t addr, end_addr;
2667 PhysPageDesc *p;
2668 CPUState *env;
2669 ram_addr_t orig_size = size;
2670 subpage_t *subpage;
2672 assert(size);
2673 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2675 if (phys_offset == IO_MEM_UNASSIGNED) {
2676 region_offset = start_addr;
2678 region_offset &= TARGET_PAGE_MASK;
2679 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2680 end_addr = start_addr + (target_phys_addr_t)size;
2682 addr = start_addr;
2683 do {
2684 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2685 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2686 ram_addr_t orig_memory = p->phys_offset;
2687 target_phys_addr_t start_addr2, end_addr2;
2688 int need_subpage = 0;
2690 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2691 need_subpage);
2692 if (need_subpage) {
2693 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2694 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2695 &p->phys_offset, orig_memory,
2696 p->region_offset);
2697 } else {
2698 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2699 >> IO_MEM_SHIFT];
2701 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2702 region_offset);
2703 p->region_offset = 0;
2704 } else {
2705 p->phys_offset = phys_offset;
2706 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2707 (phys_offset & IO_MEM_ROMD))
2708 phys_offset += TARGET_PAGE_SIZE;
2710 } else {
2711 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2712 p->phys_offset = phys_offset;
2713 p->region_offset = region_offset;
2714 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2715 (phys_offset & IO_MEM_ROMD)) {
2716 phys_offset += TARGET_PAGE_SIZE;
2717 } else {
2718 target_phys_addr_t start_addr2, end_addr2;
2719 int need_subpage = 0;
2721 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2722 end_addr2, need_subpage);
2724 if (need_subpage) {
2725 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2726 &p->phys_offset, IO_MEM_UNASSIGNED,
2727 addr & TARGET_PAGE_MASK);
2728 subpage_register(subpage, start_addr2, end_addr2,
2729 phys_offset, region_offset);
2730 p->region_offset = 0;
2734 region_offset += TARGET_PAGE_SIZE;
2735 addr += TARGET_PAGE_SIZE;
2736 } while (addr != end_addr);
2738 /* since each CPU stores ram addresses in its TLB cache, we must
2739 reset the modified entries */
2740 /* XXX: slow ! */
2741 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2742 tlb_flush(env, 1);
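/*
 * A minimal sketch (invented addresses and arguments) of how board or
 * device code hands regions to the function above: RAM passes the
 * ram_addr_t returned by qemu_ram_alloc(), MMIO passes the index
 * returned by cpu_register_io_memory().
 */
#if 0
static void example_register_regions(ram_addr_t ram_offset, int mmio_index)
{
    /* 64KB of RAM at guest-physical 0x00000000 */
    cpu_register_physical_memory_log(0x00000000, 0x10000,
                                     ram_offset | IO_MEM_RAM, 0, false);
    /* one page of MMIO at guest-physical 0x10000000 */
    cpu_register_physical_memory_log(0x10000000, TARGET_PAGE_SIZE,
                                     mmio_index, 0, false);
}
#endif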
2746 /* XXX: temporary until new memory mapping API */
2747 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2749 PhysPageDesc *p;
2751 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2752 if (!p)
2753 return IO_MEM_UNASSIGNED;
2754 return p->phys_offset;
2757 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2759 if (kvm_enabled())
2760 kvm_coalesce_mmio_region(addr, size);
2763 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2765 if (kvm_enabled())
2766 kvm_uncoalesce_mmio_region(addr, size);
2769 void qemu_flush_coalesced_mmio_buffer(void)
2771 if (kvm_enabled())
2772 kvm_flush_coalesced_mmio_buffer();
2775 #if defined(__linux__) && !defined(TARGET_S390X)
2777 #include <sys/vfs.h>
2779 #define HUGETLBFS_MAGIC 0x958458f6
2781 static long gethugepagesize(const char *path)
2783 struct statfs fs;
2784 int ret;
2786 do {
2787 ret = statfs(path, &fs);
2788 } while (ret != 0 && errno == EINTR);
2790 if (ret != 0) {
2791 perror(path);
2792 return 0;
2795 if (fs.f_type != HUGETLBFS_MAGIC)
2796 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2798 return fs.f_bsize;
2801 static void *file_ram_alloc(RAMBlock *block,
2802 ram_addr_t memory,
2803 const char *path)
2805 char *filename;
2806 void *area;
2807 int fd;
2808 #ifdef MAP_POPULATE
2809 int flags;
2810 #endif
2811 unsigned long hpagesize;
2813 hpagesize = gethugepagesize(path);
2814 if (!hpagesize) {
2815 return NULL;
2818 if (memory < hpagesize) {
2819 return NULL;
2822 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2823 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2824 return NULL;
2827 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2828 return NULL;
2831 fd = mkstemp(filename);
2832 if (fd < 0) {
2833 perror("unable to create backing store for hugepages");
2834 free(filename);
2835 return NULL;
2837 unlink(filename);
2838 free(filename);
2840 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2843 * ftruncate is not supported by hugetlbfs in older
2844 * hosts, so don't bother bailing out on errors.
2845 * If anything goes wrong with it under other filesystems,
2846 * mmap will fail.
2848 if (ftruncate(fd, memory))
2849 perror("ftruncate");
2851 #ifdef MAP_POPULATE
3852 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
3853 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2854 * to sidestep this quirk.
2856 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2857 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2858 #else
2859 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2860 #endif
2861 if (area == MAP_FAILED) {
2862 perror("file_ram_alloc: can't mmap RAM pages");
2863 close(fd);
2864 return (NULL);
2866 block->fd = fd;
2867 return area;
2869 #endif
2871 static ram_addr_t find_ram_offset(ram_addr_t size)
2873 RAMBlock *block, *next_block;
2874 ram_addr_t offset = 0, mingap = ULONG_MAX;
2876 if (QLIST_EMPTY(&ram_list.blocks))
2877 return 0;
2879 QLIST_FOREACH(block, &ram_list.blocks, next) {
2880 ram_addr_t end, next = ULONG_MAX;
2882 end = block->offset + block->length;
2884 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2885 if (next_block->offset >= end) {
2886 next = MIN(next, next_block->offset);
2889 if (next - end >= size && next - end < mingap) {
2890 offset = end;
2891 mingap = next - end;
2894 return offset;
2897 static ram_addr_t last_ram_offset(void)
2899 RAMBlock *block;
2900 ram_addr_t last = 0;
2902 QLIST_FOREACH(block, &ram_list.blocks, next)
2903 last = MAX(last, block->offset + block->length);
2905 return last;
2908 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2909 ram_addr_t size, void *host)
2911 RAMBlock *new_block, *block;
2913 size = TARGET_PAGE_ALIGN(size);
2914 new_block = qemu_mallocz(sizeof(*new_block));
2916 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2917 char *id = dev->parent_bus->info->get_dev_path(dev);
2918 if (id) {
2919 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2920 qemu_free(id);
2923 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2925 QLIST_FOREACH(block, &ram_list.blocks, next) {
2926 if (!strcmp(block->idstr, new_block->idstr)) {
2927 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2928 new_block->idstr);
2929 abort();
2933 new_block->offset = find_ram_offset(size);
2934 if (host) {
2935 new_block->host = host;
2936 new_block->flags |= RAM_PREALLOC_MASK;
2937 } else {
2938 if (mem_path) {
2939 #if defined (__linux__) && !defined(TARGET_S390X)
2940 new_block->host = file_ram_alloc(new_block, size, mem_path);
2941 if (!new_block->host) {
2942 new_block->host = qemu_vmalloc(size);
2943 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2945 #else
2946 fprintf(stderr, "-mem-path option unsupported\n");
2947 exit(1);
2948 #endif
2949 } else {
2950 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2951 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2952 a system-defined value, which is at least 256GB. Larger systems
2953 have larger values. We put the guest between the end of data
2954 segment (system break) and this value. We use 32GB as a base to
2955 have enough room for the system break to grow. */
2956 new_block->host = mmap((void*)0x800000000, size,
2957 PROT_EXEC|PROT_READ|PROT_WRITE,
2958 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2959 if (new_block->host == MAP_FAILED) {
2960 fprintf(stderr, "Allocating RAM failed\n");
2961 abort();
2963 #else
2964 if (xen_mapcache_enabled()) {
2965 xen_ram_alloc(new_block->offset, size);
2966 } else {
2967 new_block->host = qemu_vmalloc(size);
2969 #endif
2970 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2973 new_block->length = size;
2975 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2977 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2978 last_ram_offset() >> TARGET_PAGE_BITS);
2979 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2980 0xff, size >> TARGET_PAGE_BITS);
2982 if (kvm_enabled())
2983 kvm_setup_guest_memory(new_block->host, size);
2985 return new_block->offset;
2988 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2990 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
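/*
 * A short, hypothetical example of the allocator above from a device
 * model's point of view; "vga.vram" and the 8MB size are placeholders.
 */
#if 0
static ram_addr_t example_alloc_vram(DeviceState *dev)
{
    /* The returned ram_addr_t offset is what gets passed to
       cpu_register_physical_memory_log() to make the RAM guest visible
       and to qemu_get_ram_ptr() to obtain a host pointer. */
    return qemu_ram_alloc(dev, "vga.vram", 8 * 1024 * 1024);
}
#endif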
2993 void qemu_ram_free_from_ptr(ram_addr_t addr)
2995 RAMBlock *block;
2997 QLIST_FOREACH(block, &ram_list.blocks, next) {
2998 if (addr == block->offset) {
2999 QLIST_REMOVE(block, next);
3000 qemu_free(block);
3001 return;
3006 void qemu_ram_free(ram_addr_t addr)
3008 RAMBlock *block;
3010 QLIST_FOREACH(block, &ram_list.blocks, next) {
3011 if (addr == block->offset) {
3012 QLIST_REMOVE(block, next);
3013 if (block->flags & RAM_PREALLOC_MASK) {
3015 } else if (mem_path) {
3016 #if defined (__linux__) && !defined(TARGET_S390X)
3017 if (block->fd) {
3018 munmap(block->host, block->length);
3019 close(block->fd);
3020 } else {
3021 qemu_vfree(block->host);
3023 #else
3024 abort();
3025 #endif
3026 } else {
3027 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3028 munmap(block->host, block->length);
3029 #else
3030 if (xen_mapcache_enabled()) {
3031 qemu_invalidate_entry(block->host);
3032 } else {
3033 qemu_vfree(block->host);
3035 #endif
3037 qemu_free(block);
3038 return;
3044 #ifndef _WIN32
3045 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3047 RAMBlock *block;
3048 ram_addr_t offset;
3049 int flags;
3050 void *area, *vaddr;
3052 QLIST_FOREACH(block, &ram_list.blocks, next) {
3053 offset = addr - block->offset;
3054 if (offset < block->length) {
3055 vaddr = block->host + offset;
3056 if (block->flags & RAM_PREALLOC_MASK) {
3058 } else {
3059 flags = MAP_FIXED;
3060 munmap(vaddr, length);
3061 if (mem_path) {
3062 #if defined(__linux__) && !defined(TARGET_S390X)
3063 if (block->fd) {
3064 #ifdef MAP_POPULATE
3065 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3066 MAP_PRIVATE;
3067 #else
3068 flags |= MAP_PRIVATE;
3069 #endif
3070 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3071 flags, block->fd, offset);
3072 } else {
3073 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3074 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3075 flags, -1, 0);
3077 #else
3078 abort();
3079 #endif
3080 } else {
3081 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3082 flags |= MAP_SHARED | MAP_ANONYMOUS;
3083 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3084 flags, -1, 0);
3085 #else
3086 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3087 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3088 flags, -1, 0);
3089 #endif
3091 if (area != vaddr) {
3092 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3093 length, addr);
3094 exit(1);
3096 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3098 return;
3102 #endif /* !_WIN32 */
3104 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3105 With the exception of the softmmu code in this file, this should
3106 only be used for local memory (e.g. video ram) that the device owns,
3107 and knows it isn't going to access beyond the end of the block.
3109 It should not be used for general purpose DMA.
3110 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3112 void *qemu_get_ram_ptr(ram_addr_t addr)
3114 RAMBlock *block;
3116 QLIST_FOREACH(block, &ram_list.blocks, next) {
3117 if (addr - block->offset < block->length) {
3118 /* Move this entry to the start of the list. */
3119 if (block != QLIST_FIRST(&ram_list.blocks)) {
3120 QLIST_REMOVE(block, next);
3121 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3123 if (xen_mapcache_enabled()) {
3124 /* We need to check if the requested address is in RAM
3125 * because we don't want to map the entire memory in QEMU.
3126 * In that case just map until the end of the page.
3128 if (block->offset == 0) {
3129 return qemu_map_cache(addr, 0, 0);
3130 } else if (block->host == NULL) {
3131 block->host = qemu_map_cache(block->offset, block->length, 1);
3134 return block->host + (addr - block->offset);
3138 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3139 abort();
3141 return NULL;
3144 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3145 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3147 void *qemu_safe_ram_ptr(ram_addr_t addr)
3149 RAMBlock *block;
3151 QLIST_FOREACH(block, &ram_list.blocks, next) {
3152 if (addr - block->offset < block->length) {
3153 if (xen_mapcache_enabled()) {
3154 /* We need to check if the requested address is in RAM
3155 * because we don't want to map the entire memory in QEMU.
3156 * In that case just map until the end of the page.
3158 if (block->offset == 0) {
3159 return qemu_map_cache(addr, 0, 0);
3160 } else if (block->host == NULL) {
3161 block->host = qemu_map_cache(block->offset, block->length, 1);
3164 return block->host + (addr - block->offset);
3168 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3169 abort();
3171 return NULL;
3174 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3175 * but takes a size argument */
3176 void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
3178 if (xen_mapcache_enabled())
3179 return qemu_map_cache(addr, *size, 1);
3180 else {
3181 RAMBlock *block;
3183 QLIST_FOREACH(block, &ram_list.blocks, next) {
3184 if (addr - block->offset < block->length) {
3185 if (addr - block->offset + *size > block->length)
3186 *size = block->length - addr + block->offset;
3187 return block->host + (addr - block->offset);
3191 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3192 abort();
3194 *size = 0;
3195 return NULL;
3199 void qemu_put_ram_ptr(void *addr)
3201 trace_qemu_put_ram_ptr(addr);
3204 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3206 RAMBlock *block;
3207 uint8_t *host = ptr;
3209 if (xen_mapcache_enabled()) {
3210 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3211 return 0;
3214 QLIST_FOREACH(block, &ram_list.blocks, next) {
3215 /* This case appears when the block is not mapped. */
3216 if (block->host == NULL) {
3217 continue;
3219 if (host - block->host < block->length) {
3220 *ram_addr = block->offset + (host - block->host);
3221 return 0;
3225 return -1;
3228 /* Some of the softmmu routines need to translate from a host pointer
3229 (typically a TLB entry) back to a ram offset. */
3230 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3232 ram_addr_t ram_addr;
3234 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3235 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3236 abort();
3238 return ram_addr;
3241 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3243 #ifdef DEBUG_UNASSIGNED
3244 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3245 #endif
3246 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3247 do_unassigned_access(addr, 0, 0, 0, 1);
3248 #endif
3249 return 0;
3252 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3254 #ifdef DEBUG_UNASSIGNED
3255 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3256 #endif
3257 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3258 do_unassigned_access(addr, 0, 0, 0, 2);
3259 #endif
3260 return 0;
3263 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3265 #ifdef DEBUG_UNASSIGNED
3266 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3267 #endif
3268 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3269 do_unassigned_access(addr, 0, 0, 0, 4);
3270 #endif
3271 return 0;
3274 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3276 #ifdef DEBUG_UNASSIGNED
3277 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3278 #endif
3279 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3280 do_unassigned_access(addr, 1, 0, 0, 1);
3281 #endif
3284 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3286 #ifdef DEBUG_UNASSIGNED
3287 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3288 #endif
3289 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3290 do_unassigned_access(addr, 1, 0, 0, 2);
3291 #endif
3294 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3296 #ifdef DEBUG_UNASSIGNED
3297 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3298 #endif
3299 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3300 do_unassigned_access(addr, 1, 0, 0, 4);
3301 #endif
3304 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3305 unassigned_mem_readb,
3306 unassigned_mem_readw,
3307 unassigned_mem_readl,
3310 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3311 unassigned_mem_writeb,
3312 unassigned_mem_writew,
3313 unassigned_mem_writel,
3316 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3317 uint32_t val)
3319 int dirty_flags;
3320 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3321 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3322 #if !defined(CONFIG_USER_ONLY)
3323 tb_invalidate_phys_page_fast(ram_addr, 1);
3324 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3325 #endif
3327 stb_p(qemu_get_ram_ptr(ram_addr), val);
3328 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3329 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3330 /* we remove the notdirty callback only if the code has been
3331 flushed */
3332 if (dirty_flags == 0xff)
3333 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3336 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3337 uint32_t val)
3339 int dirty_flags;
3340 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3341 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3342 #if !defined(CONFIG_USER_ONLY)
3343 tb_invalidate_phys_page_fast(ram_addr, 2);
3344 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3345 #endif
3347 stw_p(qemu_get_ram_ptr(ram_addr), val);
3348 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3349 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3350 /* we remove the notdirty callback only if the code has been
3351 flushed */
3352 if (dirty_flags == 0xff)
3353 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3356 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3357 uint32_t val)
3359 int dirty_flags;
3360 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3361 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3362 #if !defined(CONFIG_USER_ONLY)
3363 tb_invalidate_phys_page_fast(ram_addr, 4);
3364 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3365 #endif
3367 stl_p(qemu_get_ram_ptr(ram_addr), val);
3368 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3369 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3370 /* we remove the notdirty callback only if the code has been
3371 flushed */
3372 if (dirty_flags == 0xff)
3373 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3376 static CPUReadMemoryFunc * const error_mem_read[3] = {
3377 NULL, /* never used */
3378 NULL, /* never used */
3379 NULL, /* never used */
3382 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3383 notdirty_mem_writeb,
3384 notdirty_mem_writew,
3385 notdirty_mem_writel,
3388 /* Generate a debug exception if a watchpoint has been hit. */
3389 static void check_watchpoint(int offset, int len_mask, int flags)
3391 CPUState *env = cpu_single_env;
3392 target_ulong pc, cs_base;
3393 TranslationBlock *tb;
3394 target_ulong vaddr;
3395 CPUWatchpoint *wp;
3396 int cpu_flags;
3398 if (env->watchpoint_hit) {
3399 /* We re-entered the check after replacing the TB. Now raise
3400 * the debug interrupt so that it will trigger after the
3401 * current instruction. */
3402 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3403 return;
3405 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3406 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3407 if ((vaddr == (wp->vaddr & len_mask) ||
3408 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3409 wp->flags |= BP_WATCHPOINT_HIT;
3410 if (!env->watchpoint_hit) {
3411 env->watchpoint_hit = wp;
3412 tb = tb_find_pc(env->mem_io_pc);
3413 if (!tb) {
3414 cpu_abort(env, "check_watchpoint: could not find TB for "
3415 "pc=%p", (void *)env->mem_io_pc);
3417 cpu_restore_state(tb, env, env->mem_io_pc);
3418 tb_phys_invalidate(tb, -1);
3419 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3420 env->exception_index = EXCP_DEBUG;
3421 } else {
3422 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3423 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3425 cpu_resume_from_signal(env, NULL);
3427 } else {
3428 wp->flags &= ~BP_WATCHPOINT_HIT;
3433 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3434 so these check for a hit then pass through to the normal out-of-line
3435 phys routines. */
3436 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3438 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3439 return ldub_phys(addr);
3442 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3444 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3445 return lduw_phys(addr);
3448 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3450 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3451 return ldl_phys(addr);
3454 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3455 uint32_t val)
3457 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3458 stb_phys(addr, val);
3461 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3462 uint32_t val)
3464 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3465 stw_phys(addr, val);
3468 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3469 uint32_t val)
3471 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3472 stl_phys(addr, val);
3475 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3476 watch_mem_readb,
3477 watch_mem_readw,
3478 watch_mem_readl,
3481 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3482 watch_mem_writeb,
3483 watch_mem_writew,
3484 watch_mem_writel,
3487 static inline uint32_t subpage_readlen (subpage_t *mmio,
3488 target_phys_addr_t addr,
3489 unsigned int len)
3491 unsigned int idx = SUBPAGE_IDX(addr);
3492 #if defined(DEBUG_SUBPAGE)
3493 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3494 mmio, len, addr, idx);
3495 #endif
3497 addr += mmio->region_offset[idx];
3498 idx = mmio->sub_io_index[idx];
3499 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3502 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3503 uint32_t value, unsigned int len)
3505 unsigned int idx = SUBPAGE_IDX(addr);
3506 #if defined(DEBUG_SUBPAGE)
3507 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3508 __func__, mmio, len, addr, idx, value);
3509 #endif
3511 addr += mmio->region_offset[idx];
3512 idx = mmio->sub_io_index[idx];
3513 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3516 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3518 return subpage_readlen(opaque, addr, 0);
3521 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3522 uint32_t value)
3524 subpage_writelen(opaque, addr, value, 0);
3527 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3529 return subpage_readlen(opaque, addr, 1);
3532 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3533 uint32_t value)
3535 subpage_writelen(opaque, addr, value, 1);
3538 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3540 return subpage_readlen(opaque, addr, 2);
3543 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3544 uint32_t value)
3546 subpage_writelen(opaque, addr, value, 2);
3549 static CPUReadMemoryFunc * const subpage_read[] = {
3550 &subpage_readb,
3551 &subpage_readw,
3552 &subpage_readl,
3555 static CPUWriteMemoryFunc * const subpage_write[] = {
3556 &subpage_writeb,
3557 &subpage_writew,
3558 &subpage_writel,
3561 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3562 ram_addr_t memory, ram_addr_t region_offset)
3564 int idx, eidx;
3566 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3567 return -1;
3568 idx = SUBPAGE_IDX(start);
3569 eidx = SUBPAGE_IDX(end);
3570 #if defined(DEBUG_SUBPAGE)
3571 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3572 mmio, start, end, idx, eidx, memory);
3573 #endif
3574 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3575 memory = IO_MEM_UNASSIGNED;
3576 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3577 for (; idx <= eidx; idx++) {
3578 mmio->sub_io_index[idx] = memory;
3579 mmio->region_offset[idx] = region_offset;
3582 return 0;
3585 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3586 ram_addr_t orig_memory,
3587 ram_addr_t region_offset)
3589 subpage_t *mmio;
3590 int subpage_memory;
3592 mmio = qemu_mallocz(sizeof(subpage_t));
3594 mmio->base = base;
3595 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3596 DEVICE_NATIVE_ENDIAN);
3597 #if defined(DEBUG_SUBPAGE)
3598 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3599 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3600 #endif
3601 *phys = subpage_memory | IO_MEM_SUBPAGE;
3602 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3604 return mmio;
3607 static int get_free_io_mem_idx(void)
3609 int i;
3611 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3612 if (!io_mem_used[i]) {
3613 io_mem_used[i] = 1;
3614 return i;
3616 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3617 return -1;
3621 * Usually, devices operate in little endian mode. There are devices out
3622 * there that operate in big endian too. Each device gets byte-swapped
3623 * MMIO if plugged onto a CPU of the other endianness.
3625 * CPU Device swap?
3627 * little little no
3628 * little big yes
3629 * big little yes
3630 * big big no
3633 typedef struct SwapEndianContainer {
3634 CPUReadMemoryFunc *read[3];
3635 CPUWriteMemoryFunc *write[3];
3636 void *opaque;
3637 } SwapEndianContainer;
3639 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3641 uint32_t val;
3642 SwapEndianContainer *c = opaque;
3643 val = c->read[0](c->opaque, addr);
3644 return val;
3647 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3649 uint32_t val;
3650 SwapEndianContainer *c = opaque;
3651 val = bswap16(c->read[1](c->opaque, addr));
3652 return val;
3655 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3657 uint32_t val;
3658 SwapEndianContainer *c = opaque;
3659 val = bswap32(c->read[2](c->opaque, addr));
3660 return val;
3663 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3664 swapendian_mem_readb,
3665 swapendian_mem_readw,
3666 swapendian_mem_readl
3669 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3670 uint32_t val)
3672 SwapEndianContainer *c = opaque;
3673 c->write[0](c->opaque, addr, val);
3676 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3677 uint32_t val)
3679 SwapEndianContainer *c = opaque;
3680 c->write[1](c->opaque, addr, bswap16(val));
3683 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3684 uint32_t val)
3686 SwapEndianContainer *c = opaque;
3687 c->write[2](c->opaque, addr, bswap32(val));
3690 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3691 swapendian_mem_writeb,
3692 swapendian_mem_writew,
3693 swapendian_mem_writel
3696 static void swapendian_init(int io_index)
3698 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3699 int i;
3701 /* Swap mmio for big endian targets */
3702 c->opaque = io_mem_opaque[io_index];
3703 for (i = 0; i < 3; i++) {
3704 c->read[i] = io_mem_read[io_index][i];
3705 c->write[i] = io_mem_write[io_index][i];
3707 io_mem_read[io_index][i] = swapendian_readfn[i];
3708 io_mem_write[io_index][i] = swapendian_writefn[i];
3710 io_mem_opaque[io_index] = c;
3713 static void swapendian_del(int io_index)
3715 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3716 qemu_free(io_mem_opaque[io_index]);
3720 /* mem_read and mem_write are arrays of functions containing the
3721 function to access byte (index 0), word (index 1) and dword (index
3722 2). Functions can be omitted with a NULL function pointer.
3723 If io_index is non zero, the corresponding io zone is
3724 modified. If it is zero, a new io zone is allocated. The return
3725 value can be used with cpu_register_physical_memory(). (-1) is
3726 returned if error. */
3727 static int cpu_register_io_memory_fixed(int io_index,
3728 CPUReadMemoryFunc * const *mem_read,
3729 CPUWriteMemoryFunc * const *mem_write,
3730 void *opaque, enum device_endian endian)
3732 int i;
3734 if (io_index <= 0) {
3735 io_index = get_free_io_mem_idx();
3736 if (io_index == -1)
3737 return io_index;
3738 } else {
3739 io_index >>= IO_MEM_SHIFT;
3740 if (io_index >= IO_MEM_NB_ENTRIES)
3741 return -1;
3744 for (i = 0; i < 3; ++i) {
3745 io_mem_read[io_index][i]
3746 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3748 for (i = 0; i < 3; ++i) {
3749 io_mem_write[io_index][i]
3750 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3752 io_mem_opaque[io_index] = opaque;
3754 switch (endian) {
3755 case DEVICE_BIG_ENDIAN:
3756 #ifndef TARGET_WORDS_BIGENDIAN
3757 swapendian_init(io_index);
3758 #endif
3759 break;
3760 case DEVICE_LITTLE_ENDIAN:
3761 #ifdef TARGET_WORDS_BIGENDIAN
3762 swapendian_init(io_index);
3763 #endif
3764 break;
3765 case DEVICE_NATIVE_ENDIAN:
3766 default:
3767 break;
3770 return (io_index << IO_MEM_SHIFT);
3773 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3774 CPUWriteMemoryFunc * const *mem_write,
3775 void *opaque, enum device_endian endian)
3777 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
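/*
 * A minimal sketch (stub callbacks, placeholder names) of registering a
 * little-endian device's MMIO handlers with the function above and then
 * mapping them.  NULL entries fall back to the unassigned_mem_* handlers,
 * and on a big-endian target the swapendian wrappers are installed
 * automatically.
 */
#if 0
static uint32_t my_dev_readl(void *opaque, target_phys_addr_t addr)
{
    return 0;   /* stub */
}

static void my_dev_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
{
    /* stub */
}

static CPUReadMemoryFunc * const my_dev_read[3] = { NULL, NULL, my_dev_readl };
static CPUWriteMemoryFunc * const my_dev_write[3] = { NULL, NULL, my_dev_writel };

static void example_map_device(void *dev_state, target_phys_addr_t base)
{
    int io = cpu_register_io_memory(my_dev_read, my_dev_write, dev_state,
                                    DEVICE_LITTLE_ENDIAN);
    cpu_register_physical_memory_log(base, TARGET_PAGE_SIZE, io, 0, false);
}
#endif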
3780 void cpu_unregister_io_memory(int io_table_address)
3782 int i;
3783 int io_index = io_table_address >> IO_MEM_SHIFT;
3785 swapendian_del(io_index);
3787 for (i = 0; i < 3; i++) {
3788 io_mem_read[io_index][i] = unassigned_mem_read[i];
3789 io_mem_write[io_index][i] = unassigned_mem_write[i];
3791 io_mem_opaque[io_index] = NULL;
3792 io_mem_used[io_index] = 0;
3795 static void io_mem_init(void)
3797 int i;
3799 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3800 unassigned_mem_write, NULL,
3801 DEVICE_NATIVE_ENDIAN);
3802 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3803 unassigned_mem_write, NULL,
3804 DEVICE_NATIVE_ENDIAN);
3805 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3806 notdirty_mem_write, NULL,
3807 DEVICE_NATIVE_ENDIAN);
3808 for (i=0; i<5; i++)
3809 io_mem_used[i] = 1;
3811 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3812 watch_mem_write, NULL,
3813 DEVICE_NATIVE_ENDIAN);
3816 #endif /* !defined(CONFIG_USER_ONLY) */
3818 /* physical memory access (slow version, mainly for debug) */
3819 #if defined(CONFIG_USER_ONLY)
3820 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3821 uint8_t *buf, int len, int is_write)
3823 int l, flags;
3824 target_ulong page;
3825 void * p;
3827 while (len > 0) {
3828 page = addr & TARGET_PAGE_MASK;
3829 l = (page + TARGET_PAGE_SIZE) - addr;
3830 if (l > len)
3831 l = len;
3832 flags = page_get_flags(page);
3833 if (!(flags & PAGE_VALID))
3834 return -1;
3835 if (is_write) {
3836 if (!(flags & PAGE_WRITE))
3837 return -1;
3838 /* XXX: this code should not depend on lock_user */
3839 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3840 return -1;
3841 memcpy(p, buf, l);
3842 unlock_user(p, addr, l);
3843 } else {
3844 if (!(flags & PAGE_READ))
3845 return -1;
3846 /* XXX: this code should not depend on lock_user */
3847 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3848 return -1;
3849 memcpy(buf, p, l);
3850 unlock_user(p, addr, 0);
3852 len -= l;
3853 buf += l;
3854 addr += l;
3856 return 0;
3859 #else
3860 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3861 int len, int is_write)
3863 int l, io_index;
3864 uint8_t *ptr;
3865 uint32_t val;
3866 target_phys_addr_t page;
3867 unsigned long pd;
3868 PhysPageDesc *p;
3870 while (len > 0) {
3871 page = addr & TARGET_PAGE_MASK;
3872 l = (page + TARGET_PAGE_SIZE) - addr;
3873 if (l > len)
3874 l = len;
3875 p = phys_page_find(page >> TARGET_PAGE_BITS);
3876 if (!p) {
3877 pd = IO_MEM_UNASSIGNED;
3878 } else {
3879 pd = p->phys_offset;
3882 if (is_write) {
3883 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3884 target_phys_addr_t addr1 = addr;
3885 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3886 if (p)
3887 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3888 /* XXX: could force cpu_single_env to NULL to avoid
3889 potential bugs */
3890 if (l >= 4 && ((addr1 & 3) == 0)) {
3891 /* 32 bit write access */
3892 val = ldl_p(buf);
3893 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3894 l = 4;
3895 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3896 /* 16 bit write access */
3897 val = lduw_p(buf);
3898 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3899 l = 2;
3900 } else {
3901 /* 8 bit write access */
3902 val = ldub_p(buf);
3903 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3904 l = 1;
3906 } else {
3907 unsigned long addr1;
3908 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3909 /* RAM case */
3910 ptr = qemu_get_ram_ptr(addr1);
3911 memcpy(ptr, buf, l);
3912 if (!cpu_physical_memory_is_dirty(addr1)) {
3913 /* invalidate code */
3914 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3915 /* set dirty bit */
3916 cpu_physical_memory_set_dirty_flags(
3917 addr1, (0xff & ~CODE_DIRTY_FLAG));
3919 /* qemu doesn't execute guest code directly, but kvm does,
3920 so flush the instruction caches */
3921 if (kvm_enabled())
3922 flush_icache_range((unsigned long)ptr,
3923 ((unsigned long)ptr)+l);
3924 qemu_put_ram_ptr(ptr);
3926 } else {
3927 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3928 !(pd & IO_MEM_ROMD)) {
3929 target_phys_addr_t addr1 = addr;
3930 /* I/O case */
3931 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3932 if (p)
3933 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3934 if (l >= 4 && ((addr1 & 3) == 0)) {
3935 /* 32 bit read access */
3936 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3937 stl_p(buf, val);
3938 l = 4;
3939 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3940 /* 16 bit read access */
3941 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3942 stw_p(buf, val);
3943 l = 2;
3944 } else {
3945 /* 8 bit read access */
3946 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3947 stb_p(buf, val);
3948 l = 1;
3950 } else {
3951 /* RAM case */
3952 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3953 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3954 qemu_put_ram_ptr(ptr);
3957 len -= l;
3958 buf += l;
3959 addr += l;
3963 /* used for ROM loading : can write in RAM and ROM */
3964 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3965 const uint8_t *buf, int len)
3967 int l;
3968 uint8_t *ptr;
3969 target_phys_addr_t page;
3970 unsigned long pd;
3971 PhysPageDesc *p;
3973 while (len > 0) {
3974 page = addr & TARGET_PAGE_MASK;
3975 l = (page + TARGET_PAGE_SIZE) - addr;
3976 if (l > len)
3977 l = len;
3978 p = phys_page_find(page >> TARGET_PAGE_BITS);
3979 if (!p) {
3980 pd = IO_MEM_UNASSIGNED;
3981 } else {
3982 pd = p->phys_offset;
3985 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3986 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3987 !(pd & IO_MEM_ROMD)) {
3988 /* do nothing */
3989 } else {
3990 unsigned long addr1;
3991 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3992 /* ROM/RAM case */
3993 ptr = qemu_get_ram_ptr(addr1);
3994 memcpy(ptr, buf, l);
3995 qemu_put_ram_ptr(ptr);
3997 len -= l;
3998 buf += l;
3999 addr += l;
4003 typedef struct {
4004 void *buffer;
4005 target_phys_addr_t addr;
4006 target_phys_addr_t len;
4007 } BounceBuffer;
4009 static BounceBuffer bounce;
4011 typedef struct MapClient {
4012 void *opaque;
4013 void (*callback)(void *opaque);
4014 QLIST_ENTRY(MapClient) link;
4015 } MapClient;
4017 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4018 = QLIST_HEAD_INITIALIZER(map_client_list);
4020 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4022 MapClient *client = qemu_malloc(sizeof(*client));
4024 client->opaque = opaque;
4025 client->callback = callback;
4026 QLIST_INSERT_HEAD(&map_client_list, client, link);
4027 return client;
4030 void cpu_unregister_map_client(void *_client)
4032 MapClient *client = (MapClient *)_client;
4034 QLIST_REMOVE(client, link);
4035 qemu_free(client);
4038 static void cpu_notify_map_clients(void)
4040 MapClient *client;
4042 while (!QLIST_EMPTY(&map_client_list)) {
4043 client = QLIST_FIRST(&map_client_list);
4044 client->callback(client->opaque);
4045 cpu_unregister_map_client(client);
4049 /* Map a physical memory region into a host virtual address.
4050 * May map a subset of the requested range, given by and returned in *plen.
4051 * May return NULL if resources needed to perform the mapping are exhausted.
4052 * Use only for reads OR writes - not for read-modify-write operations.
4053 * Use cpu_register_map_client() to know when retrying the map operation is
4054 * likely to succeed.
4056 void *cpu_physical_memory_map(target_phys_addr_t addr,
4057 target_phys_addr_t *plen,
4058 int is_write)
4060 target_phys_addr_t len = *plen;
4061 target_phys_addr_t todo = 0;
4062 int l;
4063 target_phys_addr_t page;
4064 unsigned long pd;
4065 PhysPageDesc *p;
4066 target_phys_addr_t addr1 = addr;
4068 while (len > 0) {
4069 page = addr & TARGET_PAGE_MASK;
4070 l = (page + TARGET_PAGE_SIZE) - addr;
4071 if (l > len)
4072 l = len;
4073 p = phys_page_find(page >> TARGET_PAGE_BITS);
4074 if (!p) {
4075 pd = IO_MEM_UNASSIGNED;
4076 } else {
4077 pd = p->phys_offset;
4080 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4081 if (todo || bounce.buffer) {
4082 break;
4084 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4085 bounce.addr = addr;
4086 bounce.len = l;
4087 if (!is_write) {
4088 cpu_physical_memory_read(addr, bounce.buffer, l);
4091 *plen = l;
4092 return bounce.buffer;
4095 len -= l;
4096 addr += l;
4097 todo += l;
4099 *plen = todo;
4100 return qemu_ram_ptr_length(addr1, plen);
4103 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4104 * Will also mark the memory as dirty if is_write == 1. access_len gives
4105 * the amount of memory that was actually read or written by the caller.
4107 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4108 int is_write, target_phys_addr_t access_len)
4110 unsigned long flush_len = (unsigned long)access_len;
4112 if (buffer != bounce.buffer) {
4113 if (is_write) {
4114 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4115 while (access_len) {
4116 unsigned l;
4117 l = TARGET_PAGE_SIZE;
4118 if (l > access_len)
4119 l = access_len;
4120 if (!cpu_physical_memory_is_dirty(addr1)) {
4121 /* invalidate code */
4122 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4123 /* set dirty bit */
4124 cpu_physical_memory_set_dirty_flags(
4125 addr1, (0xff & ~CODE_DIRTY_FLAG));
4127 addr1 += l;
4128 access_len -= l;
4130 dma_flush_range((unsigned long)buffer,
4131 (unsigned long)buffer + flush_len);
4133 if (xen_mapcache_enabled()) {
4134 qemu_invalidate_entry(buffer);
4136 return;
4138 if (is_write) {
4139 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4141 qemu_vfree(bounce.buffer);
4142 bounce.buffer = NULL;
4143 cpu_notify_map_clients();
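/*
 * A sketch of the intended map/unmap pairing for a zero-copy write into
 * guest memory.  Names and error handling are illustrative; when the map
 * fails (e.g. the single bounce buffer is busy), real callers register a
 * map client and retry.
 */
#if 0
static int example_zero_copy_write(target_phys_addr_t gpa,
                                   const uint8_t *data, target_phys_addr_t len)
{
    target_phys_addr_t plen = len;
    void *host = cpu_physical_memory_map(gpa, &plen, 1 /* is_write */);
    if (!host) {
        return -1;
    }
    memcpy(host, data, plen);                 /* plen may be less than len */
    cpu_physical_memory_unmap(host, plen, 1, plen);
    return plen == len ? 0 : -1;
}
#endif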
4146 /* warning: addr must be aligned */
4147 uint32_t ldl_phys(target_phys_addr_t addr)
4149 int io_index;
4150 uint8_t *ptr;
4151 uint32_t val;
4152 unsigned long pd;
4153 PhysPageDesc *p;
4155 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4156 if (!p) {
4157 pd = IO_MEM_UNASSIGNED;
4158 } else {
4159 pd = p->phys_offset;
4162 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4163 !(pd & IO_MEM_ROMD)) {
4164 /* I/O case */
4165 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4166 if (p)
4167 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4168 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4169 } else {
4170 /* RAM case */
4171 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4172 (addr & ~TARGET_PAGE_MASK);
4173 val = ldl_p(ptr);
4174 }
4175 return val;
4176 }
4178 /* warning: addr must be aligned */
4179 uint64_t ldq_phys(target_phys_addr_t addr)
4181 int io_index;
4182 uint8_t *ptr;
4183 uint64_t val;
4184 unsigned long pd;
4185 PhysPageDesc *p;
4187 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4188 if (!p) {
4189 pd = IO_MEM_UNASSIGNED;
4190 } else {
4191 pd = p->phys_offset;
4194 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4195 !(pd & IO_MEM_ROMD)) {
4196 /* I/O case */
4197 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4198 if (p)
4199 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4200 #ifdef TARGET_WORDS_BIGENDIAN
4201 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4202 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4203 #else
4204 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4205 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4206 #endif
4207 } else {
4208 /* RAM case */
4209 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4210 (addr & ~TARGET_PAGE_MASK);
4211 val = ldq_p(ptr);
4212 }
4213 return val;
4214 }
4216 /* XXX: optimize */
4217 uint32_t ldub_phys(target_phys_addr_t addr)
4219 uint8_t val;
4220 cpu_physical_memory_read(addr, &val, 1);
4221 return val;
4222 }
4224 /* warning: addr must be aligned */
4225 uint32_t lduw_phys(target_phys_addr_t addr)
4227 int io_index;
4228 uint8_t *ptr;
4229 uint64_t val;
4230 unsigned long pd;
4231 PhysPageDesc *p;
4233 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4234 if (!p) {
4235 pd = IO_MEM_UNASSIGNED;
4236 } else {
4237 pd = p->phys_offset;
4240 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4241 !(pd & IO_MEM_ROMD)) {
4242 /* I/O case */
4243 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4244 if (p)
4245 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4246 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4247 } else {
4248 /* RAM case */
4249 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4250 (addr & ~TARGET_PAGE_MASK);
4251 val = lduw_p(ptr);
4252 }
4253 return val;
4254 }
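/* Usage sketch (editor's addition, illustrative only): the ld*_phys() helpers
 * above let device models read guest-physical memory in target byte order
 * without mapping it first.  The descriptor layout, handle_desc() and
 * process_buffer() are hypothetical.
 *
 *   static void handle_desc(target_phys_addr_t desc_pa)
 *   {
 *       uint64_t buf  = ldq_phys(desc_pa);             // 8-byte field at +0
 *       uint32_t len  = ldl_phys(desc_pa + 8);         // 4-byte field at +8
 *       uint32_t flag = lduw_phys(desc_pa + 12);       // 2-byte field at +12
 *       uint32_t tag  = ldub_phys(desc_pa + 14);       // 1-byte field at +14
 *
 *       process_buffer(buf, len, flag, tag);           // hypothetical
 *   }
 *
 * Keeping each field naturally aligned matters: as the warnings above say,
 * the 2-, 4- and 8-byte variants require an aligned addr.
 */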
4256 /* warning: addr must be aligned. The RAM page is not marked as dirty
4257 and the code inside is not invalidated. This is useful if the dirty
4258 bits are used to track modified PTEs. */
4259 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4261 int io_index;
4262 uint8_t *ptr;
4263 unsigned long pd;
4264 PhysPageDesc *p;
4266 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4267 if (!p) {
4268 pd = IO_MEM_UNASSIGNED;
4269 } else {
4270 pd = p->phys_offset;
4273 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4274 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4275 if (p)
4276 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4277 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4278 } else {
4279 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4280 ptr = qemu_get_ram_ptr(addr1);
4281 stl_p(ptr, val);
4283 if (unlikely(in_migration)) {
4284 if (!cpu_physical_memory_is_dirty(addr1)) {
4285 /* invalidate code */
4286 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4287 /* set dirty bit */
4288 cpu_physical_memory_set_dirty_flags(
4289 addr1, (0xff & ~CODE_DIRTY_FLAG));
4290 }
4291 }
4292 }
4293 }
4295 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4297 int io_index;
4298 uint8_t *ptr;
4299 unsigned long pd;
4300 PhysPageDesc *p;
4302 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4303 if (!p) {
4304 pd = IO_MEM_UNASSIGNED;
4305 } else {
4306 pd = p->phys_offset;
4309 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4310 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4311 if (p)
4312 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4313 #ifdef TARGET_WORDS_BIGENDIAN
4314 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4315 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4316 #else
4317 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4318 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4319 #endif
4320 } else {
4321 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4322 (addr & ~TARGET_PAGE_MASK);
4323 stq_p(ptr, val);
4324 }
4325 }
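/* Usage sketch (editor's addition, illustrative only): the *_notdirty stores
 * above are intended for writes made by the emulator itself, such as setting
 * accessed/dirty bits in a guest page-table entry during a software page-table
 * walk, so that these bookkeeping writes are not mistaken for guest stores
 * when the dirty bits are used to track modified PTEs.  PTE_ACCESSED is a
 * hypothetical bit definition.
 *
 *   static uint32_t pte_set_accessed(target_phys_addr_t pte_addr)
 *   {
 *       uint32_t pte = ldl_phys(pte_addr);
 *       if (!(pte & PTE_ACCESSED)) {
 *           pte |= PTE_ACCESSED;
 *           stl_phys_notdirty(pte_addr, pte);
 *       }
 *       return pte;
 *   }
 */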
4327 /* warning: addr must be aligned */
4328 void stl_phys(target_phys_addr_t addr, uint32_t val)
4330 int io_index;
4331 uint8_t *ptr;
4332 unsigned long pd;
4333 PhysPageDesc *p;
4335 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4336 if (!p) {
4337 pd = IO_MEM_UNASSIGNED;
4338 } else {
4339 pd = p->phys_offset;
4342 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4343 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4344 if (p)
4345 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4346 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4347 } else {
4348 unsigned long addr1;
4349 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4350 /* RAM case */
4351 ptr = qemu_get_ram_ptr(addr1);
4352 stl_p(ptr, val);
4353 if (!cpu_physical_memory_is_dirty(addr1)) {
4354 /* invalidate code */
4355 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4356 /* set dirty bit */
4357 cpu_physical_memory_set_dirty_flags(addr1,
4358 (0xff & ~CODE_DIRTY_FLAG));
4359 }
4360 }
4361 }
4363 /* XXX: optimize */
4364 void stb_phys(target_phys_addr_t addr, uint32_t val)
4366 uint8_t v = val;
4367 cpu_physical_memory_write(addr, &v, 1);
4368 }
4370 /* warning: addr must be aligned */
4371 void stw_phys(target_phys_addr_t addr, uint32_t val)
4373 int io_index;
4374 uint8_t *ptr;
4375 unsigned long pd;
4376 PhysPageDesc *p;
4378 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4379 if (!p) {
4380 pd = IO_MEM_UNASSIGNED;
4381 } else {
4382 pd = p->phys_offset;
4385 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4386 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4387 if (p)
4388 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4389 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4390 } else {
4391 unsigned long addr1;
4392 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4393 /* RAM case */
4394 ptr = qemu_get_ram_ptr(addr1);
4395 stw_p(ptr, val);
4396 if (!cpu_physical_memory_is_dirty(addr1)) {
4397 /* invalidate code */
4398 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4399 /* set dirty bit */
4400 cpu_physical_memory_set_dirty_flags(addr1,
4401 (0xff & ~CODE_DIRTY_FLAG));
4402 }
4403 }
4404 }
4406 /* XXX: optimize */
4407 void stq_phys(target_phys_addr_t addr, uint64_t val)
4409 val = tswap64(val);
4410 cpu_physical_memory_write(addr, &val, 8);
4411 }
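/* Usage sketch (editor's addition, illustrative only): unlike the *_notdirty
 * variants, the plain st*_phys() helpers mark the page dirty and invalidate
 * any translated code derived from it, which is what a device model wants
 * when it writes guest-visible data.  The completion-record layout and
 * post_completion() are hypothetical.
 *
 *   static void post_completion(target_phys_addr_t rec_pa, uint32_t status)
 *   {
 *       stl_phys(rec_pa, status);                      // 32-bit status word
 *       stw_phys(rec_pa + 4, 1);                       // 16-bit "done" flag
 *       stb_phys(rec_pa + 6, 0);                       // clear 8-bit error code
 *   }
 */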
4413 /* virtual memory access for debug (includes writing to ROM) */
4414 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4415 uint8_t *buf, int len, int is_write)
4417 int l;
4418 target_phys_addr_t phys_addr;
4419 target_ulong page;
4421 while (len > 0) {
4422 page = addr & TARGET_PAGE_MASK;
4423 phys_addr = cpu_get_phys_page_debug(env, page);
4424 /* if no physical page mapped, return an error */
4425 if (phys_addr == -1)
4426 return -1;
4427 l = (page + TARGET_PAGE_SIZE) - addr;
4428 if (l > len)
4429 l = len;
4430 phys_addr += (addr & ~TARGET_PAGE_MASK);
4431 if (is_write)
4432 cpu_physical_memory_write_rom(phys_addr, buf, l);
4433 else
4434 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4435 len -= l;
4436 buf += l;
4437 addr += l;
4438 }
4439 return 0;
4440 }
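/* Usage sketch (editor's addition, illustrative only): cpu_memory_rw_debug()
 * takes guest virtual addresses and is what the gdb stub and the monitor use
 * to inspect guest memory.  A debugger-style word dump could look like:
 *
 *   static void debug_dump_word(CPUState *env, target_ulong vaddr)
 *   {
 *       uint8_t buf[4];
 *
 *       if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
 *           printf("no mapping at " TARGET_FMT_lx "\n", vaddr);
 *           return;
 *       }
 *       printf(TARGET_FMT_lx ": %02x %02x %02x %02x\n",
 *              vaddr, buf[0], buf[1], buf[2], buf[3]);
 *   }
 */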
4441 #endif
4443 /* In deterministic execution (icount) mode, instructions that perform
4444 device I/O must be the last instruction in the TB. */
4445 void cpu_io_recompile(CPUState *env, void *retaddr)
4447 TranslationBlock *tb;
4448 uint32_t n, cflags;
4449 target_ulong pc, cs_base;
4450 uint64_t flags;
4452 tb = tb_find_pc((unsigned long)retaddr);
4453 if (!tb) {
4454 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4455 retaddr);
4456 }
4457 n = env->icount_decr.u16.low + tb->icount;
4458 cpu_restore_state(tb, env, (unsigned long)retaddr);
4459 /* Calculate how many instructions had been executed before the fault
4460 occurred. */
4461 n = n - env->icount_decr.u16.low;
4462 /* Generate a new TB ending on the I/O insn. */
4463 n++;
4464 /* On MIPS and SH, delay slot instructions can only be restarted if
4465 they were already the first instruction in the TB. If this is not
4466 the first instruction in a TB then re-execute the preceding
4467 branch. */
4468 #if defined(TARGET_MIPS)
4469 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4470 env->active_tc.PC -= 4;
4471 env->icount_decr.u16.low++;
4472 env->hflags &= ~MIPS_HFLAG_BMASK;
4473 }
4474 #elif defined(TARGET_SH4)
4475 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4476 && n > 1) {
4477 env->pc -= 2;
4478 env->icount_decr.u16.low++;
4479 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4480 }
4481 #endif
4482 /* This should never happen. */
4483 if (n > CF_COUNT_MASK)
4484 cpu_abort(env, "TB too big during recompile");
4486 cflags = n | CF_LAST_IO;
4487 pc = tb->pc;
4488 cs_base = tb->cs_base;
4489 flags = tb->flags;
4490 tb_phys_invalidate(tb, -1);
4491 /* FIXME: In theory this could raise an exception. In practice
4492 we have already translated the block once so it's probably ok. */
4493 tb_gen_code(env, pc, cs_base, flags, cflags);
4494 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4495 the first in the TB) then we end up generating a whole new TB and
4496 repeating the fault, which is horribly inefficient.
4497 Better would be to execute just this insn uncached, or generate a
4498 second new TB. */
4499 cpu_resume_from_signal(env, NULL);
4500 }
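/* Worked example (editor's addition) of the icount arithmetic above: suppose
 * the interrupted TB was translated with tb->icount == 5 and the I/O access
 * was made by its 3rd instruction.  After cpu_restore_state() the subtraction
 * leaves n == 2 (instructions fully executed before the fault), and n++ makes
 * it 3, so the block is regenerated with
 *
 *   cflags = 3 | CF_LAST_IO;
 *   tb_gen_code(env, pc, cs_base, flags, cflags);
 *
 * i.e. a TB covering exactly those three instructions, with the I/O
 * instruction forced to be the last one so that the instruction counter is
 * exact when the access is replayed.
 */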
4502 #if !defined(CONFIG_USER_ONLY)
4504 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4506 int i, target_code_size, max_target_code_size;
4507 int direct_jmp_count, direct_jmp2_count, cross_page;
4508 TranslationBlock *tb;
4510 target_code_size = 0;
4511 max_target_code_size = 0;
4512 cross_page = 0;
4513 direct_jmp_count = 0;
4514 direct_jmp2_count = 0;
4515 for(i = 0; i < nb_tbs; i++) {
4516 tb = &tbs[i];
4517 target_code_size += tb->size;
4518 if (tb->size > max_target_code_size)
4519 max_target_code_size = tb->size;
4520 if (tb->page_addr[1] != -1)
4521 cross_page++;
4522 if (tb->tb_next_offset[0] != 0xffff) {
4523 direct_jmp_count++;
4524 if (tb->tb_next_offset[1] != 0xffff) {
4525 direct_jmp2_count++;
4526 }
4527 }
4528 }
4529 /* XXX: avoid using doubles ? */
4530 cpu_fprintf(f, "Translation buffer state:\n");
4531 cpu_fprintf(f, "gen code size %td/%ld\n",
4532 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4533 cpu_fprintf(f, "TB count %d/%d\n",
4534 nb_tbs, code_gen_max_blocks);
4535 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4536 nb_tbs ? target_code_size / nb_tbs : 0,
4537 max_target_code_size);
4538 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4539 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4540 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4541 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4542 cross_page,
4543 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4544 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4545 direct_jmp_count,
4546 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4547 direct_jmp2_count,
4548 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4549 cpu_fprintf(f, "\nStatistics:\n");
4550 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4551 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4552 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4553 #ifdef CONFIG_PROFILER
4554 tcg_dump_info(f, cpu_fprintf);
4555 #endif
4556 }
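/* Editor's note (illustrative): dump_exec_info() backs the monitor's
 * "info jit" command, but it only needs a (FILE *, fprintf_function) pair, so
 * it can also be called directly for ad-hoc debugging:
 *
 *   dump_exec_info(stderr, fprintf);
 *
 * The expansion ratio it prints is generated host code bytes divided by guest
 * code bytes, computed over all currently resident TBs.
 */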
4558 #define MMUSUFFIX _cmmu
4559 #define GETPC() NULL
4560 #define env cpu_single_env
4561 #define SOFTMMU_CODE_ACCESS
4563 #define SHIFT 0
4564 #include "softmmu_template.h"
4566 #define SHIFT 1
4567 #include "softmmu_template.h"
4569 #define SHIFT 2
4570 #include "softmmu_template.h"
4572 #define SHIFT 3
4573 #include "softmmu_template.h"
4575 #undef env
4577 #endif
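/* Editor's note (illustrative): the block above is the preprocessor-template
 * idiom used throughout QEMU: softmmu_template.h is included once per access
 * size (SHIFT 0..3, i.e. 1/2/4/8 bytes), here with MMUSUFFIX set to _cmmu and
 * GETPC() stubbed to NULL so the generated helpers serve translation-time
 * (code) accesses.  The same technique in miniature, with made-up names:
 *
 *   // mini_template.h -- expects TYPE and NAME to be defined by the includer
 *   static inline TYPE glue(load_, NAME)(const void *p)
 *   {
 *       TYPE v;
 *       memcpy(&v, p, sizeof(v));
 *       return v;
 *   }
 *   #undef TYPE
 *   #undef NAME
 *
 *   // user.c
 *   #define TYPE uint16_t
 *   #define NAME u16
 *   #include "mini_template.h"              // defines load_u16()
 *   #define TYPE uint32_t
 *   #define NAME u32
 *   #include "mini_template.h"              // defines load_u32()
 */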