[qemu/qemu-dev-zwu.git] / exec.c
blob 68f5b33a5bcb024111c1bbc2241c2b9daf22f29c (merge commit '3c688828bcb3afa2744e1f1729a40eef4a575b56' into upstream-merge)
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "exec-all.h"
30 #include "cache-utils.h"
32 #if !defined(TARGET_IA64)
33 #include "tcg.h"
34 #endif
36 #include "hw/hw.h"
37 #include "hw/qdev.h"
38 #include "osdep.h"
39 #include "kvm.h"
40 #include "hw/xen.h"
41 #include "qemu-timer.h"
42 #if defined(CONFIG_USER_ONLY)
43 #include <qemu.h>
44 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
45 #include <sys/param.h>
46 #if __FreeBSD_version >= 700104
47 #define HAVE_KINFO_GETVMMAP
48 #define sigqueue sigqueue_freebsd /* avoid redefinition */
49 #include <sys/time.h>
50 #include <sys/proc.h>
51 #include <machine/profile.h>
52 #define _KERNEL
53 #include <sys/user.h>
54 #undef _KERNEL
55 #undef sigqueue
56 #include <libutil.h>
57 #endif
58 #endif
59 #else /* !CONFIG_USER_ONLY */
60 #include "xen-mapcache.h"
61 #include "trace.h"
62 #endif
64 //#define DEBUG_TB_INVALIDATE
65 //#define DEBUG_FLUSH
66 //#define DEBUG_TLB
67 //#define DEBUG_UNASSIGNED
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
71 //#define DEBUG_TLB_CHECK
73 //#define DEBUG_IOPORT
74 //#define DEBUG_SUBPAGE
76 #if !defined(CONFIG_USER_ONLY)
77 /* TB consistency checks only implemented for usermode emulation. */
78 #undef DEBUG_TB_CHECK
79 #endif
81 #define SMC_BITMAP_USE_THRESHOLD 10
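/* Once a page has taken this many code-write faults, tb_invalidate_phys_page_range()
   builds a per-page code_bitmap (see build_page_bitmap() below) so that later
   writes can first be checked against the exact byte ranges covered by
   translated code instead of invalidating on every write. */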
83 static TranslationBlock *tbs;
84 static int code_gen_max_blocks;
85 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
86 static int nb_tbs;
87 /* any access to the tbs or the page table must use this lock */
88 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
90 #if defined(__arm__) || defined(__sparc_v9__)
91 /* The prologue must be reachable with a direct jump. ARM and Sparc64
92 have limited branch ranges (possibly also PPC) so place it in a
93 section close to the code segment. */
94 #define code_gen_section \
95 __attribute__((__section__(".gen_code"))) \
96 __attribute__((aligned (32)))
97 #elif defined(_WIN32)
98 /* Maximum alignment for Win32 is 16. */
99 #define code_gen_section \
100 __attribute__((aligned (16)))
101 #else
102 #define code_gen_section \
103 __attribute__((aligned (32)))
104 #endif
106 uint8_t code_gen_prologue[1024] code_gen_section;
107 static uint8_t *code_gen_buffer;
108 static unsigned long code_gen_buffer_size;
109 /* threshold to flush the translated code buffer */
110 static unsigned long code_gen_buffer_max_size;
111 static uint8_t *code_gen_ptr;
113 #if !defined(CONFIG_USER_ONLY)
114 int phys_ram_fd;
115 static int in_migration;
117 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
118 #endif
120 CPUState *first_cpu;
121 /* current CPU in the current thread. It is only valid inside
122 cpu_exec() */
123 CPUState *cpu_single_env;
124 /* 0 = Do not count executed instructions.
125 1 = Precise instruction counting.
126 2 = Adaptive rate instruction counting. */
127 int use_icount = 0;
128 /* Current instruction counter. While executing translated code this may
129 include some instructions that have not yet been executed. */
130 int64_t qemu_icount;
132 typedef struct PageDesc {
133 /* list of TBs intersecting this ram page */
134 TranslationBlock *first_tb;
135 /* in order to optimize self modifying code, we count the number
136 of lookups we do to a given page to use a bitmap */
137 unsigned int code_write_count;
138 uint8_t *code_bitmap;
139 #if defined(CONFIG_USER_ONLY)
140 unsigned long flags;
141 #endif
142 } PageDesc;
144 /* In system mode we want L1_MAP to be based on ram offsets,
145 while in user mode we want it to be based on virtual addresses. */
146 #if !defined(CONFIG_USER_ONLY)
147 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
148 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
151 #endif
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
154 #endif
156 /* Size of the L2 (and L3, etc) page tables. */
157 #define L2_BITS 10
158 #define L2_SIZE (1 << L2_BITS)
160 /* The bits remaining after N lower levels of page tables. */
161 #define P_L1_BITS_REM \
162 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 #define V_L1_BITS_REM \
164 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
166 /* Size of the L1 page table. Avoid silly small sizes. */
167 #if P_L1_BITS_REM < 4
168 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
169 #else
170 #define P_L1_BITS P_L1_BITS_REM
171 #endif
173 #if V_L1_BITS_REM < 4
174 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
175 #else
176 #define V_L1_BITS V_L1_BITS_REM
177 #endif
179 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
180 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
182 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
183 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
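/* Worked example (hypothetical values, the real ones are per-target): with
   TARGET_PHYS_ADDR_SPACE_BITS = 52, TARGET_PAGE_BITS = 12 and L2_BITS = 10,
   P_L1_BITS_REM = (52 - 12) % 10 = 0, so P_L1_BITS = 10, P_L1_SIZE = 1024
   entries, and P_L1_SHIFT = 52 - 12 - 10 = 30, i.e. three further 10-bit
   levels sit below the L1 table. */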
185 unsigned long qemu_real_host_page_size;
186 unsigned long qemu_host_page_bits;
187 unsigned long qemu_host_page_size;
188 unsigned long qemu_host_page_mask;
190 /* This is a multi-level map on the virtual address space.
191 The bottom level has pointers to PageDesc. */
192 static void *l1_map[V_L1_SIZE];
194 #if !defined(CONFIG_USER_ONLY)
195 typedef struct PhysPageDesc {
196 /* offset in host memory of the page + io_index in the low bits */
197 ram_addr_t phys_offset;
198 ram_addr_t region_offset;
199 } PhysPageDesc;
201 /* This is a multi-level map on the physical address space.
202 The bottom level has pointers to PhysPageDesc. */
203 static void *l1_phys_map[P_L1_SIZE];
205 static void io_mem_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
232 #ifdef _WIN32
233 static void map_exec(void *addr, long size)
235 DWORD old_protect;
236 VirtualProtect(addr, size,
237 PAGE_EXECUTE_READWRITE, &old_protect);
240 #else
241 static void map_exec(void *addr, long size)
243 unsigned long start, end, page_size;
245 page_size = getpagesize();
246 start = (unsigned long)addr;
247 start &= ~(page_size - 1);
249 end = (unsigned long)addr + size;
250 end += page_size - 1;
251 end &= ~(page_size - 1);
253 mprotect((void *)start, end - start,
254 PROT_READ | PROT_WRITE | PROT_EXEC);
256 #endif
258 static void page_init(void)
260 /* NOTE: we can always assume that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_bits = 0;
277 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
278 qemu_host_page_bits++;
279 qemu_host_page_mask = ~(qemu_host_page_size - 1);
281 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
283 #ifdef HAVE_KINFO_GETVMMAP
284 struct kinfo_vmentry *freep;
285 int i, cnt;
287 freep = kinfo_getvmmap(getpid(), &cnt);
288 if (freep) {
289 mmap_lock();
290 for (i = 0; i < cnt; i++) {
291 unsigned long startaddr, endaddr;
293 startaddr = freep[i].kve_start;
294 endaddr = freep[i].kve_end;
295 if (h2g_valid(startaddr)) {
296 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
298 if (h2g_valid(endaddr)) {
299 endaddr = h2g(endaddr);
300 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
301 } else {
302 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
303 endaddr = ~0ul;
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
305 #endif
309 free(freep);
310 mmap_unlock();
312 #else
313 FILE *f;
315 last_brk = (unsigned long)sbrk(0);
317 f = fopen("/compat/linux/proc/self/maps", "r");
318 if (f) {
319 mmap_lock();
321 do {
322 unsigned long startaddr, endaddr;
323 int n;
325 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
327 if (n == 2 && h2g_valid(startaddr)) {
328 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
330 if (h2g_valid(endaddr)) {
331 endaddr = h2g(endaddr);
332 } else {
333 endaddr = ~0ul;
335 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
337 } while (!feof(f));
339 fclose(f);
340 mmap_unlock();
342 #endif
344 #endif
347 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
349 PageDesc *pd;
350 void **lp;
351 int i;
353 #if defined(CONFIG_USER_ONLY)
354 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
355 # define ALLOC(P, SIZE) \
356 do { \
357 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
358 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
359 } while (0)
360 #else
361 # define ALLOC(P, SIZE) \
362 do { P = qemu_mallocz(SIZE); } while (0)
363 #endif
365 /* Level 1. Always allocated. */
366 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
368 /* Level 2..N-1. */
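/* Each loop iteration below consumes the next L2_BITS of 'index' and descends
   one table level; intermediate tables are only created when 'alloc' is set,
   so a missing level simply means no descriptor exists yet. */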
369 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
370 void **p = *lp;
372 if (p == NULL) {
373 if (!alloc) {
374 return NULL;
376 ALLOC(p, sizeof(void *) * L2_SIZE);
377 *lp = p;
380 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
383 pd = *lp;
384 if (pd == NULL) {
385 if (!alloc) {
386 return NULL;
388 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
389 *lp = pd;
392 #undef ALLOC
394 return pd + (index & (L2_SIZE - 1));
397 static inline PageDesc *page_find(tb_page_addr_t index)
399 return page_find_alloc(index, 0);
402 #if !defined(CONFIG_USER_ONLY)
403 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
405 PhysPageDesc *pd;
406 void **lp;
407 int i;
409 /* Level 1. Always allocated. */
410 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
412 /* Level 2..N-1. */
413 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
414 void **p = *lp;
415 if (p == NULL) {
416 if (!alloc) {
417 return NULL;
419 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
421 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
424 pd = *lp;
425 if (pd == NULL) {
426 int i;
428 if (!alloc) {
429 return NULL;
432 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
434 for (i = 0; i < L2_SIZE; i++) {
435 pd[i].phys_offset = IO_MEM_UNASSIGNED;
436 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
440 return pd + (index & (L2_SIZE - 1));
443 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
445 return phys_page_find_alloc(index, 0);
448 static void tlb_protect_code(ram_addr_t ram_addr);
449 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
450 target_ulong vaddr);
451 #define mmap_lock() do { } while(0)
452 #define mmap_unlock() do { } while(0)
453 #endif
455 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
457 #if defined(CONFIG_USER_ONLY)
458 /* Currently it is not recommended to allocate big chunks of data in
459 user mode. This will change when a dedicated libc is used. */
460 #define USE_STATIC_CODE_GEN_BUFFER
461 #endif
463 #ifdef USE_STATIC_CODE_GEN_BUFFER
464 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
465 __attribute__((aligned (CODE_GEN_ALIGN)));
466 #endif
468 static void code_gen_alloc(unsigned long tb_size)
470 if (kvm_enabled())
471 return;
473 #ifdef USE_STATIC_CODE_GEN_BUFFER
474 code_gen_buffer = static_code_gen_buffer;
475 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
476 map_exec(code_gen_buffer, code_gen_buffer_size);
477 #else
478 code_gen_buffer_size = tb_size;
479 if (code_gen_buffer_size == 0) {
480 #if defined(CONFIG_USER_ONLY)
481 /* in user mode, phys_ram_size is not meaningful */
482 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
483 #else
484 /* XXX: needs adjustments */
485 code_gen_buffer_size = (unsigned long)(ram_size / 4);
486 #endif
488 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
489 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
490 /* The code gen buffer location may have constraints depending on
491 the host cpu and OS */
492 #if defined(__linux__)
494 int flags;
495 void *start = NULL;
497 flags = MAP_PRIVATE | MAP_ANONYMOUS;
498 #if defined(__x86_64__)
499 flags |= MAP_32BIT;
500 /* Cannot map more than that */
501 if (code_gen_buffer_size > (800 * 1024 * 1024))
502 code_gen_buffer_size = (800 * 1024 * 1024);
503 #elif defined(__sparc_v9__)
504 // Map the buffer below 2G, so we can use direct calls and branches
505 flags |= MAP_FIXED;
506 start = (void *) 0x60000000UL;
507 if (code_gen_buffer_size > (512 * 1024 * 1024))
508 code_gen_buffer_size = (512 * 1024 * 1024);
509 #elif defined(__arm__)
510 /* Map the buffer below 32M, so we can use direct calls and branches */
511 flags |= MAP_FIXED;
512 start = (void *) 0x01000000UL;
513 if (code_gen_buffer_size > 16 * 1024 * 1024)
514 code_gen_buffer_size = 16 * 1024 * 1024;
515 #elif defined(__s390x__)
516 /* Map the buffer so that we can use direct calls and branches. */
517 /* We have a +- 4GB range on the branches; leave some slop. */
518 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
519 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
521 start = (void *)0x90000000UL;
522 #endif
523 code_gen_buffer = mmap(start, code_gen_buffer_size,
524 PROT_WRITE | PROT_READ | PROT_EXEC,
525 flags, -1, 0);
526 if (code_gen_buffer == MAP_FAILED) {
527 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
528 exit(1);
531 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
532 || defined(__DragonFly__) || defined(__OpenBSD__)
534 int flags;
535 void *addr = NULL;
536 flags = MAP_PRIVATE | MAP_ANONYMOUS;
537 #if defined(__x86_64__)
538 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
539 * 0x40000000 is free */
540 flags |= MAP_FIXED;
541 addr = (void *)0x40000000;
542 /* Cannot map more than that */
543 if (code_gen_buffer_size > (800 * 1024 * 1024))
544 code_gen_buffer_size = (800 * 1024 * 1024);
545 #elif defined(__sparc_v9__)
546 // Map the buffer below 2G, so we can use direct calls and branches
547 flags |= MAP_FIXED;
548 addr = (void *) 0x60000000UL;
549 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
550 code_gen_buffer_size = (512 * 1024 * 1024);
552 #endif
553 code_gen_buffer = mmap(addr, code_gen_buffer_size,
554 PROT_WRITE | PROT_READ | PROT_EXEC,
555 flags, -1, 0);
556 if (code_gen_buffer == MAP_FAILED) {
557 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
558 exit(1);
561 #else
562 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
563 map_exec(code_gen_buffer, code_gen_buffer_size);
564 #endif
565 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
566 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
567 code_gen_buffer_max_size = code_gen_buffer_size -
568 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
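/* The headroom subtracted above appears to be a worst-case bound on the code
   emitted for a single TB: tb_alloc() stops handing out TBs once code_gen_ptr
   passes code_gen_buffer_max_size, so the block being translated can still
   finish inside the buffer before tb_flush() is forced. */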
569 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
570 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
573 /* Must be called before using the QEMU cpus. 'tb_size' is the size
574 (in bytes) allocated to the translation buffer. Zero means default
575 size. */
576 void cpu_exec_init_all(unsigned long tb_size)
578 cpu_gen_init();
579 code_gen_alloc(tb_size);
580 code_gen_ptr = code_gen_buffer;
581 page_init();
582 #if !defined(CONFIG_USER_ONLY)
583 io_mem_init();
584 #endif
585 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
586 /* There's no guest base to take into account, so go ahead and
587 initialize the prologue now. */
588 tcg_prologue_init(&tcg_ctx);
589 #endif
592 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
594 static int cpu_common_post_load(void *opaque, int version_id)
596 CPUState *env = opaque;
598 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
599 version_id is increased. */
600 env->interrupt_request &= ~0x01;
601 tlb_flush(env, 1);
603 return 0;
606 static const VMStateDescription vmstate_cpu_common = {
607 .name = "cpu_common",
608 .version_id = 1,
609 .minimum_version_id = 1,
610 .minimum_version_id_old = 1,
611 .post_load = cpu_common_post_load,
612 .fields = (VMStateField []) {
613 VMSTATE_UINT32(halted, CPUState),
614 VMSTATE_UINT32(interrupt_request, CPUState),
615 VMSTATE_END_OF_LIST()
618 #endif
620 CPUState *qemu_get_cpu(int cpu)
622 CPUState *env = first_cpu;
624 while (env) {
625 if (env->cpu_index == cpu)
626 break;
627 env = env->next_cpu;
630 return env;
633 void cpu_exec_init(CPUState *env)
635 CPUState **penv;
636 int cpu_index;
638 #if defined(CONFIG_USER_ONLY)
639 cpu_list_lock();
640 #endif
641 env->next_cpu = NULL;
642 penv = &first_cpu;
643 cpu_index = 0;
644 while (*penv != NULL) {
645 penv = &(*penv)->next_cpu;
646 cpu_index++;
648 env->cpu_index = cpu_index;
649 env->numa_node = 0;
650 QTAILQ_INIT(&env->breakpoints);
651 QTAILQ_INIT(&env->watchpoints);
652 #ifndef CONFIG_USER_ONLY
653 env->thread_id = qemu_get_thread_id();
654 #endif
655 *penv = env;
656 #if defined(CONFIG_USER_ONLY)
657 cpu_list_unlock();
658 #endif
659 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
660 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
661 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
662 cpu_save, cpu_load, env);
663 #endif
666 /* Allocate a new translation block. Flush the translation buffer if
667 too many translation blocks or too much generated code. */
668 static TranslationBlock *tb_alloc(target_ulong pc)
670 TranslationBlock *tb;
672 if (nb_tbs >= code_gen_max_blocks ||
673 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
674 return NULL;
675 tb = &tbs[nb_tbs++];
676 tb->pc = pc;
677 tb->cflags = 0;
678 return tb;
681 void tb_free(TranslationBlock *tb)
683 /* In practice this is mostly used for single-use temporary TBs.
684 Ignore the hard cases and just back up if this TB happens to
685 be the last one generated. */
686 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
687 code_gen_ptr = tb->tc_ptr;
688 nb_tbs--;
692 static inline void invalidate_page_bitmap(PageDesc *p)
694 if (p->code_bitmap) {
695 qemu_free(p->code_bitmap);
696 p->code_bitmap = NULL;
698 p->code_write_count = 0;
701 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
703 static void page_flush_tb_1 (int level, void **lp)
705 int i;
707 if (*lp == NULL) {
708 return;
710 if (level == 0) {
711 PageDesc *pd = *lp;
712 for (i = 0; i < L2_SIZE; ++i) {
713 pd[i].first_tb = NULL;
714 invalidate_page_bitmap(pd + i);
716 } else {
717 void **pp = *lp;
718 for (i = 0; i < L2_SIZE; ++i) {
719 page_flush_tb_1 (level - 1, pp + i);
724 static void page_flush_tb(void)
726 int i;
727 for (i = 0; i < V_L1_SIZE; i++) {
728 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
732 /* flush all the translation blocks */
733 /* XXX: tb_flush is currently not thread safe */
734 void tb_flush(CPUState *env1)
736 CPUState *env;
737 #if defined(DEBUG_FLUSH)
738 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
739 (unsigned long)(code_gen_ptr - code_gen_buffer),
740 nb_tbs, nb_tbs > 0 ?
741 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
742 #endif
743 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
744 cpu_abort(env1, "Internal error: code buffer overflow\n");
746 nb_tbs = 0;
748 for(env = first_cpu; env != NULL; env = env->next_cpu) {
749 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
752 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
753 page_flush_tb();
755 code_gen_ptr = code_gen_buffer;
756 /* XXX: flush processor icache at this point if cache flush is
757 expensive */
758 tb_flush_count++;
761 #ifdef DEBUG_TB_CHECK
763 static void tb_invalidate_check(target_ulong address)
765 TranslationBlock *tb;
766 int i;
767 address &= TARGET_PAGE_MASK;
768 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
769 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
770 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
771 address >= tb->pc + tb->size)) {
772 printf("ERROR invalidate: address=" TARGET_FMT_lx
773 " PC=%08lx size=%04x\n",
774 address, (long)tb->pc, tb->size);
780 /* verify that all the pages have correct rights for code */
781 static void tb_page_check(void)
783 TranslationBlock *tb;
784 int i, flags1, flags2;
786 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
787 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
788 flags1 = page_get_flags(tb->pc);
789 flags2 = page_get_flags(tb->pc + tb->size - 1);
790 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
791 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
792 (long)tb->pc, tb->size, flags1, flags2);
798 #endif
800 /* invalidate one TB */
801 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
802 int next_offset)
804 TranslationBlock *tb1;
805 for(;;) {
806 tb1 = *ptb;
807 if (tb1 == tb) {
808 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
809 break;
811 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
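/* The per-page and jump lists below use tagged pointers: the low two bits of
   a TranslationBlock pointer record which slot the link belongs to, 0 or 1
   for the first or second page spanned by the TB, while a tag of 2 marks the
   head of a jump list (tb->jmp_first is set to (long)tb | 2 in tb_link_page()). */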
815 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
817 TranslationBlock *tb1;
818 unsigned int n1;
820 for(;;) {
821 tb1 = *ptb;
822 n1 = (long)tb1 & 3;
823 tb1 = (TranslationBlock *)((long)tb1 & ~3);
824 if (tb1 == tb) {
825 *ptb = tb1->page_next[n1];
826 break;
828 ptb = &tb1->page_next[n1];
832 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
834 TranslationBlock *tb1, **ptb;
835 unsigned int n1;
837 ptb = &tb->jmp_next[n];
838 tb1 = *ptb;
839 if (tb1) {
840 /* find tb(n) in circular list */
841 for(;;) {
842 tb1 = *ptb;
843 n1 = (long)tb1 & 3;
844 tb1 = (TranslationBlock *)((long)tb1 & ~3);
845 if (n1 == n && tb1 == tb)
846 break;
847 if (n1 == 2) {
848 ptb = &tb1->jmp_first;
849 } else {
850 ptb = &tb1->jmp_next[n1];
853 /* now we can remove tb(n) from the list */
854 *ptb = tb->jmp_next[n];
856 tb->jmp_next[n] = NULL;
860 /* reset the jump entry 'n' of a TB so that it is not chained to
861 another TB */
862 static inline void tb_reset_jump(TranslationBlock *tb, int n)
864 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
867 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
869 CPUState *env;
870 PageDesc *p;
871 unsigned int h, n1;
872 tb_page_addr_t phys_pc;
873 TranslationBlock *tb1, *tb2;
875 /* remove the TB from the hash list */
876 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
877 h = tb_phys_hash_func(phys_pc);
878 tb_remove(&tb_phys_hash[h], tb,
879 offsetof(TranslationBlock, phys_hash_next));
881 /* remove the TB from the page list */
882 if (tb->page_addr[0] != page_addr) {
883 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
884 tb_page_remove(&p->first_tb, tb);
885 invalidate_page_bitmap(p);
887 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
888 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
889 tb_page_remove(&p->first_tb, tb);
890 invalidate_page_bitmap(p);
893 tb_invalidated_flag = 1;
895 /* remove the TB from the hash list */
896 h = tb_jmp_cache_hash_func(tb->pc);
897 for(env = first_cpu; env != NULL; env = env->next_cpu) {
898 if (env->tb_jmp_cache[h] == tb)
899 env->tb_jmp_cache[h] = NULL;
902 /* remove this TB from the two jump lists */
903 tb_jmp_remove(tb, 0);
904 tb_jmp_remove(tb, 1);
906 /* remove any remaining jumps to this TB */
907 tb1 = tb->jmp_first;
908 for(;;) {
909 n1 = (long)tb1 & 3;
910 if (n1 == 2)
911 break;
912 tb1 = (TranslationBlock *)((long)tb1 & ~3);
913 tb2 = tb1->jmp_next[n1];
914 tb_reset_jump(tb1, n1);
915 tb1->jmp_next[n1] = NULL;
916 tb1 = tb2;
918 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
920 tb_phys_invalidate_count++;
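/* Set bits [start, start + len) in the bitmap 'tab'.  For example,
   set_bits(tab, 3, 10) sets bits 3..12: the top five bits of the first byte
   (mask 0xf8) and the low five bits of the second byte (mask 0x1f). */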
923 static inline void set_bits(uint8_t *tab, int start, int len)
925 int end, mask, end1;
927 end = start + len;
928 tab += start >> 3;
929 mask = 0xff << (start & 7);
930 if ((start & ~7) == (end & ~7)) {
931 if (start < end) {
932 mask &= ~(0xff << (end & 7));
933 *tab |= mask;
935 } else {
936 *tab++ |= mask;
937 start = (start + 8) & ~7;
938 end1 = end & ~7;
939 while (start < end1) {
940 *tab++ = 0xff;
941 start += 8;
943 if (start < end) {
944 mask = ~(0xff << (end & 7));
945 *tab |= mask;
950 static void build_page_bitmap(PageDesc *p)
952 int n, tb_start, tb_end;
953 TranslationBlock *tb;
955 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
957 tb = p->first_tb;
958 while (tb != NULL) {
959 n = (long)tb & 3;
960 tb = (TranslationBlock *)((long)tb & ~3);
961 /* NOTE: this is subtle as a TB may span two physical pages */
962 if (n == 0) {
963 /* NOTE: tb_end may be after the end of the page, but
964 it is not a problem */
965 tb_start = tb->pc & ~TARGET_PAGE_MASK;
966 tb_end = tb_start + tb->size;
967 if (tb_end > TARGET_PAGE_SIZE)
968 tb_end = TARGET_PAGE_SIZE;
969 } else {
970 tb_start = 0;
971 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
973 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
974 tb = tb->page_next[n];
978 TranslationBlock *tb_gen_code(CPUState *env,
979 target_ulong pc, target_ulong cs_base,
980 int flags, int cflags)
982 TranslationBlock *tb;
983 uint8_t *tc_ptr;
984 tb_page_addr_t phys_pc, phys_page2;
985 target_ulong virt_page2;
986 int code_gen_size;
988 phys_pc = get_page_addr_code(env, pc);
989 tb = tb_alloc(pc);
990 if (!tb) {
991 /* flush must be done */
992 tb_flush(env);
993 /* cannot fail at this point */
994 tb = tb_alloc(pc);
995 /* Don't forget to invalidate previous TB info. */
996 tb_invalidated_flag = 1;
998 tc_ptr = code_gen_ptr;
999 tb->tc_ptr = tc_ptr;
1000 tb->cs_base = cs_base;
1001 tb->flags = flags;
1002 tb->cflags = cflags;
1003 cpu_gen_code(env, tb, &code_gen_size);
1004 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1006 /* check next page if needed */
1007 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1008 phys_page2 = -1;
1009 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1010 phys_page2 = get_page_addr_code(env, virt_page2);
1012 tb_link_page(tb, phys_pc, phys_page2);
1013 return tb;
1016 /* invalidate all TBs which intersect with the target physical page
1017 starting in range [start, end[. NOTE: start and end must refer to
1018 the same physical page. 'is_cpu_write_access' should be true if called
1019 from a real cpu write access: the virtual CPU will exit the current
1020 TB if code is modified inside this TB. */
1021 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1022 int is_cpu_write_access)
1024 TranslationBlock *tb, *tb_next, *saved_tb;
1025 CPUState *env = cpu_single_env;
1026 tb_page_addr_t tb_start, tb_end;
1027 PageDesc *p;
1028 int n;
1029 #ifdef TARGET_HAS_PRECISE_SMC
1030 int current_tb_not_found = is_cpu_write_access;
1031 TranslationBlock *current_tb = NULL;
1032 int current_tb_modified = 0;
1033 target_ulong current_pc = 0;
1034 target_ulong current_cs_base = 0;
1035 int current_flags = 0;
1036 #endif /* TARGET_HAS_PRECISE_SMC */
1038 p = page_find(start >> TARGET_PAGE_BITS);
1039 if (!p)
1040 return;
1041 if (!p->code_bitmap &&
1042 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1043 is_cpu_write_access) {
1044 /* build code bitmap */
1045 build_page_bitmap(p);
1048 /* we remove all the TBs in the range [start, end[ */
1049 /* XXX: see if in some cases it could be faster to invalidate all the code */
1050 tb = p->first_tb;
1051 while (tb != NULL) {
1052 n = (long)tb & 3;
1053 tb = (TranslationBlock *)((long)tb & ~3);
1054 tb_next = tb->page_next[n];
1055 /* NOTE: this is subtle as a TB may span two physical pages */
1056 if (n == 0) {
1057 /* NOTE: tb_end may be after the end of the page, but
1058 it is not a problem */
1059 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1060 tb_end = tb_start + tb->size;
1061 } else {
1062 tb_start = tb->page_addr[1];
1063 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1065 if (!(tb_end <= start || tb_start >= end)) {
1066 #ifdef TARGET_HAS_PRECISE_SMC
1067 if (current_tb_not_found) {
1068 current_tb_not_found = 0;
1069 current_tb = NULL;
1070 if (env->mem_io_pc) {
1071 /* now we have a real cpu fault */
1072 current_tb = tb_find_pc(env->mem_io_pc);
1075 if (current_tb == tb &&
1076 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1077 /* If we are modifying the current TB, we must stop
1078 its execution. We could be more precise by checking
1079 that the modification is after the current PC, but it
1080 would require a specialized function to partially
1081 restore the CPU state */
1083 current_tb_modified = 1;
1084 cpu_restore_state(current_tb, env, env->mem_io_pc);
1085 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1086 &current_flags);
1088 #endif /* TARGET_HAS_PRECISE_SMC */
1089 /* we need to do that to handle the case where a signal
1090 occurs while doing tb_phys_invalidate() */
1091 saved_tb = NULL;
1092 if (env) {
1093 saved_tb = env->current_tb;
1094 env->current_tb = NULL;
1096 tb_phys_invalidate(tb, -1);
1097 if (env) {
1098 env->current_tb = saved_tb;
1099 if (env->interrupt_request && env->current_tb)
1100 cpu_interrupt(env, env->interrupt_request);
1103 tb = tb_next;
1105 #if !defined(CONFIG_USER_ONLY)
1106 /* if no code remains, there is no need to continue using slow writes */
1107 if (!p->first_tb) {
1108 invalidate_page_bitmap(p);
1109 if (is_cpu_write_access) {
1110 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1113 #endif
1114 #ifdef TARGET_HAS_PRECISE_SMC
1115 if (current_tb_modified) {
1116 /* we generate a block containing just the instruction
1117 modifying the memory. It will ensure that it cannot modify
1118 itself */
1119 env->current_tb = NULL;
1120 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1121 cpu_resume_from_signal(env, NULL);
1123 #endif
1126 /* len must be <= 8 and start must be a multiple of len */
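/* The code_bitmap built by build_page_bitmap() holds one bit per byte of the
   guest page, so the single shifted byte read below is enough (given the
   alignment constraint) to tell whether any of the 'len' bytes being written
   overlap translated code; only then is the full range invalidation done. */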
1127 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1129 PageDesc *p;
1130 int offset, b;
1131 #if 0
1132 if (1) {
1133 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1134 cpu_single_env->mem_io_vaddr, len,
1135 cpu_single_env->eip,
1136 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1138 #endif
1139 p = page_find(start >> TARGET_PAGE_BITS);
1140 if (!p)
1141 return;
1142 if (p->code_bitmap) {
1143 offset = start & ~TARGET_PAGE_MASK;
1144 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1145 if (b & ((1 << len) - 1))
1146 goto do_invalidate;
1147 } else {
1148 do_invalidate:
1149 tb_invalidate_phys_page_range(start, start + len, 1);
1153 #if !defined(CONFIG_SOFTMMU)
1154 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1155 unsigned long pc, void *puc)
1157 TranslationBlock *tb;
1158 PageDesc *p;
1159 int n;
1160 #ifdef TARGET_HAS_PRECISE_SMC
1161 TranslationBlock *current_tb = NULL;
1162 CPUState *env = cpu_single_env;
1163 int current_tb_modified = 0;
1164 target_ulong current_pc = 0;
1165 target_ulong current_cs_base = 0;
1166 int current_flags = 0;
1167 #endif
1169 addr &= TARGET_PAGE_MASK;
1170 p = page_find(addr >> TARGET_PAGE_BITS);
1171 if (!p)
1172 return;
1173 tb = p->first_tb;
1174 #ifdef TARGET_HAS_PRECISE_SMC
1175 if (tb && pc != 0) {
1176 current_tb = tb_find_pc(pc);
1178 #endif
1179 while (tb != NULL) {
1180 n = (long)tb & 3;
1181 tb = (TranslationBlock *)((long)tb & ~3);
1182 #ifdef TARGET_HAS_PRECISE_SMC
1183 if (current_tb == tb &&
1184 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1185 /* If we are modifying the current TB, we must stop
1186 its execution. We could be more precise by checking
1187 that the modification is after the current PC, but it
1188 would require a specialized function to partially
1189 restore the CPU state */
1191 current_tb_modified = 1;
1192 cpu_restore_state(current_tb, env, pc);
1193 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1194 &current_flags);
1196 #endif /* TARGET_HAS_PRECISE_SMC */
1197 tb_phys_invalidate(tb, addr);
1198 tb = tb->page_next[n];
1200 p->first_tb = NULL;
1201 #ifdef TARGET_HAS_PRECISE_SMC
1202 if (current_tb_modified) {
1203 /* we generate a block containing just the instruction
1204 modifying the memory. It will ensure that it cannot modify
1205 itself */
1206 env->current_tb = NULL;
1207 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1208 cpu_resume_from_signal(env, puc);
1210 #endif
1212 #endif
1214 /* add the tb in the target page and protect it if necessary */
1215 static inline void tb_alloc_page(TranslationBlock *tb,
1216 unsigned int n, tb_page_addr_t page_addr)
1218 PageDesc *p;
1219 TranslationBlock *last_first_tb;
1221 tb->page_addr[n] = page_addr;
1222 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1223 tb->page_next[n] = p->first_tb;
1224 last_first_tb = p->first_tb;
1225 p->first_tb = (TranslationBlock *)((long)tb | n);
1226 invalidate_page_bitmap(p);
1228 #if defined(TARGET_HAS_SMC) || 1
1230 #if defined(CONFIG_USER_ONLY)
1231 if (p->flags & PAGE_WRITE) {
1232 target_ulong addr;
1233 PageDesc *p2;
1234 int prot;
1236 /* force the host page to be non-writable (writes will take a
1237 page fault + mprotect overhead) */
1238 page_addr &= qemu_host_page_mask;
1239 prot = 0;
1240 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1241 addr += TARGET_PAGE_SIZE) {
1243 p2 = page_find (addr >> TARGET_PAGE_BITS);
1244 if (!p2)
1245 continue;
1246 prot |= p2->flags;
1247 p2->flags &= ~PAGE_WRITE;
1249 mprotect(g2h(page_addr), qemu_host_page_size,
1250 (prot & PAGE_BITS) & ~PAGE_WRITE);
1251 #ifdef DEBUG_TB_INVALIDATE
1252 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1253 page_addr);
1254 #endif
1256 #else
1257 /* if some code is already present, then the pages are already
1258 protected. So we handle the case where only the first TB is
1259 allocated in a physical page */
1260 if (!last_first_tb) {
1261 tlb_protect_code(page_addr);
1263 #endif
1265 #endif /* TARGET_HAS_SMC */
1268 /* add a new TB and link it to the physical page tables. phys_page2 is
1269 (-1) to indicate that only one page contains the TB. */
1270 void tb_link_page(TranslationBlock *tb,
1271 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1273 unsigned int h;
1274 TranslationBlock **ptb;
1276 /* Grab the mmap lock to stop another thread invalidating this TB
1277 before we are done. */
1278 mmap_lock();
1279 /* add in the physical hash table */
1280 h = tb_phys_hash_func(phys_pc);
1281 ptb = &tb_phys_hash[h];
1282 tb->phys_hash_next = *ptb;
1283 *ptb = tb;
1285 /* add in the page list */
1286 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1287 if (phys_page2 != -1)
1288 tb_alloc_page(tb, 1, phys_page2);
1289 else
1290 tb->page_addr[1] = -1;
1292 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1293 tb->jmp_next[0] = NULL;
1294 tb->jmp_next[1] = NULL;
1296 /* init original jump addresses */
1297 if (tb->tb_next_offset[0] != 0xffff)
1298 tb_reset_jump(tb, 0);
1299 if (tb->tb_next_offset[1] != 0xffff)
1300 tb_reset_jump(tb, 1);
1302 #ifdef DEBUG_TB_CHECK
1303 tb_page_check();
1304 #endif
1305 mmap_unlock();
1308 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1309 tb[1].tc_ptr. Return NULL if not found */
1310 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1312 int m_min, m_max, m;
1313 unsigned long v;
1314 TranslationBlock *tb;
1316 if (nb_tbs <= 0)
1317 return NULL;
1318 if (tc_ptr < (unsigned long)code_gen_buffer ||
1319 tc_ptr >= (unsigned long)code_gen_ptr)
1320 return NULL;
1321 /* binary search (cf Knuth) */
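/* This works because tbs[] is filled in generation order: code_gen_ptr only
   grows between flushes, so the tc_ptr values are monotonically increasing
   across the array and it is sorted by construction. */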
1322 m_min = 0;
1323 m_max = nb_tbs - 1;
1324 while (m_min <= m_max) {
1325 m = (m_min + m_max) >> 1;
1326 tb = &tbs[m];
1327 v = (unsigned long)tb->tc_ptr;
1328 if (v == tc_ptr)
1329 return tb;
1330 else if (tc_ptr < v) {
1331 m_max = m - 1;
1332 } else {
1333 m_min = m + 1;
1336 return &tbs[m_max];
1339 static void tb_reset_jump_recursive(TranslationBlock *tb);
1341 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1343 TranslationBlock *tb1, *tb_next, **ptb;
1344 unsigned int n1;
1346 tb1 = tb->jmp_next[n];
1347 if (tb1 != NULL) {
1348 /* find head of list */
1349 for(;;) {
1350 n1 = (long)tb1 & 3;
1351 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1352 if (n1 == 2)
1353 break;
1354 tb1 = tb1->jmp_next[n1];
1356 /* we are now sure that tb jumps to tb1 */
1357 tb_next = tb1;
1359 /* remove tb from the jmp_first list */
1360 ptb = &tb_next->jmp_first;
1361 for(;;) {
1362 tb1 = *ptb;
1363 n1 = (long)tb1 & 3;
1364 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1365 if (n1 == n && tb1 == tb)
1366 break;
1367 ptb = &tb1->jmp_next[n1];
1369 *ptb = tb->jmp_next[n];
1370 tb->jmp_next[n] = NULL;
1372 /* remove the jump to the next tb in the generated code */
1373 tb_reset_jump(tb, n);
1375 /* remove jumps in the tb we could have jumped to */
1376 tb_reset_jump_recursive(tb_next);
1380 static void tb_reset_jump_recursive(TranslationBlock *tb)
1382 tb_reset_jump_recursive2(tb, 0);
1383 tb_reset_jump_recursive2(tb, 1);
1386 #if defined(TARGET_HAS_ICE)
1387 #if defined(CONFIG_USER_ONLY)
1388 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1390 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1392 #else
1393 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1395 target_phys_addr_t addr;
1396 target_ulong pd;
1397 ram_addr_t ram_addr;
1398 PhysPageDesc *p;
1400 addr = cpu_get_phys_page_debug(env, pc);
1401 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1402 if (!p) {
1403 pd = IO_MEM_UNASSIGNED;
1404 } else {
1405 pd = p->phys_offset;
1407 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1408 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1410 #endif
1411 #endif /* TARGET_HAS_ICE */
1413 #if defined(CONFIG_USER_ONLY)
1414 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1419 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1420 int flags, CPUWatchpoint **watchpoint)
1422 return -ENOSYS;
1424 #else
1425 /* Add a watchpoint. */
1426 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1427 int flags, CPUWatchpoint **watchpoint)
1429 target_ulong len_mask = ~(len - 1);
1430 CPUWatchpoint *wp;
1432 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
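/* For example, len = 4 gives len_mask = ~3: lengths other than 1/2/4/8 are
   rejected, as is any addr with bits set below the length boundary. */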
1433 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1434 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1435 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1436 return -EINVAL;
1438 wp = qemu_malloc(sizeof(*wp));
1440 wp->vaddr = addr;
1441 wp->len_mask = len_mask;
1442 wp->flags = flags;
1444 /* keep all GDB-injected watchpoints in front */
1445 if (flags & BP_GDB)
1446 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1447 else
1448 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1450 tlb_flush_page(env, addr);
1452 if (watchpoint)
1453 *watchpoint = wp;
1454 return 0;
1457 /* Remove a specific watchpoint. */
1458 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1459 int flags)
1461 target_ulong len_mask = ~(len - 1);
1462 CPUWatchpoint *wp;
1464 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1465 if (addr == wp->vaddr && len_mask == wp->len_mask
1466 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1467 cpu_watchpoint_remove_by_ref(env, wp);
1468 return 0;
1471 return -ENOENT;
1474 /* Remove a specific watchpoint by reference. */
1475 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1477 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1479 tlb_flush_page(env, watchpoint->vaddr);
1481 qemu_free(watchpoint);
1484 /* Remove all matching watchpoints. */
1485 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1487 CPUWatchpoint *wp, *next;
1489 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1490 if (wp->flags & mask)
1491 cpu_watchpoint_remove_by_ref(env, wp);
1494 #endif
1496 /* Add a breakpoint. */
1497 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1498 CPUBreakpoint **breakpoint)
1500 #if defined(TARGET_HAS_ICE)
1501 CPUBreakpoint *bp;
1503 bp = qemu_malloc(sizeof(*bp));
1505 bp->pc = pc;
1506 bp->flags = flags;
1508 /* keep all GDB-injected breakpoints in front */
1509 if (flags & BP_GDB)
1510 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1511 else
1512 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1514 breakpoint_invalidate(env, pc);
1516 if (breakpoint)
1517 *breakpoint = bp;
1518 return 0;
1519 #else
1520 return -ENOSYS;
1521 #endif
1524 /* Remove a specific breakpoint. */
1525 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1527 #if defined(TARGET_HAS_ICE)
1528 CPUBreakpoint *bp;
1530 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1531 if (bp->pc == pc && bp->flags == flags) {
1532 cpu_breakpoint_remove_by_ref(env, bp);
1533 return 0;
1536 return -ENOENT;
1537 #else
1538 return -ENOSYS;
1539 #endif
1542 /* Remove a specific breakpoint by reference. */
1543 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1545 #if defined(TARGET_HAS_ICE)
1546 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1548 breakpoint_invalidate(env, breakpoint->pc);
1550 qemu_free(breakpoint);
1551 #endif
1554 /* Remove all matching breakpoints. */
1555 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1557 #if defined(TARGET_HAS_ICE)
1558 CPUBreakpoint *bp, *next;
1560 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1561 if (bp->flags & mask)
1562 cpu_breakpoint_remove_by_ref(env, bp);
1564 #endif
1567 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1568 CPU loop after each instruction */
1569 void cpu_single_step(CPUState *env, int enabled)
1571 #if defined(TARGET_HAS_ICE)
1572 if (env->singlestep_enabled != enabled) {
1573 env->singlestep_enabled = enabled;
1574 if (kvm_enabled())
1575 kvm_update_guest_debug(env, 0);
1576 else {
1577 /* must flush all the translated code to avoid inconsistencies */
1578 /* XXX: only flush what is necessary */
1579 tb_flush(env);
1582 #endif
1585 /* enable or disable low-level logging */
1586 void cpu_set_log(int log_flags)
1588 loglevel = log_flags;
1589 if (loglevel && !logfile) {
1590 logfile = fopen(logfilename, log_append ? "a" : "w");
1591 if (!logfile) {
1592 perror(logfilename);
1593 _exit(1);
1595 #if !defined(CONFIG_SOFTMMU)
1596 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1598 static char logfile_buf[4096];
1599 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1601 #elif !defined(_WIN32)
1602 /* Win32 doesn't support line-buffering and requires size >= 2 */
1603 setvbuf(logfile, NULL, _IOLBF, 0);
1604 #endif
1605 log_append = 1;
1607 if (!loglevel && logfile) {
1608 fclose(logfile);
1609 logfile = NULL;
1613 void cpu_set_log_filename(const char *filename)
1615 logfilename = strdup(filename);
1616 if (logfile) {
1617 fclose(logfile);
1618 logfile = NULL;
1620 cpu_set_log(loglevel);
1623 static void cpu_unlink_tb(CPUState *env)
1625 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1626 problem and hope the cpu will stop of its own accord. For userspace
1627 emulation this often isn't actually as bad as it sounds. Often
1628 signals are used primarily to interrupt blocking syscalls. */
1629 TranslationBlock *tb;
1630 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1632 spin_lock(&interrupt_lock);
1633 tb = env->current_tb;
1634 /* if the cpu is currently executing code, we must unlink it and
1635 all the potentially executing TBs */
1636 if (tb) {
1637 env->current_tb = NULL;
1638 tb_reset_jump_recursive(tb);
1640 spin_unlock(&interrupt_lock);
1643 #ifndef CONFIG_USER_ONLY
1644 /* mask must never be zero, except for A20 change call */
1645 static void tcg_handle_interrupt(CPUState *env, int mask)
1647 int old_mask;
1649 old_mask = env->interrupt_request;
1650 env->interrupt_request |= mask;
1653 * If called from iothread context, wake the target cpu in
1654 * case it's halted.
1656 if (!qemu_cpu_is_self(env)) {
1657 qemu_cpu_kick(env);
1658 return;
1661 if (use_icount) {
1662 env->icount_decr.u16.high = 0xffff;
1663 if (!can_do_io(env)
1664 && (mask & ~old_mask) != 0) {
1665 cpu_abort(env, "Raised interrupt while not in I/O function");
1667 } else {
1668 cpu_unlink_tb(env);
1672 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1674 #else /* CONFIG_USER_ONLY */
1676 void cpu_interrupt(CPUState *env, int mask)
1678 env->interrupt_request |= mask;
1679 cpu_unlink_tb(env);
1681 #endif /* CONFIG_USER_ONLY */
1683 void cpu_reset_interrupt(CPUState *env, int mask)
1685 env->interrupt_request &= ~mask;
1688 void cpu_exit(CPUState *env)
1690 env->exit_request = 1;
1691 cpu_unlink_tb(env);
1694 const CPULogItem cpu_log_items[] = {
1695 { CPU_LOG_TB_OUT_ASM, "out_asm",
1696 "show generated host assembly code for each compiled TB" },
1697 { CPU_LOG_TB_IN_ASM, "in_asm",
1698 "show target assembly code for each compiled TB" },
1699 { CPU_LOG_TB_OP, "op",
1700 "show micro ops for each compiled TB" },
1701 { CPU_LOG_TB_OP_OPT, "op_opt",
1702 "show micro ops "
1703 #ifdef TARGET_I386
1704 "before eflags optimization and "
1705 #endif
1706 "after liveness analysis" },
1707 { CPU_LOG_INT, "int",
1708 "show interrupts/exceptions in short format" },
1709 { CPU_LOG_EXEC, "exec",
1710 "show trace before each executed TB (lots of logs)" },
1711 { CPU_LOG_TB_CPU, "cpu",
1712 "show CPU state before block translation" },
1713 #ifdef TARGET_I386
1714 { CPU_LOG_PCALL, "pcall",
1715 "show protected mode far calls/returns/exceptions" },
1716 { CPU_LOG_RESET, "cpu_reset",
1717 "show CPU state before CPU resets" },
1718 #endif
1719 #ifdef DEBUG_IOPORT
1720 { CPU_LOG_IOPORT, "ioport",
1721 "show all i/o ports accesses" },
1722 #endif
1723 { 0, NULL, NULL },
1726 #ifndef CONFIG_USER_ONLY
1727 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1728 = QLIST_HEAD_INITIALIZER(memory_client_list);
1730 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1731 ram_addr_t size,
1732 ram_addr_t phys_offset,
1733 bool log_dirty)
1735 CPUPhysMemoryClient *client;
1736 QLIST_FOREACH(client, &memory_client_list, list) {
1737 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1741 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1742 target_phys_addr_t end)
1744 CPUPhysMemoryClient *client;
1745 QLIST_FOREACH(client, &memory_client_list, list) {
1746 int r = client->sync_dirty_bitmap(client, start, end);
1747 if (r < 0)
1748 return r;
1750 return 0;
1753 static int cpu_notify_migration_log(int enable)
1755 CPUPhysMemoryClient *client;
1756 QLIST_FOREACH(client, &memory_client_list, list) {
1757 int r = client->migration_log(client, enable);
1758 if (r < 0)
1759 return r;
1761 return 0;
1764 struct last_map {
1765 target_phys_addr_t start_addr;
1766 ram_addr_t size;
1767 ram_addr_t phys_offset;
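/* 'last_map' accumulates runs of pages whose guest physical addresses and
   phys_offsets are both contiguous during the walk below, so a client's
   set_memory() callback is invoked once per merged region rather than once
   per page. */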
1770 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1771 * address. Each intermediate table provides the next L2_BITs of guest
1772 physical address space. The number of levels varies based on host and
1773 * guest configuration, making it efficient to build the final guest
1774 * physical address by seeding the L1 offset and shifting and adding in
1775 * each L2 offset as we recurse through them. */
1776 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1777 void **lp, target_phys_addr_t addr,
1778 struct last_map *map)
1780 int i;
1782 if (*lp == NULL) {
1783 return;
1785 if (level == 0) {
1786 PhysPageDesc *pd = *lp;
1787 addr <<= L2_BITS + TARGET_PAGE_BITS;
1788 for (i = 0; i < L2_SIZE; ++i) {
1789 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1790 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1792 if (map->size &&
1793 start_addr == map->start_addr + map->size &&
1794 pd[i].phys_offset == map->phys_offset + map->size) {
1796 map->size += TARGET_PAGE_SIZE;
1797 continue;
1798 } else if (map->size) {
1799 client->set_memory(client, map->start_addr,
1800 map->size, map->phys_offset, false);
1803 map->start_addr = start_addr;
1804 map->size = TARGET_PAGE_SIZE;
1805 map->phys_offset = pd[i].phys_offset;
1808 } else {
1809 void **pp = *lp;
1810 for (i = 0; i < L2_SIZE; ++i) {
1811 phys_page_for_each_1(client, level - 1, pp + i,
1812 (addr << L2_BITS) | i, map);
1817 static void phys_page_for_each(CPUPhysMemoryClient *client)
1819 int i;
1820 struct last_map map = { };
1822 for (i = 0; i < P_L1_SIZE; ++i) {
1823 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1824 l1_phys_map + i, i, &map);
1826 if (map.size) {
1827 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1828 false);
1832 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1834 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1835 phys_page_for_each(client);
1838 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1840 QLIST_REMOVE(client, list);
1842 #endif
1844 static int cmp1(const char *s1, int n, const char *s2)
1846 if (strlen(s2) != n)
1847 return 0;
1848 return memcmp(s1, s2, n) == 0;
1851 /* takes a comma separated list of log masks. Return 0 if error. */
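/* For example, "in_asm,int" yields CPU_LOG_TB_IN_ASM | CPU_LOG_INT, and the
   special name "all" enables every entry of cpu_log_items[]. */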
1852 int cpu_str_to_log_mask(const char *str)
1854 const CPULogItem *item;
1855 int mask;
1856 const char *p, *p1;
1858 p = str;
1859 mask = 0;
1860 for(;;) {
1861 p1 = strchr(p, ',');
1862 if (!p1)
1863 p1 = p + strlen(p);
1864 if(cmp1(p,p1-p,"all")) {
1865 for(item = cpu_log_items; item->mask != 0; item++) {
1866 mask |= item->mask;
1868 } else {
1869 for(item = cpu_log_items; item->mask != 0; item++) {
1870 if (cmp1(p, p1 - p, item->name))
1871 goto found;
1873 return 0;
1875 found:
1876 mask |= item->mask;
1877 if (*p1 != ',')
1878 break;
1879 p = p1 + 1;
1881 return mask;
1884 void cpu_abort(CPUState *env, const char *fmt, ...)
1886 va_list ap;
1887 va_list ap2;
1889 va_start(ap, fmt);
1890 va_copy(ap2, ap);
1891 fprintf(stderr, "qemu: fatal: ");
1892 vfprintf(stderr, fmt, ap);
1893 fprintf(stderr, "\n");
1894 #ifdef TARGET_I386
1895 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1896 #else
1897 cpu_dump_state(env, stderr, fprintf, 0);
1898 #endif
1899 if (qemu_log_enabled()) {
1900 qemu_log("qemu: fatal: ");
1901 qemu_log_vprintf(fmt, ap2);
1902 qemu_log("\n");
1903 #ifdef TARGET_I386
1904 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1905 #else
1906 log_cpu_state(env, 0);
1907 #endif
1908 qemu_log_flush();
1909 qemu_log_close();
1911 va_end(ap2);
1912 va_end(ap);
1913 #if defined(CONFIG_USER_ONLY)
1915 struct sigaction act;
1916 sigfillset(&act.sa_mask);
1917 act.sa_handler = SIG_DFL;
1918 sigaction(SIGABRT, &act, NULL);
1920 #endif
1921 abort();
1924 CPUState *cpu_copy(CPUState *env)
1926 CPUState *new_env = cpu_init(env->cpu_model_str);
1927 CPUState *next_cpu = new_env->next_cpu;
1928 int cpu_index = new_env->cpu_index;
1929 #if defined(TARGET_HAS_ICE)
1930 CPUBreakpoint *bp;
1931 CPUWatchpoint *wp;
1932 #endif
1934 memcpy(new_env, env, sizeof(CPUState));
1936 /* Preserve chaining and index. */
1937 new_env->next_cpu = next_cpu;
1938 new_env->cpu_index = cpu_index;
1940 /* Clone all break/watchpoints.
1941 Note: Once we support ptrace with hw-debug register access, make sure
1942 BP_CPU break/watchpoints are handled correctly on clone. */
1943 QTAILQ_INIT(&env->breakpoints);
1944 QTAILQ_INIT(&env->watchpoints);
1945 #if defined(TARGET_HAS_ICE)
1946 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1947 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1949 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1950 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1951 wp->flags, NULL);
1953 #endif
1955 return new_env;
1958 #if !defined(CONFIG_USER_ONLY)
1960 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1962 unsigned int i;
1964 /* Discard jump cache entries for any tb which might potentially
1965 overlap the flushed page. */
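/* A TB spans at most two pages, so in addition to entries hashed from 'addr'
   itself we also drop entries hashed from the preceding page, whose TBs may
   extend into the flushed one; hence the two memset() calls. */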
1966 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1967 memset (&env->tb_jmp_cache[i], 0,
1968 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1970 i = tb_jmp_cache_hash_page(addr);
1971 memset (&env->tb_jmp_cache[i], 0,
1972 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1975 static CPUTLBEntry s_cputlb_empty_entry = {
1976 .addr_read = -1,
1977 .addr_write = -1,
1978 .addr_code = -1,
1979 .addend = -1,
1982 /* NOTE: if flush_global is true, also flush global entries (not
1983 implemented yet) */
1984 void tlb_flush(CPUState *env, int flush_global)
1986 int i;
1988 #if defined(DEBUG_TLB)
1989 printf("tlb_flush:\n");
1990 #endif
1991 /* must reset current TB so that interrupts cannot modify the
1992 links while we are modifying them */
1993 env->current_tb = NULL;
1995 for(i = 0; i < CPU_TLB_SIZE; i++) {
1996 int mmu_idx;
1997 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1998 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2002 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2004 env->tlb_flush_addr = -1;
2005 env->tlb_flush_mask = 0;
2006 tlb_flush_count++;
2009 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2011 if (addr == (tlb_entry->addr_read &
2012 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2013 addr == (tlb_entry->addr_write &
2014 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2015 addr == (tlb_entry->addr_code &
2016 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2017 *tlb_entry = s_cputlb_empty_entry;
2021 void tlb_flush_page(CPUState *env, target_ulong addr)
2023 int i;
2024 int mmu_idx;
2026 #if defined(DEBUG_TLB)
2027 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2028 #endif
2029 /* Check if we need to flush due to large pages. */
2030 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2031 #if defined(DEBUG_TLB)
2032 printf("tlb_flush_page: forced full flush ("
2033 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2034 env->tlb_flush_addr, env->tlb_flush_mask);
2035 #endif
2036 tlb_flush(env, 1);
2037 return;
2039 /* must reset current TB so that interrupts cannot modify the
2040 links while we are modifying them */
2041 env->current_tb = NULL;
2043 addr &= TARGET_PAGE_MASK;
2044 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2045 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2046 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2048 tlb_flush_jmp_cache(env, addr);
2051 /* update the TLBs so that writes to code in the virtual page 'addr'
2052 can be detected */
2053 static void tlb_protect_code(ram_addr_t ram_addr)
2055 cpu_physical_memory_reset_dirty(ram_addr,
2056 ram_addr + TARGET_PAGE_SIZE,
2057 CODE_DIRTY_FLAG);
2060 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2061 tested for self-modifying code */
2062 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2063 target_ulong vaddr)
2065 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
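/* Downgrade RAM write entries falling inside [start, start + length) to
   TLB_NOTDIRTY, so the next store to such a page takes the slow path and
   re-marks the page dirty. */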
2068 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2069 unsigned long start, unsigned long length)
2071 unsigned long addr;
2072 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2073 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2074 if ((addr - start) < length) {
2075 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2080 /* Note: start and end must be within the same ram block. */
2081 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2082 int dirty_flags)
2084 CPUState *env;
2085 unsigned long length, start1;
2086 int i;
2088 start &= TARGET_PAGE_MASK;
2089 end = TARGET_PAGE_ALIGN(end);
2091 length = end - start;
2092 if (length == 0)
2093 return;
2094 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2096 /* we modify the TLB cache so that the dirty bit will be set again
2097 when accessing the range */
2098 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2099 /* Check that we don't span multiple blocks - this breaks the
2100 address comparisons below. */
2101 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2102 != (end - 1) - start) {
2103 abort();
2106 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2107 int mmu_idx;
2108 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2109 for(i = 0; i < CPU_TLB_SIZE; i++)
2110 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2111 start1, length);
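/* Note that start1 is a host virtual address, not a guest ram offset:
   tlb_reset_dirty_range() compares host addresses, because each TLB entry
   stores (host_ptr - vaddr) in its addend field.  This is also why the
   range must not span two RAM blocks, which need not be contiguous in the
   host address space. */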
2116 int cpu_physical_memory_set_dirty_tracking(int enable)
2118 int ret = 0;
2119 in_migration = enable;
2120 ret = cpu_notify_migration_log(!!enable);
2121 return ret;
2124 int cpu_physical_memory_get_dirty_tracking(void)
2126 return in_migration;
2129 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2130 target_phys_addr_t end_addr)
2132 int ret;
2134 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2135 return ret;
2138 int cpu_physical_log_start(target_phys_addr_t start_addr,
2139 ram_addr_t size)
2141 CPUPhysMemoryClient *client;
2142 QLIST_FOREACH(client, &memory_client_list, list) {
2143 if (client->log_start) {
2144 int r = client->log_start(client, start_addr, size);
2145 if (r < 0) {
2146 return r;
2150 return 0;
2153 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2154 ram_addr_t size)
2156 CPUPhysMemoryClient *client;
2157 QLIST_FOREACH(client, &memory_client_list, list) {
2158 if (client->log_stop) {
2159 int r = client->log_stop(client, start_addr, size);
2160 if (r < 0) {
2161 return r;
2165 return 0;
2168 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2170 ram_addr_t ram_addr;
2171 void *p;
2173 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2174 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2175 + tlb_entry->addend);
2176 ram_addr = qemu_ram_addr_from_host_nofail(p);
2177 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2178 tlb_entry->addr_write |= TLB_NOTDIRTY;
2183 /* update the TLB according to the current state of the dirty bits */
2184 void cpu_tlb_update_dirty(CPUState *env)
2186 int i;
2187 int mmu_idx;
2188 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2189 for(i = 0; i < CPU_TLB_SIZE; i++)
2190 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2194 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2196 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2197 tlb_entry->addr_write = vaddr;
2200 /* update the TLB corresponding to virtual page vaddr
2201 so that it is no longer dirty */
2202 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2204 int i;
2205 int mmu_idx;
2207 vaddr &= TARGET_PAGE_MASK;
2208 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2209 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2210 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2213 /* Our TLB does not support large pages, so remember the area covered by
2214 large pages and trigger a full TLB flush if these are invalidated. */
2215 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2216 target_ulong size)
2218 target_ulong mask = ~(size - 1);
2220 if (env->tlb_flush_addr == (target_ulong)-1) {
2221 env->tlb_flush_addr = vaddr & mask;
2222 env->tlb_flush_mask = mask;
2223 return;
2225 /* Extend the existing region to include the new page.
2226 This is a compromise between unnecessary flushes and the cost
2227 of maintaining a full variable size TLB. */
2228 mask &= env->tlb_flush_mask;
2229 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2230 mask <<= 1;
2232 env->tlb_flush_addr &= mask;
2233 env->tlb_flush_mask = mask;
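/* Worked example (assuming a 32-bit target_ulong and 2 MB large pages):
   a first call with vaddr 0x40200000 records addr 0x40200000 and mask
   0xffe00000.  A later call with vaddr 0x40800000 finds
   (0x40200000 ^ 0x40800000) & 0xffe00000 != 0, so the mask is widened to
   0xff000000; the tracked region becomes 0x40000000-0x40ffffff, and
   flushing any page inside it now forces a full TLB flush. */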
2236 /* Add a new TLB entry. At most one entry for a given virtual address
2237 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2238 supplied size is used only by tlb_flush_page. */
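/* The generated fast path relies on te->addend holding
   (host_page_ptr - vaddr): adding the guest virtual address back to the
   addend yields the host address directly on a TLB hit.  The iotlb value
   stored alongside it plays the same role for the slow path
   (MMIO, notdirty and watchpoint accesses). */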
2239 void tlb_set_page(CPUState *env, target_ulong vaddr,
2240 target_phys_addr_t paddr, int prot,
2241 int mmu_idx, target_ulong size)
2243 PhysPageDesc *p;
2244 unsigned long pd;
2245 unsigned int index;
2246 target_ulong address;
2247 target_ulong code_address;
2248 unsigned long addend;
2249 CPUTLBEntry *te;
2250 CPUWatchpoint *wp;
2251 target_phys_addr_t iotlb;
2253 assert(size >= TARGET_PAGE_SIZE);
2254 if (size != TARGET_PAGE_SIZE) {
2255 tlb_add_large_page(env, vaddr, size);
2257 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2258 if (!p) {
2259 pd = IO_MEM_UNASSIGNED;
2260 } else {
2261 pd = p->phys_offset;
2263 #if defined(DEBUG_TLB)
2264 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2265 " prot=%x idx=%d pd=0x%08lx\n",
2266 vaddr, paddr, prot, mmu_idx, pd);
2267 #endif
2269 address = vaddr;
2270 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2271 /* IO memory case (romd handled later) */
2272 address |= TLB_MMIO;
2274 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2275 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2276 /* Normal RAM. */
2277 iotlb = pd & TARGET_PAGE_MASK;
2278 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2279 iotlb |= IO_MEM_NOTDIRTY;
2280 else
2281 iotlb |= IO_MEM_ROM;
2282 } else {
2283 /* IO handlers are currently passed a physical address.
2284 It would be nice to pass an offset from the base address
2285 of that region. This would avoid having to special case RAM,
2286 and avoid full address decoding in every device.
2287 We can't use the high bits of pd for this because
2288 IO_MEM_ROMD uses these as a ram address. */
2289 iotlb = (pd & ~TARGET_PAGE_MASK);
2290 if (p) {
2291 iotlb += p->region_offset;
2292 } else {
2293 iotlb += paddr;
2297 code_address = address;
2298 /* Make accesses to pages with watchpoints go via the
2299 watchpoint trap routines. */
2300 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2301 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2302 /* Avoid trapping reads of pages with a write breakpoint. */
2303 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2304 iotlb = io_mem_watch + paddr;
2305 address |= TLB_MMIO;
2306 break;
2311 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2312 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2313 te = &env->tlb_table[mmu_idx][index];
2314 te->addend = addend - vaddr;
2315 if (prot & PAGE_READ) {
2316 te->addr_read = address;
2317 } else {
2318 te->addr_read = -1;
2321 if (prot & PAGE_EXEC) {
2322 te->addr_code = code_address;
2323 } else {
2324 te->addr_code = -1;
2326 if (prot & PAGE_WRITE) {
2327 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2328 (pd & IO_MEM_ROMD)) {
2329 /* Write access calls the I/O callback. */
2330 te->addr_write = address | TLB_MMIO;
2331 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2332 !cpu_physical_memory_is_dirty(pd)) {
2333 te->addr_write = address | TLB_NOTDIRTY;
2334 } else {
2335 te->addr_write = address;
2337 } else {
2338 te->addr_write = -1;
2342 #else
2344 void tlb_flush(CPUState *env, int flush_global)
2348 void tlb_flush_page(CPUState *env, target_ulong addr)
2353 * Walks guest process memory "regions" one by one
2354 * and calls callback function 'fn' for each region.
2357 struct walk_memory_regions_data
2359 walk_memory_regions_fn fn;
2360 void *priv;
2361 unsigned long start;
2362 int prot;
2365 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2366 abi_ulong end, int new_prot)
2368 if (data->start != -1ul) {
2369 int rc = data->fn(data->priv, data->start, end, data->prot);
2370 if (rc != 0) {
2371 return rc;
2375 data->start = (new_prot ? end : -1ul);
2376 data->prot = new_prot;
2378 return 0;
2381 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2382 abi_ulong base, int level, void **lp)
2384 abi_ulong pa;
2385 int i, rc;
2387 if (*lp == NULL) {
2388 return walk_memory_regions_end(data, base, 0);
2391 if (level == 0) {
2392 PageDesc *pd = *lp;
2393 for (i = 0; i < L2_SIZE; ++i) {
2394 int prot = pd[i].flags;
2396 pa = base | (i << TARGET_PAGE_BITS);
2397 if (prot != data->prot) {
2398 rc = walk_memory_regions_end(data, pa, prot);
2399 if (rc != 0) {
2400 return rc;
2404 } else {
2405 void **pp = *lp;
2406 for (i = 0; i < L2_SIZE; ++i) {
2407 pa = base | ((abi_ulong)i <<
2408 (TARGET_PAGE_BITS + L2_BITS * level));
2409 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2410 if (rc != 0) {
2411 return rc;
2416 return 0;
2419 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2421 struct walk_memory_regions_data data;
2422 unsigned long i;
2424 data.fn = fn;
2425 data.priv = priv;
2426 data.start = -1ul;
2427 data.prot = 0;
2429 for (i = 0; i < V_L1_SIZE; i++) {
2430 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2431 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2432 if (rc != 0) {
2433 return rc;
2437 return walk_memory_regions_end(&data, 0, 0);
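/* The walk visits the V_L1_SIZE top-level entries and recurses through the
   L2 tables.  walk_memory_regions_end() coalesces consecutive pages that
   share the same protection flags into a single callback invocation; the
   final call above closes whatever region was still open when the walk
   reached the end of the address space. */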
2440 static int dump_region(void *priv, abi_ulong start,
2441 abi_ulong end, unsigned long prot)
2443 FILE *f = (FILE *)priv;
2445 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2446 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2447 start, end, end - start,
2448 ((prot & PAGE_READ) ? 'r' : '-'),
2449 ((prot & PAGE_WRITE) ? 'w' : '-'),
2450 ((prot & PAGE_EXEC) ? 'x' : '-'));
2452 return (0);
2455 /* dump memory mappings */
2456 void page_dump(FILE *f)
2458 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2459 "start", "end", "size", "prot");
2460 walk_memory_regions(f, dump_region);
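/* Example output (hypothetical mappings, 32-bit ABI):
 *
 *   start    end      size     prot
 *   00400000-00448000 00048000 r-x
 *   00448000-00453000 0000b000 rw-
 */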
2463 int page_get_flags(target_ulong address)
2465 PageDesc *p;
2467 p = page_find(address >> TARGET_PAGE_BITS);
2468 if (!p)
2469 return 0;
2470 return p->flags;
2473 /* Modify the flags of a page and invalidate the code if necessary.
2474 The flag PAGE_WRITE_ORG is positioned automatically depending
2475 on PAGE_WRITE. The mmap_lock should already be held. */
2476 void page_set_flags(target_ulong start, target_ulong end, int flags)
2478 target_ulong addr, len;
2480 /* This function should never be called with addresses outside the
2481 guest address space. If this assert fires, it probably indicates
2482 a missing call to h2g_valid. */
2483 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2484 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2485 #endif
2486 assert(start < end);
2488 start = start & TARGET_PAGE_MASK;
2489 end = TARGET_PAGE_ALIGN(end);
2491 if (flags & PAGE_WRITE) {
2492 flags |= PAGE_WRITE_ORG;
2495 for (addr = start, len = end - start;
2496 len != 0;
2497 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2498 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2500 /* If the write protection bit is set, then we invalidate
2501 the code inside. */
2502 if (!(p->flags & PAGE_WRITE) &&
2503 (flags & PAGE_WRITE) &&
2504 p->first_tb) {
2505 tb_invalidate_phys_page(addr, 0, NULL);
2507 p->flags = flags;
2511 int page_check_range(target_ulong start, target_ulong len, int flags)
2513 PageDesc *p;
2514 target_ulong end;
2515 target_ulong addr;
2517 /* This function should never be called with addresses outside the
2518 guest address space. If this assert fires, it probably indicates
2519 a missing call to h2g_valid. */
2520 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2521 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2522 #endif
2524 if (len == 0) {
2525 return 0;
2527 if (start + len - 1 < start) {
2528 /* We've wrapped around. */
2529 return -1;
2532 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2533 start = start & TARGET_PAGE_MASK;
2535 for (addr = start, len = end - start;
2536 len != 0;
2537 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2538 p = page_find(addr >> TARGET_PAGE_BITS);
2539 if (!p)
2540 return -1;
2541 if (!(p->flags & PAGE_VALID))
2542 return -1;
2544 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2545 return -1;
2546 if (flags & PAGE_WRITE) {
2547 if (!(p->flags & PAGE_WRITE_ORG))
2548 return -1;
2549 /* unprotect the page if it was put read-only because it
2550 contains translated code */
2551 if (!(p->flags & PAGE_WRITE)) {
2552 if (!page_unprotect(addr, 0, NULL))
2553 return -1;
2555 return 0;
2558 return 0;
2561 /* called from signal handler: invalidate the code and unprotect the
2562 page. Return TRUE if the fault was successfully handled. */
2563 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2565 unsigned int prot;
2566 PageDesc *p;
2567 target_ulong host_start, host_end, addr;
2569 /* Technically this isn't safe inside a signal handler. However we
2570 know this only ever happens in a synchronous SEGV handler, so in
2571 practice it seems to be ok. */
2572 mmap_lock();
2574 p = page_find(address >> TARGET_PAGE_BITS);
2575 if (!p) {
2576 mmap_unlock();
2577 return 0;
2580 /* if the page was really writable, then we change its
2581 protection back to writable */
2582 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2583 host_start = address & qemu_host_page_mask;
2584 host_end = host_start + qemu_host_page_size;
2586 prot = 0;
2587 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2588 p = page_find(addr >> TARGET_PAGE_BITS);
2589 p->flags |= PAGE_WRITE;
2590 prot |= p->flags;
2592 /* and since the content will be modified, we must invalidate
2593 the corresponding translated code. */
2594 tb_invalidate_phys_page(addr, pc, puc);
2595 #ifdef DEBUG_TB_CHECK
2596 tb_invalidate_check(addr);
2597 #endif
2599 mprotect((void *)g2h(host_start), qemu_host_page_size,
2600 prot & PAGE_BITS);
2602 mmap_unlock();
2603 return 1;
2605 mmap_unlock();
2606 return 0;
2609 static inline void tlb_set_dirty(CPUState *env,
2610 unsigned long addr, target_ulong vaddr)
2613 #endif /* defined(CONFIG_USER_ONLY) */
2615 #if !defined(CONFIG_USER_ONLY)
2617 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2618 typedef struct subpage_t {
2619 target_phys_addr_t base;
2620 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2621 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2622 } subpage_t;
2624 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2625 ram_addr_t memory, ram_addr_t region_offset);
2626 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2627 ram_addr_t orig_memory,
2628 ram_addr_t region_offset);
2629 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2630 need_subpage) \
2631 do { \
2632 if (addr > start_addr) \
2633 start_addr2 = 0; \
2634 else { \
2635 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2636 if (start_addr2 > 0) \
2637 need_subpage = 1; \
2640 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2641 end_addr2 = TARGET_PAGE_SIZE - 1; \
2642 else { \
2643 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2644 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2645 need_subpage = 1; \
2647 } while (0)
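/* Worked example (assuming 4 KiB target pages): registering a region with
   start_addr == 0x2800 and orig_size == 0x400 while processing the page at
   addr == 0x2800 yields start_addr2 == 0x800 and end_addr2 == 0xbff, and
   need_subpage is set because the region only covers offsets 0x800-0xbff
   of that page rather than the whole page. */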
2649 /* register physical memory.
2650 For RAM, 'size' must be a multiple of the target page size.
2651 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2652 io memory page. The address used when calling the IO function is
2653 the offset from the start of the region, plus region_offset. Both
2654 start_addr and region_offset are rounded down to a page boundary
2655 before calculating this offset. This should not be a problem unless
2656 the low bits of start_addr and region_offset differ. */
2657 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2658 ram_addr_t size,
2659 ram_addr_t phys_offset,
2660 ram_addr_t region_offset,
2661 bool log_dirty)
2663 target_phys_addr_t addr, end_addr;
2664 PhysPageDesc *p;
2665 CPUState *env;
2666 ram_addr_t orig_size = size;
2667 subpage_t *subpage;
2669 assert(size);
2670 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2672 if (phys_offset == IO_MEM_UNASSIGNED) {
2673 region_offset = start_addr;
2675 region_offset &= TARGET_PAGE_MASK;
2676 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2677 end_addr = start_addr + (target_phys_addr_t)size;
2679 addr = start_addr;
2680 do {
2681 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2682 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2683 ram_addr_t orig_memory = p->phys_offset;
2684 target_phys_addr_t start_addr2, end_addr2;
2685 int need_subpage = 0;
2687 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2688 need_subpage);
2689 if (need_subpage) {
2690 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2691 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2692 &p->phys_offset, orig_memory,
2693 p->region_offset);
2694 } else {
2695 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2696 >> IO_MEM_SHIFT];
2698 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2699 region_offset);
2700 p->region_offset = 0;
2701 } else {
2702 p->phys_offset = phys_offset;
2703 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2704 (phys_offset & IO_MEM_ROMD))
2705 phys_offset += TARGET_PAGE_SIZE;
2707 } else {
2708 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2709 p->phys_offset = phys_offset;
2710 p->region_offset = region_offset;
2711 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2712 (phys_offset & IO_MEM_ROMD)) {
2713 phys_offset += TARGET_PAGE_SIZE;
2714 } else {
2715 target_phys_addr_t start_addr2, end_addr2;
2716 int need_subpage = 0;
2718 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2719 end_addr2, need_subpage);
2721 if (need_subpage) {
2722 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2723 &p->phys_offset, IO_MEM_UNASSIGNED,
2724 addr & TARGET_PAGE_MASK);
2725 subpage_register(subpage, start_addr2, end_addr2,
2726 phys_offset, region_offset);
2727 p->region_offset = 0;
2731 region_offset += TARGET_PAGE_SIZE;
2732 addr += TARGET_PAGE_SIZE;
2733 } while (addr != end_addr);
2735 /* since each CPU stores ram addresses in its TLB cache, we must
2736 reset the modified entries */
2737 /* XXX: slow ! */
2738 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2739 tlb_flush(env, 1);
2743 /* XXX: temporary until new memory mapping API */
2744 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2746 PhysPageDesc *p;
2748 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2749 if (!p)
2750 return IO_MEM_UNASSIGNED;
2751 return p->phys_offset;
2754 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2756 if (kvm_enabled())
2757 kvm_coalesce_mmio_region(addr, size);
2760 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2762 if (kvm_enabled())
2763 kvm_uncoalesce_mmio_region(addr, size);
2766 void qemu_flush_coalesced_mmio_buffer(void)
2768 if (kvm_enabled())
2769 kvm_flush_coalesced_mmio_buffer();
2772 #if defined(__linux__) && !defined(TARGET_S390X)
2774 #include <sys/vfs.h>
2776 #define HUGETLBFS_MAGIC 0x958458f6
2778 static long gethugepagesize(const char *path)
2780 struct statfs fs;
2781 int ret;
2783 do {
2784 ret = statfs(path, &fs);
2785 } while (ret != 0 && errno == EINTR);
2787 if (ret != 0) {
2788 perror(path);
2789 return 0;
2792 if (fs.f_type != HUGETLBFS_MAGIC)
2793 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2795 return fs.f_bsize;
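/* Note: on hugetlbfs the f_bsize reported by statfs() is the huge page
   size itself (for example 2 MiB on a typical x86 host); on other
   filesystems it is merely the block size, hence the warning above. */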
2798 static void *file_ram_alloc(RAMBlock *block,
2799 ram_addr_t memory,
2800 const char *path)
2802 char *filename;
2803 void *area;
2804 int fd;
2805 #ifdef MAP_POPULATE
2806 int flags;
2807 #endif
2808 unsigned long hpagesize;
2810 hpagesize = gethugepagesize(path);
2811 if (!hpagesize) {
2812 return NULL;
2815 if (memory < hpagesize) {
2816 return NULL;
2819 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2820 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2821 return NULL;
2824 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2825 return NULL;
2828 fd = mkstemp(filename);
2829 if (fd < 0) {
2830 perror("unable to create backing store for hugepages");
2831 free(filename);
2832 return NULL;
2834 unlink(filename);
2835 free(filename);
2837 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2840 * ftruncate is not supported by hugetlbfs in older
2841 * hosts, so don't bother bailing out on errors.
2842 * If anything goes wrong with it under other filesystems,
2843 * mmap will fail.
2845 if (ftruncate(fd, memory))
2846 perror("ftruncate");
2848 #ifdef MAP_POPULATE
2849 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2850 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2851 * to sidestep this quirk.
2853 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2854 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2855 #else
2856 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2857 #endif
2858 if (area == MAP_FAILED) {
2859 perror("file_ram_alloc: can't mmap RAM pages");
2860 close(fd);
2861 return (NULL);
2863 block->fd = fd;
2864 return area;
2866 #endif
2868 static ram_addr_t find_ram_offset(ram_addr_t size)
2870 RAMBlock *block, *next_block;
2871 ram_addr_t offset = 0, mingap = ULONG_MAX;
2873 if (QLIST_EMPTY(&ram_list.blocks))
2874 return 0;
2876 QLIST_FOREACH(block, &ram_list.blocks, next) {
2877 ram_addr_t end, next = ULONG_MAX;
2879 end = block->offset + block->length;
2881 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2882 if (next_block->offset >= end) {
2883 next = MIN(next, next_block->offset);
2886 if (next - end >= size && next - end < mingap) {
2887 offset = end;
2888 mingap = next - end;
2891 return offset;
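/* Best-fit example (hypothetical layout): with existing blocks at
   [0x0, 0x1000) and [0x3000, 0x4000), a request for size 0x1000 sees two
   candidate gaps: [0x1000, 0x3000) and everything above 0x4000.  The
   0x2000-byte gap is the smallest one that fits, so 0x1000 is returned. */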
2894 static ram_addr_t last_ram_offset(void)
2896 RAMBlock *block;
2897 ram_addr_t last = 0;
2899 QLIST_FOREACH(block, &ram_list.blocks, next)
2900 last = MAX(last, block->offset + block->length);
2902 return last;
2905 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2906 ram_addr_t size, void *host)
2908 RAMBlock *new_block, *block;
2910 size = TARGET_PAGE_ALIGN(size);
2911 new_block = qemu_mallocz(sizeof(*new_block));
2913 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2914 char *id = dev->parent_bus->info->get_dev_path(dev);
2915 if (id) {
2916 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2917 qemu_free(id);
2920 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2922 QLIST_FOREACH(block, &ram_list.blocks, next) {
2923 if (!strcmp(block->idstr, new_block->idstr)) {
2924 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2925 new_block->idstr);
2926 abort();
2930 new_block->offset = find_ram_offset(size);
2931 if (host) {
2932 new_block->host = host;
2933 new_block->flags |= RAM_PREALLOC_MASK;
2934 } else {
2935 if (mem_path) {
2936 #if defined (__linux__) && !defined(TARGET_S390X)
2937 new_block->host = file_ram_alloc(new_block, size, mem_path);
2938 if (!new_block->host) {
2939 new_block->host = qemu_vmalloc(size);
2940 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2942 #else
2943 fprintf(stderr, "-mem-path option unsupported\n");
2944 exit(1);
2945 #endif
2946 } else {
2947 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2948 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2949 a system-defined value, which is at least 256GB. Larger systems
2950 have larger values. We put the guest between the end of the data
2951 segment (system break) and this value. We use 32GB as a base to
2952 have enough room for the system break to grow. */
2953 new_block->host = mmap((void*)0x800000000, size,
2954 PROT_EXEC|PROT_READ|PROT_WRITE,
2955 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2956 if (new_block->host == MAP_FAILED) {
2957 fprintf(stderr, "Allocating RAM failed\n");
2958 abort();
2960 #else
2961 if (xen_mapcache_enabled()) {
2962 xen_ram_alloc(new_block->offset, size);
2963 } else {
2964 new_block->host = qemu_vmalloc(size);
2966 #endif
2967 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2970 new_block->length = size;
2972 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2974 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2975 last_ram_offset() >> TARGET_PAGE_BITS);
2976 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2977 0xff, size >> TARGET_PAGE_BITS);
2979 if (kvm_enabled())
2980 kvm_setup_guest_memory(new_block->host, size);
2982 return new_block->offset;
2985 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2987 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
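/* Typical use from board code (names are illustrative, not prescriptive):
 *
 *   ram_addr_t ram_offset = qemu_ram_alloc(NULL, "pc.ram", ram_size);
 *   cpu_register_physical_memory(0, ram_size, ram_offset | IO_MEM_RAM);
 *
 * The returned value is an offset into the global RAM space, not a host
 * pointer; use qemu_get_ram_ptr() when a host pointer is needed. */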
2990 void qemu_ram_free_from_ptr(ram_addr_t addr)
2992 RAMBlock *block;
2994 QLIST_FOREACH(block, &ram_list.blocks, next) {
2995 if (addr == block->offset) {
2996 QLIST_REMOVE(block, next);
2997 qemu_free(block);
2998 return;
3003 void qemu_ram_free(ram_addr_t addr)
3005 RAMBlock *block;
3007 QLIST_FOREACH(block, &ram_list.blocks, next) {
3008 if (addr == block->offset) {
3009 QLIST_REMOVE(block, next);
3010 if (block->flags & RAM_PREALLOC_MASK) {
3012 } else if (mem_path) {
3013 #if defined (__linux__) && !defined(TARGET_S390X)
3014 if (block->fd) {
3015 munmap(block->host, block->length);
3016 close(block->fd);
3017 } else {
3018 qemu_vfree(block->host);
3020 #else
3021 abort();
3022 #endif
3023 } else {
3024 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3025 munmap(block->host, block->length);
3026 #else
3027 if (xen_mapcache_enabled()) {
3028 qemu_invalidate_entry(block->host);
3029 } else {
3030 qemu_vfree(block->host);
3032 #endif
3034 qemu_free(block);
3035 return;
3041 #ifndef _WIN32
3042 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3044 RAMBlock *block;
3045 ram_addr_t offset;
3046 int flags;
3047 void *area, *vaddr;
3049 QLIST_FOREACH(block, &ram_list.blocks, next) {
3050 offset = addr - block->offset;
3051 if (offset < block->length) {
3052 vaddr = block->host + offset;
3053 if (block->flags & RAM_PREALLOC_MASK) {
3055 } else {
3056 flags = MAP_FIXED;
3057 munmap(vaddr, length);
3058 if (mem_path) {
3059 #if defined(__linux__) && !defined(TARGET_S390X)
3060 if (block->fd) {
3061 #ifdef MAP_POPULATE
3062 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3063 MAP_PRIVATE;
3064 #else
3065 flags |= MAP_PRIVATE;
3066 #endif
3067 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3068 flags, block->fd, offset);
3069 } else {
3070 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3071 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3072 flags, -1, 0);
3074 #else
3075 abort();
3076 #endif
3077 } else {
3078 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3079 flags |= MAP_SHARED | MAP_ANONYMOUS;
3080 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3081 flags, -1, 0);
3082 #else
3083 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3084 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3085 flags, -1, 0);
3086 #endif
3088 if (area != vaddr) {
3089 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3090 length, addr);
3091 exit(1);
3093 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3095 return;
3099 #endif /* !_WIN32 */
3101 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3102 With the exception of the softmmu code in this file, this should
3103 only be used for local memory (e.g. video ram) that the device owns,
3104 and knows it isn't going to access beyond the end of the block.
3106 It should not be used for general purpose DMA.
3107 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3109 void *qemu_get_ram_ptr(ram_addr_t addr)
3111 RAMBlock *block;
3113 QLIST_FOREACH(block, &ram_list.blocks, next) {
3114 if (addr - block->offset < block->length) {
3115 /* Move this entry to the start of the list. */
3116 if (block != QLIST_FIRST(&ram_list.blocks)) {
3117 QLIST_REMOVE(block, next);
3118 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3120 if (xen_mapcache_enabled()) {
3121 /* We need to check if the requested address is in the RAM
3122 * because we don't want to map the entire memory in QEMU.
3123 * In that case just map until the end of the page.
3125 if (block->offset == 0) {
3126 return qemu_map_cache(addr, 0, 0);
3127 } else if (block->host == NULL) {
3128 block->host = qemu_map_cache(block->offset, block->length, 1);
3131 return block->host + (addr - block->offset);
3135 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3136 abort();
3138 return NULL;
3141 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3142 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3144 void *qemu_safe_ram_ptr(ram_addr_t addr)
3146 RAMBlock *block;
3148 QLIST_FOREACH(block, &ram_list.blocks, next) {
3149 if (addr - block->offset < block->length) {
3150 if (xen_mapcache_enabled()) {
3151 /* We need to check if the requested address is in the RAM
3152 * because we don't want to map the entire memory in QEMU.
3153 * In that case just map until the end of the page.
3155 if (block->offset == 0) {
3156 return qemu_map_cache(addr, 0, 0);
3157 } else if (block->host == NULL) {
3158 block->host = qemu_map_cache(block->offset, block->length, 1);
3161 return block->host + (addr - block->offset);
3165 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3166 abort();
3168 return NULL;
3171 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3172 * but takes a size argument */
3173 void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
3175 if (xen_mapcache_enabled())
3176 return qemu_map_cache(addr, *size, 1);
3177 else {
3178 RAMBlock *block;
3180 QLIST_FOREACH(block, &ram_list.blocks, next) {
3181 if (addr - block->offset < block->length) {
3182 if (addr - block->offset + *size > block->length)
3183 *size = block->length - addr + block->offset;
3184 return block->host + (addr - block->offset);
3188 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3189 abort();
3191 *size = 0;
3192 return NULL;
3196 void qemu_put_ram_ptr(void *addr)
3198 trace_qemu_put_ram_ptr(addr);
3201 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3203 RAMBlock *block;
3204 uint8_t *host = ptr;
3206 if (xen_mapcache_enabled()) {
3207 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3208 return 0;
3211 QLIST_FOREACH(block, &ram_list.blocks, next) {
3212 /* This case happens when the block is not mapped. */
3213 if (block->host == NULL) {
3214 continue;
3216 if (host - block->host < block->length) {
3217 *ram_addr = block->offset + (host - block->host);
3218 return 0;
3222 return -1;
3225 /* Some of the softmmu routines need to translate from a host pointer
3226 (typically a TLB entry) back to a ram offset. */
3227 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3229 ram_addr_t ram_addr;
3231 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3232 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3233 abort();
3235 return ram_addr;
3238 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3240 #ifdef DEBUG_UNASSIGNED
3241 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3242 #endif
3243 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3244 do_unassigned_access(addr, 0, 0, 0, 1);
3245 #endif
3246 return 0;
3249 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3251 #ifdef DEBUG_UNASSIGNED
3252 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3253 #endif
3254 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3255 do_unassigned_access(addr, 0, 0, 0, 2);
3256 #endif
3257 return 0;
3260 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3262 #ifdef DEBUG_UNASSIGNED
3263 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3264 #endif
3265 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3266 do_unassigned_access(addr, 0, 0, 0, 4);
3267 #endif
3268 return 0;
3271 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3273 #ifdef DEBUG_UNASSIGNED
3274 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3275 #endif
3276 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3277 do_unassigned_access(addr, 1, 0, 0, 1);
3278 #endif
3281 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3283 #ifdef DEBUG_UNASSIGNED
3284 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3285 #endif
3286 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3287 do_unassigned_access(addr, 1, 0, 0, 2);
3288 #endif
3291 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3293 #ifdef DEBUG_UNASSIGNED
3294 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3295 #endif
3296 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3297 do_unassigned_access(addr, 1, 0, 0, 4);
3298 #endif
3301 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3302 unassigned_mem_readb,
3303 unassigned_mem_readw,
3304 unassigned_mem_readl,
3307 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3308 unassigned_mem_writeb,
3309 unassigned_mem_writew,
3310 unassigned_mem_writel,
3313 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3314 uint32_t val)
3316 int dirty_flags;
3317 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3318 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3319 #if !defined(CONFIG_USER_ONLY)
3320 tb_invalidate_phys_page_fast(ram_addr, 1);
3321 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3322 #endif
3324 stb_p(qemu_get_ram_ptr(ram_addr), val);
3325 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3326 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3327 /* we remove the notdirty callback only if the code has been
3328 flushed */
3329 if (dirty_flags == 0xff)
3330 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
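/* Each RAM page has one byte of dirty flags in ram_list.phys_dirty; 0xff
   means the page is already dirty for every client (VGA, code, migration).
   The notdirty_mem_write* handlers exist only to invalidate translated
   code on the first write to a page whose CODE_DIRTY_FLAG is clear; once
   all flags are set, tlb_set_dirty() restores the fast RAM write path. */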
3333 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3334 uint32_t val)
3336 int dirty_flags;
3337 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3338 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3339 #if !defined(CONFIG_USER_ONLY)
3340 tb_invalidate_phys_page_fast(ram_addr, 2);
3341 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3342 #endif
3344 stw_p(qemu_get_ram_ptr(ram_addr), val);
3345 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3346 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3347 /* we remove the notdirty callback only if the code has been
3348 flushed */
3349 if (dirty_flags == 0xff)
3350 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3353 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3354 uint32_t val)
3356 int dirty_flags;
3357 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3358 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3359 #if !defined(CONFIG_USER_ONLY)
3360 tb_invalidate_phys_page_fast(ram_addr, 4);
3361 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3362 #endif
3364 stl_p(qemu_get_ram_ptr(ram_addr), val);
3365 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3366 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3367 /* we remove the notdirty callback only if the code has been
3368 flushed */
3369 if (dirty_flags == 0xff)
3370 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3373 static CPUReadMemoryFunc * const error_mem_read[3] = {
3374 NULL, /* never used */
3375 NULL, /* never used */
3376 NULL, /* never used */
3379 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3380 notdirty_mem_writeb,
3381 notdirty_mem_writew,
3382 notdirty_mem_writel,
3385 /* Generate a debug exception if a watchpoint has been hit. */
3386 static void check_watchpoint(int offset, int len_mask, int flags)
3388 CPUState *env = cpu_single_env;
3389 target_ulong pc, cs_base;
3390 TranslationBlock *tb;
3391 target_ulong vaddr;
3392 CPUWatchpoint *wp;
3393 int cpu_flags;
3395 if (env->watchpoint_hit) {
3396 /* We re-entered the check after replacing the TB. Now raise
3397 * the debug interrupt so that it will trigger after the
3398 * current instruction. */
3399 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3400 return;
3402 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3403 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3404 if ((vaddr == (wp->vaddr & len_mask) ||
3405 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3406 wp->flags |= BP_WATCHPOINT_HIT;
3407 if (!env->watchpoint_hit) {
3408 env->watchpoint_hit = wp;
3409 tb = tb_find_pc(env->mem_io_pc);
3410 if (!tb) {
3411 cpu_abort(env, "check_watchpoint: could not find TB for "
3412 "pc=%p", (void *)env->mem_io_pc);
3414 cpu_restore_state(tb, env, env->mem_io_pc);
3415 tb_phys_invalidate(tb, -1);
3416 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3417 env->exception_index = EXCP_DEBUG;
3418 } else {
3419 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3420 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3422 cpu_resume_from_signal(env, NULL);
3424 } else {
3425 wp->flags &= ~BP_WATCHPOINT_HIT;
3430 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3431 so these check for a hit then pass through to the normal out-of-line
3432 phys routines. */
3433 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3435 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3436 return ldub_phys(addr);
3439 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3441 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3442 return lduw_phys(addr);
3445 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3447 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3448 return ldl_phys(addr);
3451 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3452 uint32_t val)
3454 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3455 stb_phys(addr, val);
3458 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3459 uint32_t val)
3461 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3462 stw_phys(addr, val);
3465 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3466 uint32_t val)
3468 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3469 stl_phys(addr, val);
3472 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3473 watch_mem_readb,
3474 watch_mem_readw,
3475 watch_mem_readl,
3478 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3479 watch_mem_writeb,
3480 watch_mem_writew,
3481 watch_mem_writel,
3484 static inline uint32_t subpage_readlen (subpage_t *mmio,
3485 target_phys_addr_t addr,
3486 unsigned int len)
3488 unsigned int idx = SUBPAGE_IDX(addr);
3489 #if defined(DEBUG_SUBPAGE)
3490 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3491 mmio, len, addr, idx);
3492 #endif
3494 addr += mmio->region_offset[idx];
3495 idx = mmio->sub_io_index[idx];
3496 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3499 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3500 uint32_t value, unsigned int len)
3502 unsigned int idx = SUBPAGE_IDX(addr);
3503 #if defined(DEBUG_SUBPAGE)
3504 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3505 __func__, mmio, len, addr, idx, value);
3506 #endif
3508 addr += mmio->region_offset[idx];
3509 idx = mmio->sub_io_index[idx];
3510 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3513 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3515 return subpage_readlen(opaque, addr, 0);
3518 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3519 uint32_t value)
3521 subpage_writelen(opaque, addr, value, 0);
3524 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3526 return subpage_readlen(opaque, addr, 1);
3529 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3530 uint32_t value)
3532 subpage_writelen(opaque, addr, value, 1);
3535 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3537 return subpage_readlen(opaque, addr, 2);
3540 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3541 uint32_t value)
3543 subpage_writelen(opaque, addr, value, 2);
3546 static CPUReadMemoryFunc * const subpage_read[] = {
3547 &subpage_readb,
3548 &subpage_readw,
3549 &subpage_readl,
3552 static CPUWriteMemoryFunc * const subpage_write[] = {
3553 &subpage_writeb,
3554 &subpage_writew,
3555 &subpage_writel,
3558 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3559 ram_addr_t memory, ram_addr_t region_offset)
3561 int idx, eidx;
3563 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3564 return -1;
3565 idx = SUBPAGE_IDX(start);
3566 eidx = SUBPAGE_IDX(end);
3567 #if defined(DEBUG_SUBPAGE)
3568 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3569 mmio, start, end, idx, eidx, memory);
3570 #endif
3571 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3572 memory = IO_MEM_UNASSIGNED;
3573 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3574 for (; idx <= eidx; idx++) {
3575 mmio->sub_io_index[idx] = memory;
3576 mmio->region_offset[idx] = region_offset;
3579 return 0;
3582 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3583 ram_addr_t orig_memory,
3584 ram_addr_t region_offset)
3586 subpage_t *mmio;
3587 int subpage_memory;
3589 mmio = qemu_mallocz(sizeof(subpage_t));
3591 mmio->base = base;
3592 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3593 DEVICE_NATIVE_ENDIAN);
3594 #if defined(DEBUG_SUBPAGE)
3595 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3596 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3597 #endif
3598 *phys = subpage_memory | IO_MEM_SUBPAGE;
3599 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3601 return mmio;
3604 static int get_free_io_mem_idx(void)
3606 int i;
3608 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3609 if (!io_mem_used[i]) {
3610 io_mem_used[i] = 1;
3611 return i;
3613 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3614 return -1;
3618 * Usually, devices operate in little-endian mode; some, however, operate
3619 * in big-endian mode. A device's MMIO accesses are byte-swapped when it is
3620 * plugged onto a CPU of the opposite endianness.
3622 * CPU Device swap?
3624 * little little no
3625 * little big yes
3626 * big little yes
3627 * big big no
3630 typedef struct SwapEndianContainer {
3631 CPUReadMemoryFunc *read[3];
3632 CPUWriteMemoryFunc *write[3];
3633 void *opaque;
3634 } SwapEndianContainer;
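/* Illustrative example (hypothetical device): a 32-bit guest store of
   0x12345678 to a device model of the opposite endianness goes through
   swapendian_mem_writel() below, so the wrapped handler receives
   bswap32(0x12345678) == 0x78563412. */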
3636 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3638 uint32_t val;
3639 SwapEndianContainer *c = opaque;
3640 val = c->read[0](c->opaque, addr);
3641 return val;
3644 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3646 uint32_t val;
3647 SwapEndianContainer *c = opaque;
3648 val = bswap16(c->read[1](c->opaque, addr));
3649 return val;
3652 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3654 uint32_t val;
3655 SwapEndianContainer *c = opaque;
3656 val = bswap32(c->read[2](c->opaque, addr));
3657 return val;
3660 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3661 swapendian_mem_readb,
3662 swapendian_mem_readw,
3663 swapendian_mem_readl
3666 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3667 uint32_t val)
3669 SwapEndianContainer *c = opaque;
3670 c->write[0](c->opaque, addr, val);
3673 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3674 uint32_t val)
3676 SwapEndianContainer *c = opaque;
3677 c->write[1](c->opaque, addr, bswap16(val));
3680 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3681 uint32_t val)
3683 SwapEndianContainer *c = opaque;
3684 c->write[2](c->opaque, addr, bswap32(val));
3687 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3688 swapendian_mem_writeb,
3689 swapendian_mem_writew,
3690 swapendian_mem_writel
3693 static void swapendian_init(int io_index)
3695 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3696 int i;
3698 /* Swap mmio for big endian targets */
3699 c->opaque = io_mem_opaque[io_index];
3700 for (i = 0; i < 3; i++) {
3701 c->read[i] = io_mem_read[io_index][i];
3702 c->write[i] = io_mem_write[io_index][i];
3704 io_mem_read[io_index][i] = swapendian_readfn[i];
3705 io_mem_write[io_index][i] = swapendian_writefn[i];
3707 io_mem_opaque[io_index] = c;
3710 static void swapendian_del(int io_index)
3712 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3713 qemu_free(io_mem_opaque[io_index]);
3717 /* mem_read and mem_write are arrays of the functions used to access a
3718 byte (index 0), a word (index 1) and a dword (index 2). Individual
3719 functions can be omitted by passing a NULL function pointer.
3720 If io_index is non-zero, the corresponding io zone is
3721 modified. If it is zero, a new io zone is allocated. The return
3722 value can be used with cpu_register_physical_memory(). (-1) is
3723 returned on error. */
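/* The returned value is (io_index << IO_MEM_SHIFT), so it fits in the low,
   sub-page bits of a phys_offset.  For instance, taking IO_MEM_SHIFT to be
   3 purely for illustration, io_index 5 encodes as 0x28 and can be ORed
   into a page-aligned offset handed to cpu_register_physical_memory()
   without disturbing the page number. */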
3724 static int cpu_register_io_memory_fixed(int io_index,
3725 CPUReadMemoryFunc * const *mem_read,
3726 CPUWriteMemoryFunc * const *mem_write,
3727 void *opaque, enum device_endian endian)
3729 int i;
3731 if (io_index <= 0) {
3732 io_index = get_free_io_mem_idx();
3733 if (io_index == -1)
3734 return io_index;
3735 } else {
3736 io_index >>= IO_MEM_SHIFT;
3737 if (io_index >= IO_MEM_NB_ENTRIES)
3738 return -1;
3741 for (i = 0; i < 3; ++i) {
3742 io_mem_read[io_index][i]
3743 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3745 for (i = 0; i < 3; ++i) {
3746 io_mem_write[io_index][i]
3747 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3749 io_mem_opaque[io_index] = opaque;
3751 switch (endian) {
3752 case DEVICE_BIG_ENDIAN:
3753 #ifndef TARGET_WORDS_BIGENDIAN
3754 swapendian_init(io_index);
3755 #endif
3756 break;
3757 case DEVICE_LITTLE_ENDIAN:
3758 #ifdef TARGET_WORDS_BIGENDIAN
3759 swapendian_init(io_index);
3760 #endif
3761 break;
3762 case DEVICE_NATIVE_ENDIAN:
3763 default:
3764 break;
3767 return (io_index << IO_MEM_SHIFT);
3770 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3771 CPUWriteMemoryFunc * const *mem_write,
3772 void *opaque, enum device_endian endian)
3774 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3777 void cpu_unregister_io_memory(int io_table_address)
3779 int i;
3780 int io_index = io_table_address >> IO_MEM_SHIFT;
3782 swapendian_del(io_index);
3784 for (i=0;i < 3; i++) {
3785 io_mem_read[io_index][i] = unassigned_mem_read[i];
3786 io_mem_write[io_index][i] = unassigned_mem_write[i];
3788 io_mem_opaque[io_index] = NULL;
3789 io_mem_used[io_index] = 0;
3792 static void io_mem_init(void)
3794 int i;
3796 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3797 unassigned_mem_write, NULL,
3798 DEVICE_NATIVE_ENDIAN);
3799 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3800 unassigned_mem_write, NULL,
3801 DEVICE_NATIVE_ENDIAN);
3802 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3803 notdirty_mem_write, NULL,
3804 DEVICE_NATIVE_ENDIAN);
3805 for (i=0; i<5; i++)
3806 io_mem_used[i] = 1;
3808 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3809 watch_mem_write, NULL,
3810 DEVICE_NATIVE_ENDIAN);
3813 #endif /* !defined(CONFIG_USER_ONLY) */
3815 /* physical memory access (slow version, mainly for debug) */
3816 #if defined(CONFIG_USER_ONLY)
3817 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3818 uint8_t *buf, int len, int is_write)
3820 int l, flags;
3821 target_ulong page;
3822 void * p;
3824 while (len > 0) {
3825 page = addr & TARGET_PAGE_MASK;
3826 l = (page + TARGET_PAGE_SIZE) - addr;
3827 if (l > len)
3828 l = len;
3829 flags = page_get_flags(page);
3830 if (!(flags & PAGE_VALID))
3831 return -1;
3832 if (is_write) {
3833 if (!(flags & PAGE_WRITE))
3834 return -1;
3835 /* XXX: this code should not depend on lock_user */
3836 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3837 return -1;
3838 memcpy(p, buf, l);
3839 unlock_user(p, addr, l);
3840 } else {
3841 if (!(flags & PAGE_READ))
3842 return -1;
3843 /* XXX: this code should not depend on lock_user */
3844 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3845 return -1;
3846 memcpy(buf, p, l);
3847 unlock_user(p, addr, 0);
3849 len -= l;
3850 buf += l;
3851 addr += l;
3853 return 0;
3856 #else
3857 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3858 int len, int is_write)
3860 int l, io_index;
3861 uint8_t *ptr;
3862 uint32_t val;
3863 target_phys_addr_t page;
3864 unsigned long pd;
3865 PhysPageDesc *p;
3867 while (len > 0) {
3868 page = addr & TARGET_PAGE_MASK;
3869 l = (page + TARGET_PAGE_SIZE) - addr;
3870 if (l > len)
3871 l = len;
3872 p = phys_page_find(page >> TARGET_PAGE_BITS);
3873 if (!p) {
3874 pd = IO_MEM_UNASSIGNED;
3875 } else {
3876 pd = p->phys_offset;
3879 if (is_write) {
3880 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3881 target_phys_addr_t addr1 = addr;
3882 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3883 if (p)
3884 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3885 /* XXX: could force cpu_single_env to NULL to avoid
3886 potential bugs */
3887 if (l >= 4 && ((addr1 & 3) == 0)) {
3888 /* 32 bit write access */
3889 val = ldl_p(buf);
3890 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3891 l = 4;
3892 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3893 /* 16 bit write access */
3894 val = lduw_p(buf);
3895 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3896 l = 2;
3897 } else {
3898 /* 8 bit write access */
3899 val = ldub_p(buf);
3900 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3901 l = 1;
3903 } else {
3904 unsigned long addr1;
3905 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3906 /* RAM case */
3907 ptr = qemu_get_ram_ptr(addr1);
3908 memcpy(ptr, buf, l);
3909 if (!cpu_physical_memory_is_dirty(addr1)) {
3910 /* invalidate code */
3911 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3912 /* set dirty bit */
3913 cpu_physical_memory_set_dirty_flags(
3914 addr1, (0xff & ~CODE_DIRTY_FLAG));
3916 /* qemu doesn't execute guest code directly, but kvm does,
3917 so flush the instruction caches */
3918 if (kvm_enabled())
3919 flush_icache_range((unsigned long)ptr,
3920 ((unsigned long)ptr)+l);
3921 qemu_put_ram_ptr(ptr);
3923 } else {
3924 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3925 !(pd & IO_MEM_ROMD)) {
3926 target_phys_addr_t addr1 = addr;
3927 /* I/O case */
3928 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3929 if (p)
3930 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3931 if (l >= 4 && ((addr1 & 3) == 0)) {
3932 /* 32 bit read access */
3933 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3934 stl_p(buf, val);
3935 l = 4;
3936 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3937 /* 16 bit read access */
3938 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3939 stw_p(buf, val);
3940 l = 2;
3941 } else {
3942 /* 8 bit read access */
3943 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3944 stb_p(buf, val);
3945 l = 1;
3947 } else {
3948 /* RAM case */
3949 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3950 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3951 qemu_put_ram_ptr(ptr);
3954 len -= l;
3955 buf += l;
3956 addr += l;
3960 /* used for ROM loading : can write in RAM and ROM */
3961 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3962 const uint8_t *buf, int len)
3964 int l;
3965 uint8_t *ptr;
3966 target_phys_addr_t page;
3967 unsigned long pd;
3968 PhysPageDesc *p;
3970 while (len > 0) {
3971 page = addr & TARGET_PAGE_MASK;
3972 l = (page + TARGET_PAGE_SIZE) - addr;
3973 if (l > len)
3974 l = len;
3975 p = phys_page_find(page >> TARGET_PAGE_BITS);
3976 if (!p) {
3977 pd = IO_MEM_UNASSIGNED;
3978 } else {
3979 pd = p->phys_offset;
3982 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3983 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3984 !(pd & IO_MEM_ROMD)) {
3985 /* do nothing */
3986 } else {
3987 unsigned long addr1;
3988 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3989 /* ROM/RAM case */
3990 ptr = qemu_get_ram_ptr(addr1);
3991 memcpy(ptr, buf, l);
3992 qemu_put_ram_ptr(ptr);
3994 len -= l;
3995 buf += l;
3996 addr += l;
4000 typedef struct {
4001 void *buffer;
4002 target_phys_addr_t addr;
4003 target_phys_addr_t len;
4004 } BounceBuffer;
4006 static BounceBuffer bounce;
4008 typedef struct MapClient {
4009 void *opaque;
4010 void (*callback)(void *opaque);
4011 QLIST_ENTRY(MapClient) link;
4012 } MapClient;
4014 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4015 = QLIST_HEAD_INITIALIZER(map_client_list);
4017 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4019 MapClient *client = qemu_malloc(sizeof(*client));
4021 client->opaque = opaque;
4022 client->callback = callback;
4023 QLIST_INSERT_HEAD(&map_client_list, client, link);
4024 return client;
4027 void cpu_unregister_map_client(void *_client)
4029 MapClient *client = (MapClient *)_client;
4031 QLIST_REMOVE(client, link);
4032 qemu_free(client);
4035 static void cpu_notify_map_clients(void)
4037 MapClient *client;
4039 while (!QLIST_EMPTY(&map_client_list)) {
4040 client = QLIST_FIRST(&map_client_list);
4041 client->callback(client->opaque);
4042 cpu_unregister_map_client(client);
4046 /* Map a physical memory region into a host virtual address.
4047 * May map a subset of the requested range, given by and returned in *plen.
4048 * May return NULL if resources needed to perform the mapping are exhausted.
4049 * Use only for reads OR writes - not for read-modify-write operations.
4050 * Use cpu_register_map_client() to know when retrying the map operation is
4051 * likely to succeed.
4053 void *cpu_physical_memory_map(target_phys_addr_t addr,
4054 target_phys_addr_t *plen,
4055 int is_write)
4057 target_phys_addr_t len = *plen;
4058 target_phys_addr_t todo = 0;
4059 int l;
4060 target_phys_addr_t page;
4061 unsigned long pd;
4062 PhysPageDesc *p;
4063 target_phys_addr_t addr1 = addr;
4065 while (len > 0) {
4066 page = addr & TARGET_PAGE_MASK;
4067 l = (page + TARGET_PAGE_SIZE) - addr;
4068 if (l > len)
4069 l = len;
4070 p = phys_page_find(page >> TARGET_PAGE_BITS);
4071 if (!p) {
4072 pd = IO_MEM_UNASSIGNED;
4073 } else {
4074 pd = p->phys_offset;
4077 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4078 if (todo || bounce.buffer) {
4079 break;
4081 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4082 bounce.addr = addr;
4083 bounce.len = l;
4084 if (!is_write) {
4085 cpu_physical_memory_read(addr, bounce.buffer, l);
4088 *plen = l;
4089 return bounce.buffer;
4092 len -= l;
4093 addr += l;
4094 todo += l;
4096 *plen = todo;
4097 return qemu_ram_ptr_length(addr1, plen);
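/* Minimal usage sketch (hypothetical caller, error handling elided):
 *
 *   target_phys_addr_t plen = len;
 *   void *p = cpu_physical_memory_map(gpa, &plen, 1);
 *   if (p) {
 *       memcpy(p, data, plen);            // plen may come back < len
 *       cpu_physical_memory_unmap(p, plen, 1, plen);
 *   }
 *
 * MMIO regions are served from a single static bounce buffer, so at most
 * one such mapping can be outstanding at a time; cpu_register_map_client()
 * is the retry notification mechanism mentioned above. */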
4100 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4101 * Will also mark the memory as dirty if is_write == 1. access_len gives
4102 * the amount of memory that was actually read or written by the caller.
4104 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4105 int is_write, target_phys_addr_t access_len)
4107 unsigned long flush_len = (unsigned long)access_len;
4109 if (buffer != bounce.buffer) {
4110 if (is_write) {
4111 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4112 while (access_len) {
4113 unsigned l;
4114 l = TARGET_PAGE_SIZE;
4115 if (l > access_len)
4116 l = access_len;
4117 if (!cpu_physical_memory_is_dirty(addr1)) {
4118 /* invalidate code */
4119 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4120 /* set dirty bit */
4121 cpu_physical_memory_set_dirty_flags(
4122 addr1, (0xff & ~CODE_DIRTY_FLAG));
4124 addr1 += l;
4125 access_len -= l;
4127 dma_flush_range((unsigned long)buffer,
4128 (unsigned long)buffer + flush_len);
4130 if (xen_mapcache_enabled()) {
4131 qemu_invalidate_entry(buffer);
4133 return;
4135 if (is_write) {
4136 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4138 qemu_vfree(bounce.buffer);
4139 bounce.buffer = NULL;
4140 cpu_notify_map_clients();
4143 /* warning: addr must be aligned */
4144 uint32_t ldl_phys(target_phys_addr_t addr)
4146 int io_index;
4147 uint8_t *ptr;
4148 uint32_t val;
4149 unsigned long pd;
4150 PhysPageDesc *p;
4152 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4153 if (!p) {
4154 pd = IO_MEM_UNASSIGNED;
4155 } else {
4156 pd = p->phys_offset;
4159 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4160 !(pd & IO_MEM_ROMD)) {
4161 /* I/O case */
4162 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4163 if (p)
4164 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4165 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4166 } else {
4167 /* RAM case */
4168 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4169 (addr & ~TARGET_PAGE_MASK);
4170 val = ldl_p(ptr);
4172 return val;
/* warning: addr must be aligned */
uint64_t ldq_phys(target_phys_addr_t addr)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#ifdef TARGET_WORDS_BIGENDIAN
        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
#else
        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        val = ldq_p(ptr);
    }
    return val;
}

/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}

/* warning: addr must be aligned */
uint32_t lduw_phys(target_phys_addr_t addr)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        val = lduw_p(ptr);
    }
    return val;
}

/* warning: addr must be aligned.  The RAM page is not marked as dirty
   and the code inside is not invalidated.  This is useful if the dirty
   bits are used to track modified PTEs. */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}

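/* Illustrative sketch, not part of the original file: stl_phys_notdirty() is
 * intended for target MMU emulation that updates accessed/dirty bits in guest
 * page table entries without marking the page dirty or invalidating translated
 * code.  The helper name and the accessed_bit parameter below are hypothetical.
 * Kept under #if 0 so it does not affect the build. */
#if 0
static void example_set_pte_accessed(target_phys_addr_t pte_pa,
                                     uint32_t accessed_bit)
{
    uint32_t pte = ldl_phys(pte_pa);

    if (!(pte & accessed_bit)) {
        /* write back the PTE without touching the page's dirty tracking */
        stl_phys_notdirty(pte_pa, pte | accessed_bit);
    }
}
#endif
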
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
#else
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
#endif
    } else {
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        stq_p(ptr, val);
    }
}

/* warning: addr must be aligned */
void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

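/* Illustrative sketch, not part of the original file: unlike
 * stl_phys_notdirty(), stl_phys() invalidates any translated code on the page
 * and sets the dirty bits, so a store followed by a load round-trips on plain
 * guest RAM.  The function name and constant below are hypothetical.
 * Kept under #if 0 so it does not affect the build. */
#if 0
static void example_store_and_check(target_phys_addr_t guest_pa)
{
    stl_phys(guest_pa, 0xdeadbeef);
    /* holds for ordinary RAM; MMIO regions may of course behave differently */
    assert(ldl_phys(guest_pa) == 0xdeadbeef);
}
#endif
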
/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}

/* warning: addr must be aligned */
void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        stw_p(ptr, val);
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}

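/* Illustrative sketch, not part of the original file: cpu_memory_rw_debug()
 * takes guest *virtual* addresses and is the kind of call a debugger stub
 * (e.g. the gdbstub) makes to read target memory.  The wrapper name below is
 * hypothetical.  Kept under #if 0 so it does not affect the build. */
#if 0
static int example_debug_read_u32(CPUState *env, target_ulong va, uint32_t *out)
{
    /* returns 0 on success, -1 if no physical page is mapped at va */
    return cpu_memory_rw_debug(env, va, (uint8_t *)out, sizeof(*out),
                               0 /* is_write */);
}
#endif
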
#endif

/* In deterministic execution mode, instructions doing device I/O
   must be at the end of the TB. */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB.  */
    cpu_resume_from_signal(env, NULL);
}

#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for(i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
#ifdef CONFIG_PROFILER
    tcg_dump_info(f, cpu_fprintf);
#endif
}

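/* Illustrative sketch, not part of the original file: dump_exec_info() is
 * typically driven from the monitor ("info jit"), but any FILE/fprintf-style
 * pair matching fprintf_function works; passing stderr and fprintf is a simple
 * way to dump the statistics directly.  The wrapper name is hypothetical.
 * Kept under #if 0 so it does not affect the build. */
#if 0
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}
#endif
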
#define MMUSUFFIX _cmmu
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif
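/* Illustrative sketch, not part of the original file: softmmu_template.h is
 * included once per SHIFT value above, and inside the template SHIFT selects
 * the access size (1 << SHIFT bytes) and is pasted into the generated helper
 * names.  A loose, self-contained analogue of that per-size code generation,
 * using a macro instead of re-inclusion and entirely hypothetical names, is
 * shown below.  Kept under #if 0 so it does not affect the build. */
#if 0
#define EXAMPLE_GLUE(a, b) a##b

/* generates example_ld<shift>(): an unaligned-safe load of sizeof(type),
   where sizeof(type) == 1 << shift for the instantiations below */
#define EXAMPLE_GEN_LOAD(shift, type)                               \
    static type EXAMPLE_GLUE(example_ld, shift)(const void *p)      \
    {                                                               \
        type v;                                                     \
        memcpy(&v, p, sizeof(v));                                   \
        return v;                                                   \
    }

EXAMPLE_GEN_LOAD(0, uint8_t)    /* like SHIFT 0: byte access   */
EXAMPLE_GEN_LOAD(1, uint16_t)   /* like SHIFT 1: 16-bit access */
EXAMPLE_GEN_LOAD(2, uint32_t)   /* like SHIFT 2: 32-bit access */
EXAMPLE_GEN_LOAD(3, uint64_t)   /* like SHIFT 3: 64-bit access */
#endif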