[qemu-kvm.git] / exec.c (blob 76e8c76fd75645cbd5d62bcfbb87f2a83bb172a5)
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "cache-utils.h"
31 #if !defined(TARGET_IA64)
32 #include "tcg.h"
33 #endif
35 #include "hw/hw.h"
36 #include "hw/qdev.h"
37 #include "osdep.h"
38 #include "kvm.h"
39 #include "hw/xen.h"
40 #include "qemu-timer.h"
41 #include "memory.h"
42 #include "exec-memory.h"
43 #if defined(CONFIG_USER_ONLY)
44 #include <qemu.h>
45 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
46 #include <sys/param.h>
47 #if __FreeBSD_version >= 700104
48 #define HAVE_KINFO_GETVMMAP
49 #define sigqueue sigqueue_freebsd /* avoid redefinition */
50 #include <sys/time.h>
51 #include <sys/proc.h>
52 #include <machine/profile.h>
53 #define _KERNEL
54 #include <sys/user.h>
55 #undef _KERNEL
56 #undef sigqueue
57 #include <libutil.h>
58 #endif
59 #endif
60 #else /* !CONFIG_USER_ONLY */
61 #include "xen-mapcache.h"
62 #include "trace.h"
63 #endif
65 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_FLUSH
67 //#define DEBUG_TLB
68 //#define DEBUG_UNASSIGNED
70 /* make various TB consistency checks */
71 //#define DEBUG_TB_CHECK
72 //#define DEBUG_TLB_CHECK
74 //#define DEBUG_IOPORT
75 //#define DEBUG_SUBPAGE
77 #if !defined(CONFIG_USER_ONLY)
78 /* TB consistency checks only implemented for usermode emulation. */
79 #undef DEBUG_TB_CHECK
80 #endif
82 #define SMC_BITMAP_USE_THRESHOLD 10
84 static TranslationBlock *tbs;
85 static int code_gen_max_blocks;
86 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
87 static int nb_tbs;
88 /* any access to the tbs or the page table must use this lock */
89 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
91 #if defined(__arm__) || defined(__sparc_v9__)
92 /* The prologue must be reachable with a direct jump. ARM and Sparc64
93 have limited branch ranges (possibly also PPC) so place it in a
94 section close to the code segment. */
95 #define code_gen_section \
96 __attribute__((__section__(".gen_code"))) \
97 __attribute__((aligned (32)))
98 #elif defined(_WIN32)
99 /* Maximum alignment for Win32 is 16. */
100 #define code_gen_section \
101 __attribute__((aligned (16)))
102 #else
103 #define code_gen_section \
104 __attribute__((aligned (32)))
105 #endif
107 uint8_t code_gen_prologue[1024] code_gen_section;
108 static uint8_t *code_gen_buffer;
109 static unsigned long code_gen_buffer_size;
110 /* threshold to flush the translated code buffer */
111 static unsigned long code_gen_buffer_max_size;
112 static uint8_t *code_gen_ptr;
114 #if !defined(CONFIG_USER_ONLY)
115 int phys_ram_fd;
116 static int in_migration;
118 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
120 static MemoryRegion *system_memory;
121 static MemoryRegion *system_io;
123 #endif
125 CPUState *first_cpu;
126 /* current CPU in the current thread. It is only valid inside
127 cpu_exec() */
128 CPUState *cpu_single_env;
129 /* 0 = Do not count executed instructions.
130 1 = Precise instruction counting.
131 2 = Adaptive rate instruction counting. */
132 int use_icount = 0;
133 /* Current instruction counter. While executing translated code this may
134 include some instructions that have not yet been executed. */
135 int64_t qemu_icount;
137 typedef struct PageDesc {
138 /* list of TBs intersecting this ram page */
139 TranslationBlock *first_tb;
140 /* in order to optimize self modifying code, we count the number
141 of lookups we do to a given page to use a bitmap */
142 unsigned int code_write_count;
143 uint8_t *code_bitmap;
144 #if defined(CONFIG_USER_ONLY)
145 unsigned long flags;
146 #endif
147 } PageDesc;
149 /* In system mode we want L1_MAP to be based on ram offsets,
150 while in user mode we want it to be based on virtual addresses. */
151 #if !defined(CONFIG_USER_ONLY)
152 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
153 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
154 #else
155 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
156 #endif
157 #else
158 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
159 #endif
161 /* Size of the L2 (and L3, etc) page tables. */
162 #define L2_BITS 10
163 #define L2_SIZE (1 << L2_BITS)
165 /* The bits remaining after N lower levels of page tables. */
166 #define P_L1_BITS_REM \
167 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
168 #define V_L1_BITS_REM \
169 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
171 /* Size of the L1 page table. Avoid silly small sizes. */
172 #if P_L1_BITS_REM < 4
173 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
174 #else
175 #define P_L1_BITS P_L1_BITS_REM
176 #endif
178 #if V_L1_BITS_REM < 4
179 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
180 #else
181 #define V_L1_BITS V_L1_BITS_REM
182 #endif
184 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
185 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
187 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
188 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
190 unsigned long qemu_real_host_page_size;
191 unsigned long qemu_host_page_size;
192 unsigned long qemu_host_page_mask;
194 /* This is a multi-level map on the virtual address space.
195 The bottom level has pointers to PageDesc. */
196 static void *l1_map[V_L1_SIZE];
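/* Added illustration (not in the original source): the walk that
   page_find_alloc() below performs over this multi-level map.  Ignoring the
   NULL checks and lazy allocation, the lookup for a page index is roughly:

       int i;
       void **lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
       for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--)
           lp = (void **)*lp + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
       PageDesc *pd = (PageDesc *)*lp + (index & (L2_SIZE - 1));

   i.e. the top V_L1_BITS of the index select the statically allocated l1_map
   slot and each further group of L2_BITS selects one entry of a lazily
   allocated table of L2_SIZE pointers.  l1_phys_map and
   phys_page_find_alloc() use the same scheme with the P_L1_* constants. */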
198 #if !defined(CONFIG_USER_ONLY)
199 typedef struct PhysPageDesc {
200 /* offset in host memory of the page + io_index in the low bits */
201 ram_addr_t phys_offset;
202 ram_addr_t region_offset;
203 } PhysPageDesc;
205 /* This is a multi-level map on the physical address space.
206 The bottom level has pointers to PhysPageDesc. */
207 static void *l1_phys_map[P_L1_SIZE];
209 static void io_mem_init(void);
210 static void memory_map_init(void);
212 /* io memory support */
213 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
214 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
215 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
216 static char io_mem_used[IO_MEM_NB_ENTRIES];
217 static int io_mem_watch;
218 #endif
220 /* log support */
221 #ifdef WIN32
222 static const char *logfilename = "qemu.log";
223 #else
224 static const char *logfilename = "/tmp/qemu.log";
225 #endif
226 FILE *logfile;
227 int loglevel;
228 static int log_append = 0;
230 /* statistics */
231 #if !defined(CONFIG_USER_ONLY)
232 static int tlb_flush_count;
233 #endif
234 static int tb_flush_count;
235 static int tb_phys_invalidate_count;
237 #ifdef _WIN32
238 static void map_exec(void *addr, long size)
240 DWORD old_protect;
241 VirtualProtect(addr, size,
242 PAGE_EXECUTE_READWRITE, &old_protect);
245 #else
246 static void map_exec(void *addr, long size)
248 unsigned long start, end, page_size;
250 page_size = getpagesize();
251 start = (unsigned long)addr;
252 start &= ~(page_size - 1);
254 end = (unsigned long)addr + size;
255 end += page_size - 1;
256 end &= ~(page_size - 1);
258 mprotect((void *)start, end - start,
259 PROT_READ | PROT_WRITE | PROT_EXEC);
261 #endif
263 static void page_init(void)
265 /* NOTE: we can always suppose that qemu_host_page_size >=
266 TARGET_PAGE_SIZE */
267 #ifdef _WIN32
269 SYSTEM_INFO system_info;
271 GetSystemInfo(&system_info);
272 qemu_real_host_page_size = system_info.dwPageSize;
274 #else
275 qemu_real_host_page_size = getpagesize();
276 #endif
277 if (qemu_host_page_size == 0)
278 qemu_host_page_size = qemu_real_host_page_size;
279 if (qemu_host_page_size < TARGET_PAGE_SIZE)
280 qemu_host_page_size = TARGET_PAGE_SIZE;
281 qemu_host_page_mask = ~(qemu_host_page_size - 1);
283 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
285 #ifdef HAVE_KINFO_GETVMMAP
286 struct kinfo_vmentry *freep;
287 int i, cnt;
289 freep = kinfo_getvmmap(getpid(), &cnt);
290 if (freep) {
291 mmap_lock();
292 for (i = 0; i < cnt; i++) {
293 unsigned long startaddr, endaddr;
295 startaddr = freep[i].kve_start;
296 endaddr = freep[i].kve_end;
297 if (h2g_valid(startaddr)) {
298 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
300 if (h2g_valid(endaddr)) {
301 endaddr = h2g(endaddr);
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
303 } else {
304 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
305 endaddr = ~0ul;
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
307 #endif
311 free(freep);
312 mmap_unlock();
314 #else
315 FILE *f;
317 last_brk = (unsigned long)sbrk(0);
319 f = fopen("/compat/linux/proc/self/maps", "r");
320 if (f) {
321 mmap_lock();
323 do {
324 unsigned long startaddr, endaddr;
325 int n;
327 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
329 if (n == 2 && h2g_valid(startaddr)) {
330 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
332 if (h2g_valid(endaddr)) {
333 endaddr = h2g(endaddr);
334 } else {
335 endaddr = ~0ul;
337 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
339 } while (!feof(f));
341 fclose(f);
342 mmap_unlock();
344 #endif
346 #endif
349 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
351 PageDesc *pd;
352 void **lp;
353 int i;
355 #if defined(CONFIG_USER_ONLY)
356 /* We can't use g_malloc because it may recurse into a locked mutex. */
357 # define ALLOC(P, SIZE) \
358 do { \
359 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
360 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
361 } while (0)
362 #else
363 # define ALLOC(P, SIZE) \
364 do { P = g_malloc0(SIZE); } while (0)
365 #endif
367 /* Level 1. Always allocated. */
368 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
370 /* Level 2..N-1. */
371 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
372 void **p = *lp;
374 if (p == NULL) {
375 if (!alloc) {
376 return NULL;
378 ALLOC(p, sizeof(void *) * L2_SIZE);
379 *lp = p;
382 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
385 pd = *lp;
386 if (pd == NULL) {
387 if (!alloc) {
388 return NULL;
390 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
391 *lp = pd;
394 #undef ALLOC
396 return pd + (index & (L2_SIZE - 1));
399 static inline PageDesc *page_find(tb_page_addr_t index)
401 return page_find_alloc(index, 0);
404 #if !defined(CONFIG_USER_ONLY)
405 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
407 PhysPageDesc *pd;
408 void **lp;
409 int i;
411 /* Level 1. Always allocated. */
412 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
414 /* Level 2..N-1. */
415 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
416 void **p = *lp;
417 if (p == NULL) {
418 if (!alloc) {
419 return NULL;
421 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
423 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
426 pd = *lp;
427 if (pd == NULL) {
428 int i;
430 if (!alloc) {
431 return NULL;
434 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
436 for (i = 0; i < L2_SIZE; i++) {
437 pd[i].phys_offset = IO_MEM_UNASSIGNED;
438 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
442 return pd + (index & (L2_SIZE - 1));
445 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
447 return phys_page_find_alloc(index, 0);
450 static void tlb_protect_code(ram_addr_t ram_addr);
451 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
452 target_ulong vaddr);
453 #define mmap_lock() do { } while(0)
454 #define mmap_unlock() do { } while(0)
455 #endif
457 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
459 #if defined(CONFIG_USER_ONLY)
460 /* Currently it is not recommended to allocate big chunks of data in
461    user mode. This will change once a dedicated libc is used. */
462 #define USE_STATIC_CODE_GEN_BUFFER
463 #endif
465 #ifdef USE_STATIC_CODE_GEN_BUFFER
466 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
467 __attribute__((aligned (CODE_GEN_ALIGN)));
468 #endif
470 static void code_gen_alloc(unsigned long tb_size)
472 #ifdef USE_STATIC_CODE_GEN_BUFFER
473 code_gen_buffer = static_code_gen_buffer;
474 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
475 map_exec(code_gen_buffer, code_gen_buffer_size);
476 #else
477 code_gen_buffer_size = tb_size;
478 if (code_gen_buffer_size == 0) {
479 #if defined(CONFIG_USER_ONLY)
480 /* in user mode, phys_ram_size is not meaningful */
481 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
482 #else
483 /* XXX: needs adjustments */
484 code_gen_buffer_size = (unsigned long)(ram_size / 4);
485 #endif
487 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
488 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
489 /* The code gen buffer location may have constraints depending on
490 the host cpu and OS */
491 #if defined(__linux__)
493 int flags;
494 void *start = NULL;
496 flags = MAP_PRIVATE | MAP_ANONYMOUS;
497 #if defined(__x86_64__)
498 flags |= MAP_32BIT;
499 /* Cannot map more than that */
500 if (code_gen_buffer_size > (800 * 1024 * 1024))
501 code_gen_buffer_size = (800 * 1024 * 1024);
502 #elif defined(__sparc_v9__)
503 // Map the buffer below 2G, so we can use direct calls and branches
504 flags |= MAP_FIXED;
505 start = (void *) 0x60000000UL;
506 if (code_gen_buffer_size > (512 * 1024 * 1024))
507 code_gen_buffer_size = (512 * 1024 * 1024);
508 #elif defined(__arm__)
509 /* Map the buffer below 32M, so we can use direct calls and branches */
510 flags |= MAP_FIXED;
511 start = (void *) 0x01000000UL;
512 if (code_gen_buffer_size > 16 * 1024 * 1024)
513 code_gen_buffer_size = 16 * 1024 * 1024;
514 #elif defined(__s390x__)
515 /* Map the buffer so that we can use direct calls and branches. */
516 /* We have a +- 4GB range on the branches; leave some slop. */
517 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
518 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
520 start = (void *)0x90000000UL;
521 #endif
522 code_gen_buffer = mmap(start, code_gen_buffer_size,
523 PROT_WRITE | PROT_READ | PROT_EXEC,
524 flags, -1, 0);
525 if (code_gen_buffer == MAP_FAILED) {
526 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
527 exit(1);
530 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
531 || defined(__DragonFly__) || defined(__OpenBSD__) \
532 || defined(__NetBSD__)
534 int flags;
535 void *addr = NULL;
536 flags = MAP_PRIVATE | MAP_ANONYMOUS;
537 #if defined(__x86_64__)
538 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
539 * 0x40000000 is free */
540 flags |= MAP_FIXED;
541 addr = (void *)0x40000000;
542 /* Cannot map more than that */
543 if (code_gen_buffer_size > (800 * 1024 * 1024))
544 code_gen_buffer_size = (800 * 1024 * 1024);
545 #elif defined(__sparc_v9__)
546 // Map the buffer below 2G, so we can use direct calls and branches
547 flags |= MAP_FIXED;
548 addr = (void *) 0x60000000UL;
549 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
550 code_gen_buffer_size = (512 * 1024 * 1024);
552 #endif
553 code_gen_buffer = mmap(addr, code_gen_buffer_size,
554 PROT_WRITE | PROT_READ | PROT_EXEC,
555 flags, -1, 0);
556 if (code_gen_buffer == MAP_FAILED) {
557 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
558 exit(1);
561 #else
562 code_gen_buffer = g_malloc(code_gen_buffer_size);
563 map_exec(code_gen_buffer, code_gen_buffer_size);
564 #endif
565 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
566 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
567 code_gen_buffer_max_size = code_gen_buffer_size -
568 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
569 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
570 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
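/* Note added for clarity: code_gen_buffer_max_size is the flush threshold
   checked by tb_alloc() below.  Reserving TCG_MAX_OP_SIZE * OPC_BUF_SIZE
   bytes of headroom presumably ensures that the translation block being
   generated when the threshold is reached still fits in the buffer before
   tb_flush() recycles it. */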
573 /* Must be called before using the QEMU cpus. 'tb_size' is the size
574 (in bytes) allocated to the translation buffer. Zero means default
575 size. */
576 void tcg_exec_init(unsigned long tb_size)
578 cpu_gen_init();
579 code_gen_alloc(tb_size);
580 code_gen_ptr = code_gen_buffer;
581 page_init();
582 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
583 /* There's no guest base to take into account, so go ahead and
584 initialize the prologue now. */
585 tcg_prologue_init(&tcg_ctx);
586 #endif
589 bool tcg_enabled(void)
591 return code_gen_buffer != NULL;
594 void cpu_exec_init_all(void)
596 #if !defined(CONFIG_USER_ONLY)
597 memory_map_init();
598 io_mem_init();
599 #endif
602 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
604 static int cpu_common_post_load(void *opaque, int version_id)
606 CPUState *env = opaque;
608 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
609 version_id is increased. */
610 env->interrupt_request &= ~0x01;
611 tlb_flush(env, 1);
613 return 0;
616 static const VMStateDescription vmstate_cpu_common = {
617 .name = "cpu_common",
618 .version_id = 1,
619 .minimum_version_id = 1,
620 .minimum_version_id_old = 1,
621 .post_load = cpu_common_post_load,
622 .fields = (VMStateField []) {
623 VMSTATE_UINT32(halted, CPUState),
624 VMSTATE_UINT32(interrupt_request, CPUState),
625 VMSTATE_END_OF_LIST()
628 #endif
630 CPUState *qemu_get_cpu(int cpu)
632 CPUState *env = first_cpu;
634 while (env) {
635 if (env->cpu_index == cpu)
636 break;
637 env = env->next_cpu;
640 return env;
643 void cpu_exec_init(CPUState *env)
645 CPUState **penv;
646 int cpu_index;
648 #if defined(CONFIG_USER_ONLY)
649 cpu_list_lock();
650 #endif
651 env->next_cpu = NULL;
652 penv = &first_cpu;
653 cpu_index = 0;
654 while (*penv != NULL) {
655 penv = &(*penv)->next_cpu;
656 cpu_index++;
658 env->cpu_index = cpu_index;
659 env->numa_node = 0;
660 QTAILQ_INIT(&env->breakpoints);
661 QTAILQ_INIT(&env->watchpoints);
662 #ifndef CONFIG_USER_ONLY
663 env->thread_id = qemu_get_thread_id();
664 #endif
665 *penv = env;
666 #if defined(CONFIG_USER_ONLY)
667 cpu_list_unlock();
668 #endif
669 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
670 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
671 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
672 cpu_save, cpu_load, env);
673 #endif
676 /* Allocate a new translation block. Flush the translation buffer if
677 too many translation blocks or too much generated code. */
678 static TranslationBlock *tb_alloc(target_ulong pc)
680 TranslationBlock *tb;
682 if (nb_tbs >= code_gen_max_blocks ||
683 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
684 return NULL;
685 tb = &tbs[nb_tbs++];
686 tb->pc = pc;
687 tb->cflags = 0;
688 return tb;
691 void tb_free(TranslationBlock *tb)
693     /* In practice this is mostly used for single-use temporary TBs.
694 Ignore the hard cases and just back up if this TB happens to
695 be the last one generated. */
696 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
697 code_gen_ptr = tb->tc_ptr;
698 nb_tbs--;
702 static inline void invalidate_page_bitmap(PageDesc *p)
704 if (p->code_bitmap) {
705 g_free(p->code_bitmap);
706 p->code_bitmap = NULL;
708 p->code_write_count = 0;
711 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
713 static void page_flush_tb_1 (int level, void **lp)
715 int i;
717 if (*lp == NULL) {
718 return;
720 if (level == 0) {
721 PageDesc *pd = *lp;
722 for (i = 0; i < L2_SIZE; ++i) {
723 pd[i].first_tb = NULL;
724 invalidate_page_bitmap(pd + i);
726 } else {
727 void **pp = *lp;
728 for (i = 0; i < L2_SIZE; ++i) {
729 page_flush_tb_1 (level - 1, pp + i);
734 static void page_flush_tb(void)
736 int i;
737 for (i = 0; i < V_L1_SIZE; i++) {
738 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
742 /* flush all the translation blocks */
743 /* XXX: tb_flush is currently not thread safe */
744 void tb_flush(CPUState *env1)
746 CPUState *env;
747 #if defined(DEBUG_FLUSH)
748 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
749 (unsigned long)(code_gen_ptr - code_gen_buffer),
750 nb_tbs, nb_tbs > 0 ?
751 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
752 #endif
753 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
754 cpu_abort(env1, "Internal error: code buffer overflow\n");
756 nb_tbs = 0;
758 for(env = first_cpu; env != NULL; env = env->next_cpu) {
759 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
762 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
763 page_flush_tb();
765 code_gen_ptr = code_gen_buffer;
766 /* XXX: flush processor icache at this point if cache flush is
767 expensive */
768 tb_flush_count++;
771 #ifdef DEBUG_TB_CHECK
773 static void tb_invalidate_check(target_ulong address)
775 TranslationBlock *tb;
776 int i;
777 address &= TARGET_PAGE_MASK;
778 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
779 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
780 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
781 address >= tb->pc + tb->size)) {
782 printf("ERROR invalidate: address=" TARGET_FMT_lx
783 " PC=%08lx size=%04x\n",
784 address, (long)tb->pc, tb->size);
790 /* verify that all the pages have correct rights for code */
791 static void tb_page_check(void)
793 TranslationBlock *tb;
794 int i, flags1, flags2;
796 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
797 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
798 flags1 = page_get_flags(tb->pc);
799 flags2 = page_get_flags(tb->pc + tb->size - 1);
800 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
801 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
802 (long)tb->pc, tb->size, flags1, flags2);
808 #endif
810 /* invalidate one TB */
811 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
812 int next_offset)
814 TranslationBlock *tb1;
815 for(;;) {
816 tb1 = *ptb;
817 if (tb1 == tb) {
818 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
819 break;
821 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
825 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
827 TranslationBlock *tb1;
828 unsigned int n1;
830 for(;;) {
831 tb1 = *ptb;
832 n1 = (long)tb1 & 3;
833 tb1 = (TranslationBlock *)((long)tb1 & ~3);
834 if (tb1 == tb) {
835 *ptb = tb1->page_next[n1];
836 break;
838 ptb = &tb1->page_next[n1];
842 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
844 TranslationBlock *tb1, **ptb;
845 unsigned int n1;
847 ptb = &tb->jmp_next[n];
848 tb1 = *ptb;
849 if (tb1) {
850 /* find tb(n) in circular list */
851 for(;;) {
852 tb1 = *ptb;
853 n1 = (long)tb1 & 3;
854 tb1 = (TranslationBlock *)((long)tb1 & ~3);
855 if (n1 == n && tb1 == tb)
856 break;
857 if (n1 == 2) {
858 ptb = &tb1->jmp_first;
859 } else {
860 ptb = &tb1->jmp_next[n1];
863 /* now we can suppress tb(n) from the list */
864 *ptb = tb->jmp_next[n];
866 tb->jmp_next[n] = NULL;
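/* Added note (not in the original source): the page_next[] and jump lists
   manipulated above store tagged pointers.  The low two bits of a link say
   which slot of the target TB continues the chain:

       p->first_tb = (TranslationBlock *)((long)tb | n);   // n = page slot 0 or 1
       n1 = (long)tb1 & 3;                                 // recover the slot
       tb1 = (TranslationBlock *)((long)tb1 & ~3);         // recover the pointer

   The tag value 2 only comes from jmp_first (tb | 2), so the jump-list
   walkers (tb_jmp_remove() above, tb_phys_invalidate() and
   tb_reset_jump_recursive2() below) treat n1 == 2 as having wrapped around
   to the owning TB's jmp_first link. */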
870 /* reset the jump entry 'n' of a TB so that it is not chained to
871 another TB */
872 static inline void tb_reset_jump(TranslationBlock *tb, int n)
874 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
877 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
879 CPUState *env;
880 PageDesc *p;
881 unsigned int h, n1;
882 tb_page_addr_t phys_pc;
883 TranslationBlock *tb1, *tb2;
885 /* remove the TB from the hash list */
886 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
887 h = tb_phys_hash_func(phys_pc);
888 tb_remove(&tb_phys_hash[h], tb,
889 offsetof(TranslationBlock, phys_hash_next));
891 /* remove the TB from the page list */
892 if (tb->page_addr[0] != page_addr) {
893 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
894 tb_page_remove(&p->first_tb, tb);
895 invalidate_page_bitmap(p);
897 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
898 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
899 tb_page_remove(&p->first_tb, tb);
900 invalidate_page_bitmap(p);
903 tb_invalidated_flag = 1;
905 /* remove the TB from the hash list */
906 h = tb_jmp_cache_hash_func(tb->pc);
907 for(env = first_cpu; env != NULL; env = env->next_cpu) {
908 if (env->tb_jmp_cache[h] == tb)
909 env->tb_jmp_cache[h] = NULL;
912 /* suppress this TB from the two jump lists */
913 tb_jmp_remove(tb, 0);
914 tb_jmp_remove(tb, 1);
916 /* suppress any remaining jumps to this TB */
917 tb1 = tb->jmp_first;
918 for(;;) {
919 n1 = (long)tb1 & 3;
920 if (n1 == 2)
921 break;
922 tb1 = (TranslationBlock *)((long)tb1 & ~3);
923 tb2 = tb1->jmp_next[n1];
924 tb_reset_jump(tb1, n1);
925 tb1->jmp_next[n1] = NULL;
926 tb1 = tb2;
928 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
930 tb_phys_invalidate_count++;
933 static inline void set_bits(uint8_t *tab, int start, int len)
935 int end, mask, end1;
937 end = start + len;
938 tab += start >> 3;
939 mask = 0xff << (start & 7);
940 if ((start & ~7) == (end & ~7)) {
941 if (start < end) {
942 mask &= ~(0xff << (end & 7));
943 *tab |= mask;
945 } else {
946 *tab++ |= mask;
947 start = (start + 8) & ~7;
948 end1 = end & ~7;
949 while (start < end1) {
950 *tab++ = 0xff;
951 start += 8;
953 if (start < end) {
954 mask = ~(0xff << (end & 7));
955 *tab |= mask;
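/* Example added for illustration: set_bits(bitmap, 3, 6) marks bits 3..8,
   i.e. it ORs 0xf8 into bitmap[0] and 0x01 into bitmap[1].
   build_page_bitmap() below uses this with one bit per byte of a guest page
   to record which byte ranges are covered by translated code, so that
   tb_invalidate_phys_page_fast() can skip writes that touch no TB. */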
960 static void build_page_bitmap(PageDesc *p)
962 int n, tb_start, tb_end;
963 TranslationBlock *tb;
965 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
967 tb = p->first_tb;
968 while (tb != NULL) {
969 n = (long)tb & 3;
970 tb = (TranslationBlock *)((long)tb & ~3);
971 /* NOTE: this is subtle as a TB may span two physical pages */
972 if (n == 0) {
973 /* NOTE: tb_end may be after the end of the page, but
974 it is not a problem */
975 tb_start = tb->pc & ~TARGET_PAGE_MASK;
976 tb_end = tb_start + tb->size;
977 if (tb_end > TARGET_PAGE_SIZE)
978 tb_end = TARGET_PAGE_SIZE;
979 } else {
980 tb_start = 0;
981 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
983 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
984 tb = tb->page_next[n];
988 TranslationBlock *tb_gen_code(CPUState *env,
989 target_ulong pc, target_ulong cs_base,
990 int flags, int cflags)
992 TranslationBlock *tb;
993 uint8_t *tc_ptr;
994 tb_page_addr_t phys_pc, phys_page2;
995 target_ulong virt_page2;
996 int code_gen_size;
998 phys_pc = get_page_addr_code(env, pc);
999 tb = tb_alloc(pc);
1000 if (!tb) {
1001 /* flush must be done */
1002 tb_flush(env);
1003 /* cannot fail at this point */
1004 tb = tb_alloc(pc);
1005 /* Don't forget to invalidate previous TB info. */
1006 tb_invalidated_flag = 1;
1008 tc_ptr = code_gen_ptr;
1009 tb->tc_ptr = tc_ptr;
1010 tb->cs_base = cs_base;
1011 tb->flags = flags;
1012 tb->cflags = cflags;
1013 cpu_gen_code(env, tb, &code_gen_size);
1014 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1016 /* check next page if needed */
1017 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1018 phys_page2 = -1;
1019 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1020 phys_page2 = get_page_addr_code(env, virt_page2);
1022 tb_link_page(tb, phys_pc, phys_page2);
1023 return tb;
1026 /* invalidate all TBs which intersect with the target physical page
1027 starting in range [start;end[. NOTE: start and end must refer to
1028 the same physical page. 'is_cpu_write_access' should be true if called
1029 from a real cpu write access: the virtual CPU will exit the current
1030 TB if code is modified inside this TB. */
1031 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1032 int is_cpu_write_access)
1034 TranslationBlock *tb, *tb_next, *saved_tb;
1035 CPUState *env = cpu_single_env;
1036 tb_page_addr_t tb_start, tb_end;
1037 PageDesc *p;
1038 int n;
1039 #ifdef TARGET_HAS_PRECISE_SMC
1040 int current_tb_not_found = is_cpu_write_access;
1041 TranslationBlock *current_tb = NULL;
1042 int current_tb_modified = 0;
1043 target_ulong current_pc = 0;
1044 target_ulong current_cs_base = 0;
1045 int current_flags = 0;
1046 #endif /* TARGET_HAS_PRECISE_SMC */
1048 p = page_find(start >> TARGET_PAGE_BITS);
1049 if (!p)
1050 return;
1051 if (!p->code_bitmap &&
1052 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1053 is_cpu_write_access) {
1054 /* build code bitmap */
1055 build_page_bitmap(p);
1058 /* we remove all the TBs in the range [start, end[ */
1059 /* XXX: see if in some cases it could be faster to invalidate all the code */
1060 tb = p->first_tb;
1061 while (tb != NULL) {
1062 n = (long)tb & 3;
1063 tb = (TranslationBlock *)((long)tb & ~3);
1064 tb_next = tb->page_next[n];
1065 /* NOTE: this is subtle as a TB may span two physical pages */
1066 if (n == 0) {
1067 /* NOTE: tb_end may be after the end of the page, but
1068 it is not a problem */
1069 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1070 tb_end = tb_start + tb->size;
1071 } else {
1072 tb_start = tb->page_addr[1];
1073 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1075 if (!(tb_end <= start || tb_start >= end)) {
1076 #ifdef TARGET_HAS_PRECISE_SMC
1077 if (current_tb_not_found) {
1078 current_tb_not_found = 0;
1079 current_tb = NULL;
1080 if (env->mem_io_pc) {
1081 /* now we have a real cpu fault */
1082 current_tb = tb_find_pc(env->mem_io_pc);
1085 if (current_tb == tb &&
1086 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1087 /* If we are modifying the current TB, we must stop
1088 its execution. We could be more precise by checking
1089 that the modification is after the current PC, but it
1090 would require a specialized function to partially
1091 restore the CPU state */
1093 current_tb_modified = 1;
1094 cpu_restore_state(current_tb, env, env->mem_io_pc);
1095 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1096 &current_flags);
1098 #endif /* TARGET_HAS_PRECISE_SMC */
1099 /* we need to do that to handle the case where a signal
1100 occurs while doing tb_phys_invalidate() */
1101 saved_tb = NULL;
1102 if (env) {
1103 saved_tb = env->current_tb;
1104 env->current_tb = NULL;
1106 tb_phys_invalidate(tb, -1);
1107 if (env) {
1108 env->current_tb = saved_tb;
1109 if (env->interrupt_request && env->current_tb)
1110 cpu_interrupt(env, env->interrupt_request);
1113 tb = tb_next;
1115 #if !defined(CONFIG_USER_ONLY)
1116 /* if no code remaining, no need to continue to use slow writes */
1117 if (!p->first_tb) {
1118 invalidate_page_bitmap(p);
1119 if (is_cpu_write_access) {
1120 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1123 #endif
1124 #ifdef TARGET_HAS_PRECISE_SMC
1125 if (current_tb_modified) {
1126 /* we generate a block containing just the instruction
1127 modifying the memory. It will ensure that it cannot modify
1128 itself */
1129 env->current_tb = NULL;
1130 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1131 cpu_resume_from_signal(env, NULL);
1133 #endif
1136 /* len must be <= 8 and start must be a multiple of len */
1137 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1139 PageDesc *p;
1140 int offset, b;
1141 #if 0
1142 if (1) {
1143 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1144 cpu_single_env->mem_io_vaddr, len,
1145 cpu_single_env->eip,
1146 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1148 #endif
1149 p = page_find(start >> TARGET_PAGE_BITS);
1150 if (!p)
1151 return;
1152 if (p->code_bitmap) {
1153 offset = start & ~TARGET_PAGE_MASK;
1154 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1155 if (b & ((1 << len) - 1))
1156 goto do_invalidate;
1157 } else {
1158 do_invalidate:
1159 tb_invalidate_phys_page_range(start, start + len, 1);
1163 #if !defined(CONFIG_SOFTMMU)
1164 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1165 unsigned long pc, void *puc)
1167 TranslationBlock *tb;
1168 PageDesc *p;
1169 int n;
1170 #ifdef TARGET_HAS_PRECISE_SMC
1171 TranslationBlock *current_tb = NULL;
1172 CPUState *env = cpu_single_env;
1173 int current_tb_modified = 0;
1174 target_ulong current_pc = 0;
1175 target_ulong current_cs_base = 0;
1176 int current_flags = 0;
1177 #endif
1179 addr &= TARGET_PAGE_MASK;
1180 p = page_find(addr >> TARGET_PAGE_BITS);
1181 if (!p)
1182 return;
1183 tb = p->first_tb;
1184 #ifdef TARGET_HAS_PRECISE_SMC
1185 if (tb && pc != 0) {
1186 current_tb = tb_find_pc(pc);
1188 #endif
1189 while (tb != NULL) {
1190 n = (long)tb & 3;
1191 tb = (TranslationBlock *)((long)tb & ~3);
1192 #ifdef TARGET_HAS_PRECISE_SMC
1193 if (current_tb == tb &&
1194 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1195 /* If we are modifying the current TB, we must stop
1196 its execution. We could be more precise by checking
1197 that the modification is after the current PC, but it
1198 would require a specialized function to partially
1199 restore the CPU state */
1201 current_tb_modified = 1;
1202 cpu_restore_state(current_tb, env, pc);
1203 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1204 &current_flags);
1206 #endif /* TARGET_HAS_PRECISE_SMC */
1207 tb_phys_invalidate(tb, addr);
1208 tb = tb->page_next[n];
1210 p->first_tb = NULL;
1211 #ifdef TARGET_HAS_PRECISE_SMC
1212 if (current_tb_modified) {
1213 /* we generate a block containing just the instruction
1214 modifying the memory. It will ensure that it cannot modify
1215 itself */
1216 env->current_tb = NULL;
1217 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1218 cpu_resume_from_signal(env, puc);
1220 #endif
1222 #endif
1224 /* add the tb in the target page and protect it if necessary */
1225 static inline void tb_alloc_page(TranslationBlock *tb,
1226 unsigned int n, tb_page_addr_t page_addr)
1228 PageDesc *p;
1229 #ifndef CONFIG_USER_ONLY
1230 bool page_already_protected;
1231 #endif
1233 tb->page_addr[n] = page_addr;
1234 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1235 tb->page_next[n] = p->first_tb;
1236 #ifndef CONFIG_USER_ONLY
1237 page_already_protected = p->first_tb != NULL;
1238 #endif
1239 p->first_tb = (TranslationBlock *)((long)tb | n);
1240 invalidate_page_bitmap(p);
1242 #if defined(TARGET_HAS_SMC) || 1
1244 #if defined(CONFIG_USER_ONLY)
1245 if (p->flags & PAGE_WRITE) {
1246 target_ulong addr;
1247 PageDesc *p2;
1248 int prot;
1250 /* force the host page as non writable (writes will have a
1251 page fault + mprotect overhead) */
1252 page_addr &= qemu_host_page_mask;
1253 prot = 0;
1254 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1255 addr += TARGET_PAGE_SIZE) {
1257 p2 = page_find (addr >> TARGET_PAGE_BITS);
1258 if (!p2)
1259 continue;
1260 prot |= p2->flags;
1261 p2->flags &= ~PAGE_WRITE;
1263 mprotect(g2h(page_addr), qemu_host_page_size,
1264 (prot & PAGE_BITS) & ~PAGE_WRITE);
1265 #ifdef DEBUG_TB_INVALIDATE
1266 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1267 page_addr);
1268 #endif
1270 #else
1271 /* if some code is already present, then the pages are already
1272 protected. So we handle the case where only the first TB is
1273 allocated in a physical page */
1274 if (!page_already_protected) {
1275 tlb_protect_code(page_addr);
1277 #endif
1279 #endif /* TARGET_HAS_SMC */
1282 /* add a new TB and link it to the physical page tables. phys_page2 is
1283 (-1) to indicate that only one page contains the TB. */
1284 void tb_link_page(TranslationBlock *tb,
1285 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1287 unsigned int h;
1288 TranslationBlock **ptb;
1290 /* Grab the mmap lock to stop another thread invalidating this TB
1291 before we are done. */
1292 mmap_lock();
1293 /* add in the physical hash table */
1294 h = tb_phys_hash_func(phys_pc);
1295 ptb = &tb_phys_hash[h];
1296 tb->phys_hash_next = *ptb;
1297 *ptb = tb;
1299 /* add in the page list */
1300 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1301 if (phys_page2 != -1)
1302 tb_alloc_page(tb, 1, phys_page2);
1303 else
1304 tb->page_addr[1] = -1;
1306 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1307 tb->jmp_next[0] = NULL;
1308 tb->jmp_next[1] = NULL;
1310 /* init original jump addresses */
1311 if (tb->tb_next_offset[0] != 0xffff)
1312 tb_reset_jump(tb, 0);
1313 if (tb->tb_next_offset[1] != 0xffff)
1314 tb_reset_jump(tb, 1);
1316 #ifdef DEBUG_TB_CHECK
1317 tb_page_check();
1318 #endif
1319 mmap_unlock();
1322 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1323 tb[1].tc_ptr. Return NULL if not found */
1324 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1326 int m_min, m_max, m;
1327 unsigned long v;
1328 TranslationBlock *tb;
1330 if (nb_tbs <= 0)
1331 return NULL;
1332 if (tc_ptr < (unsigned long)code_gen_buffer ||
1333 tc_ptr >= (unsigned long)code_gen_ptr)
1334 return NULL;
1335 /* binary search (cf Knuth) */
1336 m_min = 0;
1337 m_max = nb_tbs - 1;
1338 while (m_min <= m_max) {
1339 m = (m_min + m_max) >> 1;
1340 tb = &tbs[m];
1341 v = (unsigned long)tb->tc_ptr;
1342 if (v == tc_ptr)
1343 return tb;
1344 else if (tc_ptr < v) {
1345 m_max = m - 1;
1346 } else {
1347 m_min = m + 1;
1350 return &tbs[m_max];
1353 static void tb_reset_jump_recursive(TranslationBlock *tb);
1355 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1357 TranslationBlock *tb1, *tb_next, **ptb;
1358 unsigned int n1;
1360 tb1 = tb->jmp_next[n];
1361 if (tb1 != NULL) {
1362 /* find head of list */
1363 for(;;) {
1364 n1 = (long)tb1 & 3;
1365 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1366 if (n1 == 2)
1367 break;
1368 tb1 = tb1->jmp_next[n1];
1370         /* we are now sure that tb jumps to tb1 */
1371 tb_next = tb1;
1373 /* remove tb from the jmp_first list */
1374 ptb = &tb_next->jmp_first;
1375 for(;;) {
1376 tb1 = *ptb;
1377 n1 = (long)tb1 & 3;
1378 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1379 if (n1 == n && tb1 == tb)
1380 break;
1381 ptb = &tb1->jmp_next[n1];
1383 *ptb = tb->jmp_next[n];
1384 tb->jmp_next[n] = NULL;
1386 /* suppress the jump to next tb in generated code */
1387 tb_reset_jump(tb, n);
1389 /* suppress jumps in the tb on which we could have jumped */
1390 tb_reset_jump_recursive(tb_next);
1394 static void tb_reset_jump_recursive(TranslationBlock *tb)
1396 tb_reset_jump_recursive2(tb, 0);
1397 tb_reset_jump_recursive2(tb, 1);
1400 #if defined(TARGET_HAS_ICE)
1401 #if defined(CONFIG_USER_ONLY)
1402 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1404 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1406 #else
1407 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1409 target_phys_addr_t addr;
1410 target_ulong pd;
1411 ram_addr_t ram_addr;
1412 PhysPageDesc *p;
1414 addr = cpu_get_phys_page_debug(env, pc);
1415 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1416 if (!p) {
1417 pd = IO_MEM_UNASSIGNED;
1418 } else {
1419 pd = p->phys_offset;
1421 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1422 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1424 #endif
1425 #endif /* TARGET_HAS_ICE */
1427 #if defined(CONFIG_USER_ONLY)
1428 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1433 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1434 int flags, CPUWatchpoint **watchpoint)
1436 return -ENOSYS;
1438 #else
1439 /* Add a watchpoint. */
1440 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1441 int flags, CPUWatchpoint **watchpoint)
1443 target_ulong len_mask = ~(len - 1);
1444 CPUWatchpoint *wp;
1446 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1447 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1448 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1449 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1450 return -EINVAL;
1452 wp = g_malloc(sizeof(*wp));
1454 wp->vaddr = addr;
1455 wp->len_mask = len_mask;
1456 wp->flags = flags;
1458 /* keep all GDB-injected watchpoints in front */
1459 if (flags & BP_GDB)
1460 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1461 else
1462 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1464 tlb_flush_page(env, addr);
1466 if (watchpoint)
1467 *watchpoint = wp;
1468 return 0;
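/* Example added for illustration: for len == 4, len_mask is ~3, so the
   (addr & ~len_mask) test above rejects any address with the low two bits
   set; watchpoints must be naturally aligned to their power-of-two length. */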
1471 /* Remove a specific watchpoint. */
1472 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1473 int flags)
1475 target_ulong len_mask = ~(len - 1);
1476 CPUWatchpoint *wp;
1478 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1479 if (addr == wp->vaddr && len_mask == wp->len_mask
1480 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1481 cpu_watchpoint_remove_by_ref(env, wp);
1482 return 0;
1485 return -ENOENT;
1488 /* Remove a specific watchpoint by reference. */
1489 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1491 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1493 tlb_flush_page(env, watchpoint->vaddr);
1495 g_free(watchpoint);
1498 /* Remove all matching watchpoints. */
1499 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1501 CPUWatchpoint *wp, *next;
1503 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1504 if (wp->flags & mask)
1505 cpu_watchpoint_remove_by_ref(env, wp);
1508 #endif
1510 /* Add a breakpoint. */
1511 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1512 CPUBreakpoint **breakpoint)
1514 #if defined(TARGET_HAS_ICE)
1515 CPUBreakpoint *bp;
1517 bp = g_malloc(sizeof(*bp));
1519 bp->pc = pc;
1520 bp->flags = flags;
1522 /* keep all GDB-injected breakpoints in front */
1523 if (flags & BP_GDB)
1524 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1525 else
1526 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1528 breakpoint_invalidate(env, pc);
1530 if (breakpoint)
1531 *breakpoint = bp;
1532 return 0;
1533 #else
1534 return -ENOSYS;
1535 #endif
1538 /* Remove a specific breakpoint. */
1539 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1541 #if defined(TARGET_HAS_ICE)
1542 CPUBreakpoint *bp;
1544 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1545 if (bp->pc == pc && bp->flags == flags) {
1546 cpu_breakpoint_remove_by_ref(env, bp);
1547 return 0;
1550 return -ENOENT;
1551 #else
1552 return -ENOSYS;
1553 #endif
1556 /* Remove a specific breakpoint by reference. */
1557 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1559 #if defined(TARGET_HAS_ICE)
1560 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1562 breakpoint_invalidate(env, breakpoint->pc);
1564 g_free(breakpoint);
1565 #endif
1568 /* Remove all matching breakpoints. */
1569 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1571 #if defined(TARGET_HAS_ICE)
1572 CPUBreakpoint *bp, *next;
1574 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1575 if (bp->flags & mask)
1576 cpu_breakpoint_remove_by_ref(env, bp);
1578 #endif
1581 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1582 CPU loop after each instruction */
1583 void cpu_single_step(CPUState *env, int enabled)
1585 #if defined(TARGET_HAS_ICE)
1586 if (env->singlestep_enabled != enabled) {
1587 env->singlestep_enabled = enabled;
1588 if (kvm_enabled())
1589 kvm_update_guest_debug(env, 0);
1590 else {
1591 /* must flush all the translated code to avoid inconsistencies */
1592 /* XXX: only flush what is necessary */
1593 tb_flush(env);
1596 #endif
1599 /* enable or disable low levels log */
1600 void cpu_set_log(int log_flags)
1602 loglevel = log_flags;
1603 if (loglevel && !logfile) {
1604 logfile = fopen(logfilename, log_append ? "a" : "w");
1605 if (!logfile) {
1606 perror(logfilename);
1607 _exit(1);
1609 #if !defined(CONFIG_SOFTMMU)
1610 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1612 static char logfile_buf[4096];
1613 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1615 #elif !defined(_WIN32)
1616 /* Win32 doesn't support line-buffering and requires size >= 2 */
1617 setvbuf(logfile, NULL, _IOLBF, 0);
1618 #endif
1619 log_append = 1;
1621 if (!loglevel && logfile) {
1622 fclose(logfile);
1623 logfile = NULL;
1627 void cpu_set_log_filename(const char *filename)
1629 logfilename = strdup(filename);
1630 if (logfile) {
1631 fclose(logfile);
1632 logfile = NULL;
1634 cpu_set_log(loglevel);
1637 static void cpu_unlink_tb(CPUState *env)
1639 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1640 problem and hope the cpu will stop of its own accord. For userspace
1641 emulation this often isn't actually as bad as it sounds. Often
1642 signals are used primarily to interrupt blocking syscalls. */
1643 TranslationBlock *tb;
1644 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1646 spin_lock(&interrupt_lock);
1647 tb = env->current_tb;
1648 /* if the cpu is currently executing code, we must unlink it and
1649 all the potentially executing TB */
1650 if (tb) {
1651 env->current_tb = NULL;
1652 tb_reset_jump_recursive(tb);
1654 spin_unlock(&interrupt_lock);
1657 #ifndef CONFIG_USER_ONLY
1658 /* mask must never be zero, except for A20 change call */
1659 static void tcg_handle_interrupt(CPUState *env, int mask)
1661 int old_mask;
1663 old_mask = env->interrupt_request;
1664 env->interrupt_request |= mask;
1667 * If called from iothread context, wake the target cpu in
1668      * case it is halted.
1670 if (!qemu_cpu_is_self(env)) {
1671 qemu_cpu_kick(env);
1672 return;
1675 if (use_icount) {
1676 env->icount_decr.u16.high = 0xffff;
1677 if (!can_do_io(env)
1678 && (mask & ~old_mask) != 0) {
1679 cpu_abort(env, "Raised interrupt while not in I/O function");
1681 } else {
1682 cpu_unlink_tb(env);
1686 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1688 #else /* CONFIG_USER_ONLY */
1690 void cpu_interrupt(CPUState *env, int mask)
1692 env->interrupt_request |= mask;
1693 cpu_unlink_tb(env);
1695 #endif /* CONFIG_USER_ONLY */
1697 void cpu_reset_interrupt(CPUState *env, int mask)
1699 env->interrupt_request &= ~mask;
1702 void cpu_exit(CPUState *env)
1704 env->exit_request = 1;
1705 cpu_unlink_tb(env);
1708 const CPULogItem cpu_log_items[] = {
1709 { CPU_LOG_TB_OUT_ASM, "out_asm",
1710 "show generated host assembly code for each compiled TB" },
1711 { CPU_LOG_TB_IN_ASM, "in_asm",
1712 "show target assembly code for each compiled TB" },
1713 { CPU_LOG_TB_OP, "op",
1714 "show micro ops for each compiled TB" },
1715 { CPU_LOG_TB_OP_OPT, "op_opt",
1716 "show micro ops "
1717 #ifdef TARGET_I386
1718 "before eflags optimization and "
1719 #endif
1720 "after liveness analysis" },
1721 { CPU_LOG_INT, "int",
1722 "show interrupts/exceptions in short format" },
1723 { CPU_LOG_EXEC, "exec",
1724 "show trace before each executed TB (lots of logs)" },
1725 { CPU_LOG_TB_CPU, "cpu",
1726 "show CPU state before block translation" },
1727 #ifdef TARGET_I386
1728 { CPU_LOG_PCALL, "pcall",
1729 "show protected mode far calls/returns/exceptions" },
1730 { CPU_LOG_RESET, "cpu_reset",
1731 "show CPU state before CPU resets" },
1732 #endif
1733 #ifdef DEBUG_IOPORT
1734 { CPU_LOG_IOPORT, "ioport",
1735 "show all i/o ports accesses" },
1736 #endif
1737 { 0, NULL, NULL },
1740 #ifndef CONFIG_USER_ONLY
1741 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1742 = QLIST_HEAD_INITIALIZER(memory_client_list);
1744 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1745 ram_addr_t size,
1746 ram_addr_t phys_offset,
1747 bool log_dirty)
1749 CPUPhysMemoryClient *client;
1750 QLIST_FOREACH(client, &memory_client_list, list) {
1751 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1755 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1756 target_phys_addr_t end)
1758 CPUPhysMemoryClient *client;
1759 QLIST_FOREACH(client, &memory_client_list, list) {
1760 int r = client->sync_dirty_bitmap(client, start, end);
1761 if (r < 0)
1762 return r;
1764 return 0;
1767 static int cpu_notify_migration_log(int enable)
1769 CPUPhysMemoryClient *client;
1770 QLIST_FOREACH(client, &memory_client_list, list) {
1771 int r = client->migration_log(client, enable);
1772 if (r < 0)
1773 return r;
1775 return 0;
1778 struct last_map {
1779 target_phys_addr_t start_addr;
1780 ram_addr_t size;
1781 ram_addr_t phys_offset;
1784 /* The l1_phys_map provides the upper P_L1_BITS of the guest physical
1785  * address. Each intermediate table provides the next L2_BITS of guest
1786  * physical address space. The number of levels varies based on host and
1787 * guest configuration, making it efficient to build the final guest
1788 * physical address by seeding the L1 offset and shifting and adding in
1789 * each L2 offset as we recurse through them. */
1790 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1791 void **lp, target_phys_addr_t addr,
1792 struct last_map *map)
1794 int i;
1796 if (*lp == NULL) {
1797 return;
1799 if (level == 0) {
1800 PhysPageDesc *pd = *lp;
1801 addr <<= L2_BITS + TARGET_PAGE_BITS;
1802 for (i = 0; i < L2_SIZE; ++i) {
1803 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1804 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1806 if (map->size &&
1807 start_addr == map->start_addr + map->size &&
1808 pd[i].phys_offset == map->phys_offset + map->size) {
1810 map->size += TARGET_PAGE_SIZE;
1811 continue;
1812 } else if (map->size) {
1813 client->set_memory(client, map->start_addr,
1814 map->size, map->phys_offset, false);
1817 map->start_addr = start_addr;
1818 map->size = TARGET_PAGE_SIZE;
1819 map->phys_offset = pd[i].phys_offset;
1822 } else {
1823 void **pp = *lp;
1824 for (i = 0; i < L2_SIZE; ++i) {
1825 phys_page_for_each_1(client, level - 1, pp + i,
1826 (addr << L2_BITS) | i, map);
1831 static void phys_page_for_each(CPUPhysMemoryClient *client)
1833 int i;
1834 struct last_map map = { };
1836 for (i = 0; i < P_L1_SIZE; ++i) {
1837 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1838 l1_phys_map + i, i, &map);
1840 if (map.size) {
1841 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1842 false);
1846 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1848 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1849 phys_page_for_each(client);
1852 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1854 QLIST_REMOVE(client, list);
1856 #endif
1858 static int cmp1(const char *s1, int n, const char *s2)
1860 if (strlen(s2) != n)
1861 return 0;
1862 return memcmp(s1, s2, n) == 0;
1865 /* takes a comma separated list of log masks. Return 0 if error. */
1866 int cpu_str_to_log_mask(const char *str)
1868 const CPULogItem *item;
1869 int mask;
1870 const char *p, *p1;
1872 p = str;
1873 mask = 0;
1874 for(;;) {
1875 p1 = strchr(p, ',');
1876 if (!p1)
1877 p1 = p + strlen(p);
1878 if(cmp1(p,p1-p,"all")) {
1879 for(item = cpu_log_items; item->mask != 0; item++) {
1880 mask |= item->mask;
1882 } else {
1883 for(item = cpu_log_items; item->mask != 0; item++) {
1884 if (cmp1(p, p1 - p, item->name))
1885 goto found;
1887 return 0;
1889 found:
1890 mask |= item->mask;
1891 if (*p1 != ',')
1892 break;
1893 p = p1 + 1;
1895 return mask;
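/* Usage sketch (added): cpu_str_to_log_mask("in_asm,op") returns
   CPU_LOG_TB_IN_ASM | CPU_LOG_TB_OP, "all" ORs together every entry of
   cpu_log_items[], and an unrecognized name makes the whole call return 0 so
   the caller can report the error.  The resulting mask is typically handed
   to cpu_set_log() above, which installs it in loglevel. */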
1898 void cpu_abort(CPUState *env, const char *fmt, ...)
1900 va_list ap;
1901 va_list ap2;
1903 va_start(ap, fmt);
1904 va_copy(ap2, ap);
1905 fprintf(stderr, "qemu: fatal: ");
1906 vfprintf(stderr, fmt, ap);
1907 fprintf(stderr, "\n");
1908 #ifdef TARGET_I386
1909 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1910 #else
1911 cpu_dump_state(env, stderr, fprintf, 0);
1912 #endif
1913 if (qemu_log_enabled()) {
1914 qemu_log("qemu: fatal: ");
1915 qemu_log_vprintf(fmt, ap2);
1916 qemu_log("\n");
1917 #ifdef TARGET_I386
1918 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1919 #else
1920 log_cpu_state(env, 0);
1921 #endif
1922 qemu_log_flush();
1923 qemu_log_close();
1925 va_end(ap2);
1926 va_end(ap);
1927 #if defined(CONFIG_USER_ONLY)
1929 struct sigaction act;
1930 sigfillset(&act.sa_mask);
1931 act.sa_handler = SIG_DFL;
1932 sigaction(SIGABRT, &act, NULL);
1934 #endif
1935 abort();
1938 CPUState *cpu_copy(CPUState *env)
1940 CPUState *new_env = cpu_init(env->cpu_model_str);
1941 CPUState *next_cpu = new_env->next_cpu;
1942 int cpu_index = new_env->cpu_index;
1943 #if defined(TARGET_HAS_ICE)
1944 CPUBreakpoint *bp;
1945 CPUWatchpoint *wp;
1946 #endif
1948 memcpy(new_env, env, sizeof(CPUState));
1950 /* Preserve chaining and index. */
1951 new_env->next_cpu = next_cpu;
1952 new_env->cpu_index = cpu_index;
1954 /* Clone all break/watchpoints.
1955 Note: Once we support ptrace with hw-debug register access, make sure
1956 BP_CPU break/watchpoints are handled correctly on clone. */
1957 QTAILQ_INIT(&env->breakpoints);
1958 QTAILQ_INIT(&env->watchpoints);
1959 #if defined(TARGET_HAS_ICE)
1960 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1961 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1963 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1964 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1965 wp->flags, NULL);
1967 #endif
1969 return new_env;
1972 #if !defined(CONFIG_USER_ONLY)
1974 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1976 unsigned int i;
1978 /* Discard jump cache entries for any tb which might potentially
1979 overlap the flushed page. */
1980 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1981 memset (&env->tb_jmp_cache[i], 0,
1982 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1984 i = tb_jmp_cache_hash_page(addr);
1985 memset (&env->tb_jmp_cache[i], 0,
1986 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1989 static CPUTLBEntry s_cputlb_empty_entry = {
1990 .addr_read = -1,
1991 .addr_write = -1,
1992 .addr_code = -1,
1993 .addend = -1,
1996 /* NOTE: if flush_global is true, also flush global entries (not
1997 implemented yet) */
1998 void tlb_flush(CPUState *env, int flush_global)
2000 int i;
2002 #if defined(DEBUG_TLB)
2003 printf("tlb_flush:\n");
2004 #endif
2005 /* must reset current TB so that interrupts cannot modify the
2006 links while we are modifying them */
2007 env->current_tb = NULL;
2009 for(i = 0; i < CPU_TLB_SIZE; i++) {
2010 int mmu_idx;
2011 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2012 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2016 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2018 env->tlb_flush_addr = -1;
2019 env->tlb_flush_mask = 0;
2020 tlb_flush_count++;
2023 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2025 if (addr == (tlb_entry->addr_read &
2026 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2027 addr == (tlb_entry->addr_write &
2028 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2029 addr == (tlb_entry->addr_code &
2030 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2031 *tlb_entry = s_cputlb_empty_entry;
2035 void tlb_flush_page(CPUState *env, target_ulong addr)
2037 int i;
2038 int mmu_idx;
2040 #if defined(DEBUG_TLB)
2041 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2042 #endif
2043 /* Check if we need to flush due to large pages. */
2044 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2045 #if defined(DEBUG_TLB)
2046 printf("tlb_flush_page: forced full flush ("
2047 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2048 env->tlb_flush_addr, env->tlb_flush_mask);
2049 #endif
2050 tlb_flush(env, 1);
2051 return;
2053 /* must reset current TB so that interrupts cannot modify the
2054 links while we are modifying them */
2055 env->current_tb = NULL;
2057 addr &= TARGET_PAGE_MASK;
2058 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2059 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2060 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2062 tlb_flush_jmp_cache(env, addr);
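/* Added note: each mmu_idx has a direct-mapped software TLB of CPU_TLB_SIZE
   entries, so a page can only sit in slot
   (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1).  That is why
   tlb_flush_page() only needs to clear that one index per MMU mode, and why
   addresses falling inside a tracked large page (see tlb_add_large_page()
   below) force a full tlb_flush() instead. */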
2065 /* update the TLBs so that writes to code in the virtual page 'addr'
2066 can be detected */
2067 static void tlb_protect_code(ram_addr_t ram_addr)
2069 cpu_physical_memory_reset_dirty(ram_addr,
2070 ram_addr + TARGET_PAGE_SIZE,
2071 CODE_DIRTY_FLAG);
2074 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2075 tested for self modifying code */
2076 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2077 target_ulong vaddr)
2079 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2082 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2083 unsigned long start, unsigned long length)
2085 unsigned long addr;
2086 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2087 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2088 if ((addr - start) < length) {
2089 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2094 /* Note: start and end must be within the same ram block. */
2095 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2096 int dirty_flags)
2098 CPUState *env;
2099 unsigned long length, start1;
2100 int i;
2102 start &= TARGET_PAGE_MASK;
2103 end = TARGET_PAGE_ALIGN(end);
2105 length = end - start;
2106 if (length == 0)
2107 return;
2108 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2110 /* we modify the TLB cache so that the dirty bit will be set again
2111 when accessing the range */
2112 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2113 /* Check that we don't span multiple blocks - this breaks the
2114 address comparisons below. */
2115 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2116 != (end - 1) - start) {
2117 abort();
2120 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2121 int mmu_idx;
2122 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2123 for(i = 0; i < CPU_TLB_SIZE; i++)
2124 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2125 start1, length);
2130 int cpu_physical_memory_set_dirty_tracking(int enable)
2132 int ret = 0;
2133 in_migration = enable;
2134 ret = cpu_notify_migration_log(!!enable);
2135 return ret;
2138 int cpu_physical_memory_get_dirty_tracking(void)
2140 return in_migration;
2143 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2144 target_phys_addr_t end_addr)
2146 int ret;
2148 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2149 return ret;
2152 int cpu_physical_log_start(target_phys_addr_t start_addr,
2153 ram_addr_t size)
2155 CPUPhysMemoryClient *client;
2156 QLIST_FOREACH(client, &memory_client_list, list) {
2157 if (client->log_start) {
2158 int r = client->log_start(client, start_addr, size);
2159 if (r < 0) {
2160 return r;
2164 return 0;
2167 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2168 ram_addr_t size)
2170 CPUPhysMemoryClient *client;
2171 QLIST_FOREACH(client, &memory_client_list, list) {
2172 if (client->log_stop) {
2173 int r = client->log_stop(client, start_addr, size);
2174 if (r < 0) {
2175 return r;
2179 return 0;
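/* Usage sketch (illustrative): a live-migration style caller enables dirty
 * logging once and then periodically refreshes the dirty bitmap for the range
 * it is about to scan.  The wrapper below is hypothetical and keeps error
 * handling minimal. */
static int example_sync_ram_range(target_phys_addr_t start,
                                  target_phys_addr_t size)
{
    if (!cpu_physical_memory_get_dirty_tracking()) {
        int ret = cpu_physical_memory_set_dirty_tracking(1);
        if (ret < 0) {
            return ret;
        }
    }
    /* pull the per-page dirty flags from the registered memory clients */
    return cpu_physical_sync_dirty_bitmap(start, start + size);
}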
2182 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2184 ram_addr_t ram_addr;
2185 void *p;
2187 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2188 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2189 + tlb_entry->addend);
2190 ram_addr = qemu_ram_addr_from_host_nofail(p);
2191 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2192 tlb_entry->addr_write |= TLB_NOTDIRTY;
2197 /* update the TLB according to the current state of the dirty bits */
2198 void cpu_tlb_update_dirty(CPUState *env)
2200 int i;
2201 int mmu_idx;
2202 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2203 for(i = 0; i < CPU_TLB_SIZE; i++)
2204 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2208 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2210 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2211 tlb_entry->addr_write = vaddr;
2214 /* update the TLB corresponding to virtual page vaddr
2215 so that it is no longer dirty */
2216 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2218 int i;
2219 int mmu_idx;
2221 vaddr &= TARGET_PAGE_MASK;
2222 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2223 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2224 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2227 /* Our TLB does not support large pages, so remember the area covered by
2228 large pages and trigger a full TLB flush if these are invalidated. */
2229 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2230 target_ulong size)
2232 target_ulong mask = ~(size - 1);
2234 if (env->tlb_flush_addr == (target_ulong)-1) {
2235 env->tlb_flush_addr = vaddr & mask;
2236 env->tlb_flush_mask = mask;
2237 return;
2239 /* Extend the existing region to include the new page.
2240 This is a compromise between unnecessary flushes and the cost
2241 of maintaining a full variable size TLB. */
2242 mask &= env->tlb_flush_mask;
2243 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2244 mask <<= 1;
2246 env->tlb_flush_addr &= mask;
2247 env->tlb_flush_mask = mask;
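/*
 * Worked example (illustrative): with 4 MB large pages, mapping 0x40200000
 * records tlb_flush_addr=0x40200000 and tlb_flush_mask=0xffc00000.  Mapping a
 * second 4 MB page at 0x40a00000 widens the mask until both addresses agree:
 * 0xffc00000 -> 0xff800000 -> 0xff000000, so the tracked region becomes the
 * 16 MB block at 0x40000000.  A later tlb_flush_page() anywhere inside that
 * block triggers a full TLB flush.
 */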
2250 /* Add a new TLB entry. At most one entry for a given virtual address
2251 is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2252 supplied size is only used by tlb_flush_page. */
2253 void tlb_set_page(CPUState *env, target_ulong vaddr,
2254 target_phys_addr_t paddr, int prot,
2255 int mmu_idx, target_ulong size)
2257 PhysPageDesc *p;
2258 unsigned long pd;
2259 unsigned int index;
2260 target_ulong address;
2261 target_ulong code_address;
2262 unsigned long addend;
2263 CPUTLBEntry *te;
2264 CPUWatchpoint *wp;
2265 target_phys_addr_t iotlb;
2267 assert(size >= TARGET_PAGE_SIZE);
2268 if (size != TARGET_PAGE_SIZE) {
2269 tlb_add_large_page(env, vaddr, size);
2271 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2272 if (!p) {
2273 pd = IO_MEM_UNASSIGNED;
2274 } else {
2275 pd = p->phys_offset;
2277 #if defined(DEBUG_TLB)
2278 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2279 " prot=%x idx=%d pd=0x%08lx\n",
2280 vaddr, paddr, prot, mmu_idx, pd);
2281 #endif
2283 address = vaddr;
2284 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2285 /* IO memory case (romd handled later) */
2286 address |= TLB_MMIO;
2288 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2289 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2290 /* Normal RAM. */
2291 iotlb = pd & TARGET_PAGE_MASK;
2292 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2293 iotlb |= IO_MEM_NOTDIRTY;
2294 else
2295 iotlb |= IO_MEM_ROM;
2296 } else {
2297 /* IO handlers are currently passed a physical address.
2298 It would be nice to pass an offset from the base address
2299 of that region. This would avoid having to special case RAM,
2300 and avoid full address decoding in every device.
2301 We can't use the high bits of pd for this because
2302 IO_MEM_ROMD uses these as a ram address. */
2303 iotlb = (pd & ~TARGET_PAGE_MASK);
2304 if (p) {
2305 iotlb += p->region_offset;
2306 } else {
2307 iotlb += paddr;
2311 code_address = address;
2312 /* Make accesses to pages with watchpoints go via the
2313 watchpoint trap routines. */
2314 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2315 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2316 /* Avoid trapping reads of pages with a write breakpoint. */
2317 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2318 iotlb = io_mem_watch + paddr;
2319 address |= TLB_MMIO;
2320 break;
2325 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2326 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2327 te = &env->tlb_table[mmu_idx][index];
2328 te->addend = addend - vaddr;
2329 if (prot & PAGE_READ) {
2330 te->addr_read = address;
2331 } else {
2332 te->addr_read = -1;
2335 if (prot & PAGE_EXEC) {
2336 te->addr_code = code_address;
2337 } else {
2338 te->addr_code = -1;
2340 if (prot & PAGE_WRITE) {
2341 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2342 (pd & IO_MEM_ROMD)) {
2343 /* Write access calls the I/O callback. */
2344 te->addr_write = address | TLB_MMIO;
2345 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2346 !cpu_physical_memory_is_dirty(pd)) {
2347 te->addr_write = address | TLB_NOTDIRTY;
2348 } else {
2349 te->addr_write = address;
2351 } else {
2352 te->addr_write = -1;
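/* Sketch (illustrative) of how the generated softmmu fast path consumes the
 * entry filled in above: when the tags match and no MMIO/notdirty bits are
 * set, the host address is simply the guest address plus te->addend.  The
 * helper name is hypothetical and the real templates fall back to a slow
 * path instead of returning 0. */
static inline uint8_t example_fast_ldub(CPUState *env, target_ulong vaddr,
                                        int mmu_idx)
{
    unsigned int index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
    CPUTLBEntry *te = &env->tlb_table[mmu_idx][index];

    if ((vaddr & TARGET_PAGE_MASK) ==
        (te->addr_read & (TARGET_PAGE_MASK | TLB_INVALID_MASK)) &&
        !(te->addr_read & ~TARGET_PAGE_MASK)) {
        /* plain RAM: direct host access */
        return ldub_p((void *)(unsigned long)(vaddr + te->addend));
    }
    /* tag mismatch or MMIO/watchpoint page: the real code takes the slow path */
    return 0;
}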
2356 #else
2358 void tlb_flush(CPUState *env, int flush_global)
2362 void tlb_flush_page(CPUState *env, target_ulong addr)
2367 * Walks guest process memory "regions" one by one
2368 * and calls callback function 'fn' for each region.
2371 struct walk_memory_regions_data
2373 walk_memory_regions_fn fn;
2374 void *priv;
2375 unsigned long start;
2376 int prot;
2379 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2380 abi_ulong end, int new_prot)
2382 if (data->start != -1ul) {
2383 int rc = data->fn(data->priv, data->start, end, data->prot);
2384 if (rc != 0) {
2385 return rc;
2389 data->start = (new_prot ? end : -1ul);
2390 data->prot = new_prot;
2392 return 0;
2395 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2396 abi_ulong base, int level, void **lp)
2398 abi_ulong pa;
2399 int i, rc;
2401 if (*lp == NULL) {
2402 return walk_memory_regions_end(data, base, 0);
2405 if (level == 0) {
2406 PageDesc *pd = *lp;
2407 for (i = 0; i < L2_SIZE; ++i) {
2408 int prot = pd[i].flags;
2410 pa = base | (i << TARGET_PAGE_BITS);
2411 if (prot != data->prot) {
2412 rc = walk_memory_regions_end(data, pa, prot);
2413 if (rc != 0) {
2414 return rc;
2418 } else {
2419 void **pp = *lp;
2420 for (i = 0; i < L2_SIZE; ++i) {
2421 pa = base | ((abi_ulong)i <<
2422 (TARGET_PAGE_BITS + L2_BITS * level));
2423 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2424 if (rc != 0) {
2425 return rc;
2430 return 0;
2433 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2435 struct walk_memory_regions_data data;
2436 unsigned long i;
2438 data.fn = fn;
2439 data.priv = priv;
2440 data.start = -1ul;
2441 data.prot = 0;
2443 for (i = 0; i < V_L1_SIZE; i++) {
2444 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2445 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2446 if (rc != 0) {
2447 return rc;
2451 return walk_memory_regions_end(&data, 0, 0);
2454 static int dump_region(void *priv, abi_ulong start,
2455 abi_ulong end, unsigned long prot)
2457 FILE *f = (FILE *)priv;
2459 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2460 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2461 start, end, end - start,
2462 ((prot & PAGE_READ) ? 'r' : '-'),
2463 ((prot & PAGE_WRITE) ? 'w' : '-'),
2464 ((prot & PAGE_EXEC) ? 'x' : '-'));
2466 return (0);
2469 /* dump memory mappings */
2470 void page_dump(FILE *f)
2472 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2473 "start", "end", "size", "prot");
2474 walk_memory_regions(f, dump_region);
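/* Usage sketch (illustrative): besides page_dump(), any caller can walk the
 * guest mappings with its own callback.  The callback and counter below are
 * hypothetical; returning non-zero from the callback aborts the walk. */
static int example_count_exec(void *priv, abi_ulong start, abi_ulong end,
                              unsigned long prot)
{
    if (prot & PAGE_EXEC) {
        *(abi_ulong *)priv += end - start;
    }
    return 0;
}

static abi_ulong example_exec_bytes(void)
{
    abi_ulong total = 0;
    walk_memory_regions(&total, example_count_exec);
    return total;
}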
2477 int page_get_flags(target_ulong address)
2479 PageDesc *p;
2481 p = page_find(address >> TARGET_PAGE_BITS);
2482 if (!p)
2483 return 0;
2484 return p->flags;
2487 /* Modify the flags of a page and invalidate the code if necessary.
2488 The flag PAGE_WRITE_ORG is positioned automatically depending
2489 on PAGE_WRITE. The mmap_lock should already be held. */
2490 void page_set_flags(target_ulong start, target_ulong end, int flags)
2492 target_ulong addr, len;
2494 /* This function should never be called with addresses outside the
2495 guest address space. If this assert fires, it probably indicates
2496 a missing call to h2g_valid. */
2497 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2498 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2499 #endif
2500 assert(start < end);
2502 start = start & TARGET_PAGE_MASK;
2503 end = TARGET_PAGE_ALIGN(end);
2505 if (flags & PAGE_WRITE) {
2506 flags |= PAGE_WRITE_ORG;
2509 for (addr = start, len = end - start;
2510 len != 0;
2511 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2512 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2514 /* If the write protection bit is set, then we invalidate
2515 the code inside. */
2516 if (!(p->flags & PAGE_WRITE) &&
2517 (flags & PAGE_WRITE) &&
2518 p->first_tb) {
2519 tb_invalidate_phys_page(addr, 0, NULL);
2521 p->flags = flags;
2525 int page_check_range(target_ulong start, target_ulong len, int flags)
2527 PageDesc *p;
2528 target_ulong end;
2529 target_ulong addr;
2531 /* This function should never be called with addresses outside the
2532 guest address space. If this assert fires, it probably indicates
2533 a missing call to h2g_valid. */
2534 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2535 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2536 #endif
2538 if (len == 0) {
2539 return 0;
2541 if (start + len - 1 < start) {
2542 /* We've wrapped around. */
2543 return -1;
2546 end = TARGET_PAGE_ALIGN(start+len); /* must do this before we lose bits in the next step */
2547 start = start & TARGET_PAGE_MASK;
2549 for (addr = start, len = end - start;
2550 len != 0;
2551 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2552 p = page_find(addr >> TARGET_PAGE_BITS);
2553 if( !p )
2554 return -1;
2555 if( !(p->flags & PAGE_VALID) )
2556 return -1;
2558 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2559 return -1;
2560 if (flags & PAGE_WRITE) {
2561 if (!(p->flags & PAGE_WRITE_ORG))
2562 return -1;
2563 /* unprotect the page if it was put read-only because it
2564 contains translated code */
2565 if (!(p->flags & PAGE_WRITE)) {
2566 if (!page_unprotect(addr, 0, NULL))
2567 return -1;
2569 return 0;
2572 return 0;
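/* Usage sketch (illustrative): a syscall emulation path would validate a
 * guest buffer before touching it.  The wrapper name is hypothetical. */
static int example_guest_buffer_writable(target_ulong guest_addr,
                                         target_ulong len)
{
    /* non-zero when every page in the range is valid and writable */
    return page_check_range(guest_addr, len, PAGE_WRITE) == 0;
}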
2575 /* called from signal handler: invalidate the code and unprotect the
2576 page. Return TRUE if the fault was successfully handled. */
2577 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2579 unsigned int prot;
2580 PageDesc *p;
2581 target_ulong host_start, host_end, addr;
2583 /* Technically this isn't safe inside a signal handler. However we
2584 know this only ever happens in a synchronous SEGV handler, so in
2585 practice it seems to be ok. */
2586 mmap_lock();
2588 p = page_find(address >> TARGET_PAGE_BITS);
2589 if (!p) {
2590 mmap_unlock();
2591 return 0;
2594 /* if the page was really writable, then we change its
2595 protection back to writable */
2596 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2597 host_start = address & qemu_host_page_mask;
2598 host_end = host_start + qemu_host_page_size;
2600 prot = 0;
2601 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2602 p = page_find(addr >> TARGET_PAGE_BITS);
2603 p->flags |= PAGE_WRITE;
2604 prot |= p->flags;
2606 /* and since the content will be modified, we must invalidate
2607 the corresponding translated code. */
2608 tb_invalidate_phys_page(addr, pc, puc);
2609 #ifdef DEBUG_TB_CHECK
2610 tb_invalidate_check(addr);
2611 #endif
2613 mprotect((void *)g2h(host_start), qemu_host_page_size,
2614 prot & PAGE_BITS);
2616 mmap_unlock();
2617 return 1;
2619 mmap_unlock();
2620 return 0;
2623 static inline void tlb_set_dirty(CPUState *env,
2624 unsigned long addr, target_ulong vaddr)
2627 #endif /* defined(CONFIG_USER_ONLY) */
2629 #if !defined(CONFIG_USER_ONLY)
2631 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2632 typedef struct subpage_t {
2633 target_phys_addr_t base;
2634 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2635 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2636 } subpage_t;
2638 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2639 ram_addr_t memory, ram_addr_t region_offset);
2640 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2641 ram_addr_t orig_memory,
2642 ram_addr_t region_offset);
2643 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2644 need_subpage) \
2645 do { \
2646 if (addr > start_addr) \
2647 start_addr2 = 0; \
2648 else { \
2649 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2650 if (start_addr2 > 0) \
2651 need_subpage = 1; \
2654 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2655 end_addr2 = TARGET_PAGE_SIZE - 1; \
2656 else { \
2657 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2658 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2659 need_subpage = 1; \
2661 } while (0)
2663 /* register physical memory.
2664 For RAM, 'size' must be a multiple of the target page size.
2665 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2666 io memory page. The address used when calling the IO function is
2667 the offset from the start of the region, plus region_offset. Both
2668 start_addr and region_offset are rounded down to a page boundary
2669 before calculating this offset. This should not be a problem unless
2670 the low bits of start_addr and region_offset differ. */
2671 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2672 ram_addr_t size,
2673 ram_addr_t phys_offset,
2674 ram_addr_t region_offset,
2675 bool log_dirty)
2677 target_phys_addr_t addr, end_addr;
2678 PhysPageDesc *p;
2679 CPUState *env;
2680 ram_addr_t orig_size = size;
2681 subpage_t *subpage;
2683 assert(size);
2684 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2686 if (phys_offset == IO_MEM_UNASSIGNED) {
2687 region_offset = start_addr;
2689 region_offset &= TARGET_PAGE_MASK;
2690 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2691 end_addr = start_addr + (target_phys_addr_t)size;
2693 addr = start_addr;
2694 do {
2695 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2696 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2697 ram_addr_t orig_memory = p->phys_offset;
2698 target_phys_addr_t start_addr2, end_addr2;
2699 int need_subpage = 0;
2701 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2702 need_subpage);
2703 if (need_subpage) {
2704 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2705 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2706 &p->phys_offset, orig_memory,
2707 p->region_offset);
2708 } else {
2709 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2710 >> IO_MEM_SHIFT];
2712 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2713 region_offset);
2714 p->region_offset = 0;
2715 } else {
2716 p->phys_offset = phys_offset;
2717 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2718 (phys_offset & IO_MEM_ROMD))
2719 phys_offset += TARGET_PAGE_SIZE;
2721 } else {
2722 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2723 p->phys_offset = phys_offset;
2724 p->region_offset = region_offset;
2725 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2726 (phys_offset & IO_MEM_ROMD)) {
2727 phys_offset += TARGET_PAGE_SIZE;
2728 } else {
2729 target_phys_addr_t start_addr2, end_addr2;
2730 int need_subpage = 0;
2732 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2733 end_addr2, need_subpage);
2735 if (need_subpage) {
2736 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2737 &p->phys_offset, IO_MEM_UNASSIGNED,
2738 addr & TARGET_PAGE_MASK);
2739 subpage_register(subpage, start_addr2, end_addr2,
2740 phys_offset, region_offset);
2741 p->region_offset = 0;
2745 region_offset += TARGET_PAGE_SIZE;
2746 addr += TARGET_PAGE_SIZE;
2747 } while (addr != end_addr);
2749 /* since each CPU stores ram addresses in its TLB cache, we must
2750 reset the modified entries */
2751 /* XXX: slow ! */
2752 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2753 tlb_flush(env, 1);
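/* Usage sketch (illustrative): mapping a freshly allocated RAM block and a
 * one-page MMIO window into the guest physical address space.  The addresses,
 * sizes and the io_index parameter (as returned by cpu_register_io_memory())
 * are hypothetical. */
static void example_map_board_memory(ram_addr_t ram_offset, int io_index)
{
    /* 16 MB of RAM at guest physical address 0 */
    cpu_register_physical_memory_log(0x00000000, 0x01000000,
                                     ram_offset | IO_MEM_RAM, 0, false);
    /* a one-page device register window at 0x10000000 */
    cpu_register_physical_memory_log(0x10000000, TARGET_PAGE_SIZE,
                                     io_index, 0, false);
}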
2757 /* XXX: temporary until new memory mapping API */
2758 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2760 PhysPageDesc *p;
2762 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2763 if (!p)
2764 return IO_MEM_UNASSIGNED;
2765 return p->phys_offset;
2768 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2770 if (kvm_enabled())
2771 kvm_coalesce_mmio_region(addr, size);
2774 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2776 if (kvm_enabled())
2777 kvm_uncoalesce_mmio_region(addr, size);
2780 void qemu_flush_coalesced_mmio_buffer(void)
2782 if (kvm_enabled())
2783 kvm_flush_coalesced_mmio_buffer();
2786 #if defined(__linux__) && !defined(TARGET_S390X)
2788 #include <sys/vfs.h>
2790 #define HUGETLBFS_MAGIC 0x958458f6
2792 static long gethugepagesize(const char *path)
2794 struct statfs fs;
2795 int ret;
2797 do {
2798 ret = statfs(path, &fs);
2799 } while (ret != 0 && errno == EINTR);
2801 if (ret != 0) {
2802 perror(path);
2803 return 0;
2806 if (fs.f_type != HUGETLBFS_MAGIC)
2807 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2809 return fs.f_bsize;
2812 static void *file_ram_alloc(RAMBlock *block,
2813 ram_addr_t memory,
2814 const char *path)
2816 char *filename;
2817 void *area;
2818 int fd;
2819 #ifdef MAP_POPULATE
2820 int flags;
2821 #endif
2822 unsigned long hpagesize;
2824 hpagesize = gethugepagesize(path);
2825 if (!hpagesize) {
2826 return NULL;
2829 if (memory < hpagesize) {
2830 return NULL;
2833 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2834 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2835 return NULL;
2838 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2839 return NULL;
2842 fd = mkstemp(filename);
2843 if (fd < 0) {
2844 perror("unable to create backing store for hugepages");
2845 free(filename);
2846 return NULL;
2848 unlink(filename);
2849 free(filename);
2851 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2854 * ftruncate is not supported by hugetlbfs in older
2855 * hosts, so don't bother bailing out on errors.
2856 * If anything goes wrong with it under other filesystems,
2857 * mmap will fail.
2859 if (ftruncate(fd, memory))
2860 perror("ftruncate");
2862 #ifdef MAP_POPULATE
2863 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2864 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2865 * to sidestep this quirk.
2867 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2868 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2869 #else
2870 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2871 #endif
2872 if (area == MAP_FAILED) {
2873 perror("file_ram_alloc: can't mmap RAM pages");
2874 close(fd);
2875 return (NULL);
2877 block->fd = fd;
2878 return area;
2880 #endif
2882 static ram_addr_t find_ram_offset(ram_addr_t size)
2884 RAMBlock *block, *next_block;
2885 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2887 if (QLIST_EMPTY(&ram_list.blocks))
2888 return 0;
2890 QLIST_FOREACH(block, &ram_list.blocks, next) {
2891 ram_addr_t end, next = RAM_ADDR_MAX;
2893 end = block->offset + block->length;
2895 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2896 if (next_block->offset >= end) {
2897 next = MIN(next, next_block->offset);
2900 if (next - end >= size && next - end < mingap) {
2901 offset = end;
2902 mingap = next - end;
2905 return offset;
2908 static ram_addr_t last_ram_offset(void)
2910 RAMBlock *block;
2911 ram_addr_t last = 0;
2913 QLIST_FOREACH(block, &ram_list.blocks, next)
2914 last = MAX(last, block->offset + block->length);
2916 return last;
2919 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2920 ram_addr_t size, void *host)
2922 RAMBlock *new_block, *block;
2924 size = TARGET_PAGE_ALIGN(size);
2925 new_block = g_malloc0(sizeof(*new_block));
2927 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2928 char *id = dev->parent_bus->info->get_dev_path(dev);
2929 if (id) {
2930 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2931 g_free(id);
2934 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2936 QLIST_FOREACH(block, &ram_list.blocks, next) {
2937 if (!strcmp(block->idstr, new_block->idstr)) {
2938 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2939 new_block->idstr);
2940 abort();
2944 new_block->offset = find_ram_offset(size);
2945 if (host) {
2946 new_block->host = host;
2947 new_block->flags |= RAM_PREALLOC_MASK;
2948 } else {
2949 if (mem_path) {
2950 #if defined (__linux__) && !defined(TARGET_S390X)
2951 new_block->host = file_ram_alloc(new_block, size, mem_path);
2952 if (!new_block->host) {
2953 new_block->host = qemu_vmalloc(size);
2954 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2956 #else
2957 fprintf(stderr, "-mem-path option unsupported\n");
2958 exit(1);
2959 #endif
2960 } else {
2961 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2962 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2963 a system-defined value, which is at least 256GB. Larger systems
2964 have larger values. We put the guest between the end of the data
2965 segment (system break) and this value. We use 32GB as a base to
2966 leave enough room for the system break to grow. */
2967 new_block->host = mmap((void*)0x800000000, size,
2968 PROT_EXEC|PROT_READ|PROT_WRITE,
2969 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2970 if (new_block->host == MAP_FAILED) {
2971 fprintf(stderr, "Allocating RAM failed\n");
2972 abort();
2974 #else
2975 if (xen_enabled()) {
2976 xen_ram_alloc(new_block->offset, size);
2977 } else {
2978 new_block->host = qemu_vmalloc(size);
2980 #endif
2981 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2984 new_block->length = size;
2986 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2988 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2989 last_ram_offset() >> TARGET_PAGE_BITS);
2990 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2991 0xff, size >> TARGET_PAGE_BITS);
2993 if (kvm_enabled())
2994 kvm_setup_guest_memory(new_block->host, size);
2996 return new_block->offset;
2999 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
3001 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
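/* Usage sketch (illustrative): a device model allocates backing RAM and then
 * obtains a host pointer for local initialization.  The block name and size
 * are hypothetical; real devices pass their DeviceState as 'dev' so the
 * block id becomes unique. */
static ram_addr_t example_alloc_vram(void)
{
    ram_addr_t offset = qemu_ram_alloc(NULL, "example.vram", 0x00800000);
    memset(qemu_get_ram_ptr(offset), 0, 0x00800000);
    return offset;
}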
3004 void qemu_ram_free_from_ptr(ram_addr_t addr)
3006 RAMBlock *block;
3008 QLIST_FOREACH(block, &ram_list.blocks, next) {
3009 if (addr == block->offset) {
3010 QLIST_REMOVE(block, next);
3011 g_free(block);
3012 return;
3017 void qemu_ram_free(ram_addr_t addr)
3019 RAMBlock *block;
3021 QLIST_FOREACH(block, &ram_list.blocks, next) {
3022 if (addr == block->offset) {
3023 QLIST_REMOVE(block, next);
3024 if (block->flags & RAM_PREALLOC_MASK) {
3026 } else if (mem_path) {
3027 #if defined (__linux__) && !defined(TARGET_S390X)
3028 if (block->fd) {
3029 munmap(block->host, block->length);
3030 close(block->fd);
3031 } else {
3032 qemu_vfree(block->host);
3034 #else
3035 abort();
3036 #endif
3037 } else {
3038 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3039 munmap(block->host, block->length);
3040 #else
3041 if (xen_enabled()) {
3042 xen_invalidate_map_cache_entry(block->host);
3043 } else {
3044 qemu_vfree(block->host);
3046 #endif
3048 g_free(block);
3049 return;
3055 #ifndef _WIN32
3056 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3058 RAMBlock *block;
3059 ram_addr_t offset;
3060 int flags;
3061 void *area, *vaddr;
3063 QLIST_FOREACH(block, &ram_list.blocks, next) {
3064 offset = addr - block->offset;
3065 if (offset < block->length) {
3066 vaddr = block->host + offset;
3067 if (block->flags & RAM_PREALLOC_MASK) {
3069 } else {
3070 flags = MAP_FIXED;
3071 munmap(vaddr, length);
3072 if (mem_path) {
3073 #if defined(__linux__) && !defined(TARGET_S390X)
3074 if (block->fd) {
3075 #ifdef MAP_POPULATE
3076 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3077 MAP_PRIVATE;
3078 #else
3079 flags |= MAP_PRIVATE;
3080 #endif
3081 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3082 flags, block->fd, offset);
3083 } else {
3084 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3085 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3086 flags, -1, 0);
3088 #else
3089 abort();
3090 #endif
3091 } else {
3092 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3093 flags |= MAP_SHARED | MAP_ANONYMOUS;
3094 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3095 flags, -1, 0);
3096 #else
3097 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3098 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3099 flags, -1, 0);
3100 #endif
3102 if (area != vaddr) {
3103 fprintf(stderr, "Could not remap addr: "
3104 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3105 length, addr);
3106 exit(1);
3108 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3110 return;
3114 #endif /* !_WIN32 */
3116 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3117 With the exception of the softmmu code in this file, this should
3118 only be used for local memory (e.g. video ram) that the device owns,
3119 and knows it isn't going to access beyond the end of the block.
3121 It should not be used for general purpose DMA.
3122 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3124 void *qemu_get_ram_ptr(ram_addr_t addr)
3126 RAMBlock *block;
3128 QLIST_FOREACH(block, &ram_list.blocks, next) {
3129 if (addr - block->offset < block->length) {
3130 /* Move this entry to the start of the list. */
3131 if (block != QLIST_FIRST(&ram_list.blocks)) {
3132 QLIST_REMOVE(block, next);
3133 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3135 if (xen_enabled()) {
3136 /* We need to check whether the requested address is in RAM
3137 * because we don't want to map the entire guest memory in QEMU.
3138 * In that case, just map up to the end of the page.
3140 if (block->offset == 0) {
3141 return xen_map_cache(addr, 0, 0);
3142 } else if (block->host == NULL) {
3143 block->host =
3144 xen_map_cache(block->offset, block->length, 1);
3147 return block->host + (addr - block->offset);
3151 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3152 abort();
3154 return NULL;
3157 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3158 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3160 void *qemu_safe_ram_ptr(ram_addr_t addr)
3162 RAMBlock *block;
3164 QLIST_FOREACH(block, &ram_list.blocks, next) {
3165 if (addr - block->offset < block->length) {
3166 if (xen_enabled()) {
3167 /* We need to check whether the requested address is in RAM
3168 * because we don't want to map the entire guest memory in QEMU.
3169 * In that case, just map up to the end of the page.
3171 if (block->offset == 0) {
3172 return xen_map_cache(addr, 0, 0);
3173 } else if (block->host == NULL) {
3174 block->host =
3175 xen_map_cache(block->offset, block->length, 1);
3178 return block->host + (addr - block->offset);
3182 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3183 abort();
3185 return NULL;
3188 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3189 * but takes a size argument */
3190 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3192 if (*size == 0) {
3193 return NULL;
3195 if (xen_enabled()) {
3196 return xen_map_cache(addr, *size, 1);
3197 } else {
3198 RAMBlock *block;
3200 QLIST_FOREACH(block, &ram_list.blocks, next) {
3201 if (addr - block->offset < block->length) {
3202 if (addr - block->offset + *size > block->length)
3203 *size = block->length - addr + block->offset;
3204 return block->host + (addr - block->offset);
3208 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3209 abort();
3213 void qemu_put_ram_ptr(void *addr)
3215 trace_qemu_put_ram_ptr(addr);
3218 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3220 RAMBlock *block;
3221 uint8_t *host = ptr;
3223 if (xen_enabled()) {
3224 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3225 return 0;
3228 QLIST_FOREACH(block, &ram_list.blocks, next) {
3229 /* This case happens when the block is not mapped. */
3230 if (block->host == NULL) {
3231 continue;
3233 if (host - block->host < block->length) {
3234 *ram_addr = block->offset + (host - block->host);
3235 return 0;
3239 return -1;
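/* Usage sketch (illustrative): translating a host pointer obtained from
 * qemu_get_ram_ptr() back to its ram_addr_t.  Purely a round-trip check;
 * the function name is hypothetical. */
static int example_ram_round_trip(ram_addr_t offset)
{
    void *host = qemu_get_ram_ptr(offset);
    ram_addr_t back;

    if (qemu_ram_addr_from_host(host, &back) < 0) {
        return -1;              /* pointer is not inside any RAM block */
    }
    return back == offset ? 0 : -1;
}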
3242 /* Some of the softmmu routines need to translate from a host pointer
3243 (typically a TLB entry) back to a ram offset. */
3244 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3246 ram_addr_t ram_addr;
3248 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3249 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3250 abort();
3252 return ram_addr;
3255 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3257 #ifdef DEBUG_UNASSIGNED
3258 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3259 #endif
3260 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3261 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3262 #endif
3263 return 0;
3266 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3268 #ifdef DEBUG_UNASSIGNED
3269 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3270 #endif
3271 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3272 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3273 #endif
3274 return 0;
3277 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3279 #ifdef DEBUG_UNASSIGNED
3280 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3281 #endif
3282 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3283 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3284 #endif
3285 return 0;
3288 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3290 #ifdef DEBUG_UNASSIGNED
3291 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3292 #endif
3293 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3294 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3295 #endif
3298 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3300 #ifdef DEBUG_UNASSIGNED
3301 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3302 #endif
3303 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3304 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3305 #endif
3308 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3310 #ifdef DEBUG_UNASSIGNED
3311 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3312 #endif
3313 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3314 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3315 #endif
3318 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3319 unassigned_mem_readb,
3320 unassigned_mem_readw,
3321 unassigned_mem_readl,
3324 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3325 unassigned_mem_writeb,
3326 unassigned_mem_writew,
3327 unassigned_mem_writel,
3330 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3331 uint32_t val)
3333 int dirty_flags;
3334 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3335 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3336 #if !defined(CONFIG_USER_ONLY)
3337 tb_invalidate_phys_page_fast(ram_addr, 1);
3338 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3339 #endif
3341 stb_p(qemu_get_ram_ptr(ram_addr), val);
3342 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3343 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3344 /* we remove the notdirty callback only if the code has been
3345 flushed */
3346 if (dirty_flags == 0xff)
3347 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3350 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3351 uint32_t val)
3353 int dirty_flags;
3354 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3355 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3356 #if !defined(CONFIG_USER_ONLY)
3357 tb_invalidate_phys_page_fast(ram_addr, 2);
3358 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3359 #endif
3361 stw_p(qemu_get_ram_ptr(ram_addr), val);
3362 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3363 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3364 /* we remove the notdirty callback only if the code has been
3365 flushed */
3366 if (dirty_flags == 0xff)
3367 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3370 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3371 uint32_t val)
3373 int dirty_flags;
3374 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3375 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3376 #if !defined(CONFIG_USER_ONLY)
3377 tb_invalidate_phys_page_fast(ram_addr, 4);
3378 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3379 #endif
3381 stl_p(qemu_get_ram_ptr(ram_addr), val);
3382 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3383 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3384 /* we remove the notdirty callback only if the code has been
3385 flushed */
3386 if (dirty_flags == 0xff)
3387 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3390 static CPUReadMemoryFunc * const error_mem_read[3] = {
3391 NULL, /* never used */
3392 NULL, /* never used */
3393 NULL, /* never used */
3396 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3397 notdirty_mem_writeb,
3398 notdirty_mem_writew,
3399 notdirty_mem_writel,
3402 /* Generate a debug exception if a watchpoint has been hit. */
3403 static void check_watchpoint(int offset, int len_mask, int flags)
3405 CPUState *env = cpu_single_env;
3406 target_ulong pc, cs_base;
3407 TranslationBlock *tb;
3408 target_ulong vaddr;
3409 CPUWatchpoint *wp;
3410 int cpu_flags;
3412 if (env->watchpoint_hit) {
3413 /* We re-entered the check after replacing the TB. Now raise
3414 * the debug interrupt so that it will trigger after the
3415 * current instruction. */
3416 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3417 return;
3419 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3420 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3421 if ((vaddr == (wp->vaddr & len_mask) ||
3422 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3423 wp->flags |= BP_WATCHPOINT_HIT;
3424 if (!env->watchpoint_hit) {
3425 env->watchpoint_hit = wp;
3426 tb = tb_find_pc(env->mem_io_pc);
3427 if (!tb) {
3428 cpu_abort(env, "check_watchpoint: could not find TB for "
3429 "pc=%p", (void *)env->mem_io_pc);
3431 cpu_restore_state(tb, env, env->mem_io_pc);
3432 tb_phys_invalidate(tb, -1);
3433 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3434 env->exception_index = EXCP_DEBUG;
3435 } else {
3436 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3437 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3439 cpu_resume_from_signal(env, NULL);
3441 } else {
3442 wp->flags &= ~BP_WATCHPOINT_HIT;
3447 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3448 so these check for a hit then pass through to the normal out-of-line
3449 phys routines. */
3450 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3452 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3453 return ldub_phys(addr);
3456 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3458 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3459 return lduw_phys(addr);
3462 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3464 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3465 return ldl_phys(addr);
3468 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3469 uint32_t val)
3471 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3472 stb_phys(addr, val);
3475 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3476 uint32_t val)
3478 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3479 stw_phys(addr, val);
3482 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3483 uint32_t val)
3485 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3486 stl_phys(addr, val);
3489 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3490 watch_mem_readb,
3491 watch_mem_readw,
3492 watch_mem_readl,
3495 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3496 watch_mem_writeb,
3497 watch_mem_writew,
3498 watch_mem_writel,
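/* Usage sketch (illustrative): the handlers above are only reached because
 * tlb_set_page() routes pages containing watchpoints through TLB_MMIO.  A
 * debugger front end would arm a watchpoint roughly like this; the exact
 * cpu_watchpoint_insert() signature and flag choice are assumptions based on
 * the declarations elsewhere in this tree. */
static int example_watch_word(CPUState *env, target_ulong vaddr)
{
    CPUWatchpoint *wp;
    return cpu_watchpoint_insert(env, vaddr, 4, BP_MEM_WRITE | BP_GDB, &wp);
}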
3501 static inline uint32_t subpage_readlen (subpage_t *mmio,
3502 target_phys_addr_t addr,
3503 unsigned int len)
3505 unsigned int idx = SUBPAGE_IDX(addr);
3506 #if defined(DEBUG_SUBPAGE)
3507 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3508 mmio, len, addr, idx);
3509 #endif
3511 addr += mmio->region_offset[idx];
3512 idx = mmio->sub_io_index[idx];
3513 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3516 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3517 uint32_t value, unsigned int len)
3519 unsigned int idx = SUBPAGE_IDX(addr);
3520 #if defined(DEBUG_SUBPAGE)
3521 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3522 __func__, mmio, len, addr, idx, value);
3523 #endif
3525 addr += mmio->region_offset[idx];
3526 idx = mmio->sub_io_index[idx];
3527 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3530 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3532 return subpage_readlen(opaque, addr, 0);
3535 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3536 uint32_t value)
3538 subpage_writelen(opaque, addr, value, 0);
3541 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3543 return subpage_readlen(opaque, addr, 1);
3546 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3547 uint32_t value)
3549 subpage_writelen(opaque, addr, value, 1);
3552 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3554 return subpage_readlen(opaque, addr, 2);
3557 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3558 uint32_t value)
3560 subpage_writelen(opaque, addr, value, 2);
3563 static CPUReadMemoryFunc * const subpage_read[] = {
3564 &subpage_readb,
3565 &subpage_readw,
3566 &subpage_readl,
3569 static CPUWriteMemoryFunc * const subpage_write[] = {
3570 &subpage_writeb,
3571 &subpage_writew,
3572 &subpage_writel,
3575 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3576 ram_addr_t memory, ram_addr_t region_offset)
3578 int idx, eidx;
3580 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3581 return -1;
3582 idx = SUBPAGE_IDX(start);
3583 eidx = SUBPAGE_IDX(end);
3584 #if defined(DEBUG_SUBPAGE)
3585 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3586 mmio, start, end, idx, eidx, memory);
3587 #endif
3588 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3589 memory = IO_MEM_UNASSIGNED;
3590 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3591 for (; idx <= eidx; idx++) {
3592 mmio->sub_io_index[idx] = memory;
3593 mmio->region_offset[idx] = region_offset;
3596 return 0;
3599 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3600 ram_addr_t orig_memory,
3601 ram_addr_t region_offset)
3603 subpage_t *mmio;
3604 int subpage_memory;
3606 mmio = g_malloc0(sizeof(subpage_t));
3608 mmio->base = base;
3609 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3610 DEVICE_NATIVE_ENDIAN);
3611 #if defined(DEBUG_SUBPAGE)
3612 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3613 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3614 #endif
3615 *phys = subpage_memory | IO_MEM_SUBPAGE;
3616 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3618 return mmio;
3621 static int get_free_io_mem_idx(void)
3623 int i;
3625 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3626 if (!io_mem_used[i]) {
3627 io_mem_used[i] = 1;
3628 return i;
3630 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3631 return -1;
3635 * Usually, devices operate in little-endian mode. There are devices out
3636 * there that operate in big-endian mode too. Each device gets byte-swapped
3637 * mmio if plugged into a CPU that uses the other endianness.
3639 * CPU Device swap?
3641 * little little no
3642 * little big yes
3643 * big little yes
3644 * big big no
3647 typedef struct SwapEndianContainer {
3648 CPUReadMemoryFunc *read[3];
3649 CPUWriteMemoryFunc *write[3];
3650 void *opaque;
3651 } SwapEndianContainer;
3653 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3655 uint32_t val;
3656 SwapEndianContainer *c = opaque;
3657 val = c->read[0](c->opaque, addr);
3658 return val;
3661 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3663 uint32_t val;
3664 SwapEndianContainer *c = opaque;
3665 val = bswap16(c->read[1](c->opaque, addr));
3666 return val;
3669 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3671 uint32_t val;
3672 SwapEndianContainer *c = opaque;
3673 val = bswap32(c->read[2](c->opaque, addr));
3674 return val;
3677 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3678 swapendian_mem_readb,
3679 swapendian_mem_readw,
3680 swapendian_mem_readl
3683 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3684 uint32_t val)
3686 SwapEndianContainer *c = opaque;
3687 c->write[0](c->opaque, addr, val);
3690 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3691 uint32_t val)
3693 SwapEndianContainer *c = opaque;
3694 c->write[1](c->opaque, addr, bswap16(val));
3697 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3698 uint32_t val)
3700 SwapEndianContainer *c = opaque;
3701 c->write[2](c->opaque, addr, bswap32(val));
3704 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3705 swapendian_mem_writeb,
3706 swapendian_mem_writew,
3707 swapendian_mem_writel
3710 static void swapendian_init(int io_index)
3712 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3713 int i;
3715 /* Swap mmio for big endian targets */
3716 c->opaque = io_mem_opaque[io_index];
3717 for (i = 0; i < 3; i++) {
3718 c->read[i] = io_mem_read[io_index][i];
3719 c->write[i] = io_mem_write[io_index][i];
3721 io_mem_read[io_index][i] = swapendian_readfn[i];
3722 io_mem_write[io_index][i] = swapendian_writefn[i];
3724 io_mem_opaque[io_index] = c;
3727 static void swapendian_del(int io_index)
3729 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3730 g_free(io_mem_opaque[io_index]);
3734 /* mem_read and mem_write are arrays of functions containing the
3735 function to access byte (index 0), word (index 1) and dword (index
3736 2). Functions can be omitted with a NULL function pointer.
3737 If io_index is non-zero, the corresponding io zone is
3738 modified. If it is zero, a new io zone is allocated. The return
3739 value can be used with cpu_register_physical_memory(). -1 is
3740 returned on error. */
3741 static int cpu_register_io_memory_fixed(int io_index,
3742 CPUReadMemoryFunc * const *mem_read,
3743 CPUWriteMemoryFunc * const *mem_write,
3744 void *opaque, enum device_endian endian)
3746 int i;
3748 if (io_index <= 0) {
3749 io_index = get_free_io_mem_idx();
3750 if (io_index == -1)
3751 return io_index;
3752 } else {
3753 io_index >>= IO_MEM_SHIFT;
3754 if (io_index >= IO_MEM_NB_ENTRIES)
3755 return -1;
3758 for (i = 0; i < 3; ++i) {
3759 io_mem_read[io_index][i]
3760 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3762 for (i = 0; i < 3; ++i) {
3763 io_mem_write[io_index][i]
3764 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3766 io_mem_opaque[io_index] = opaque;
3768 switch (endian) {
3769 case DEVICE_BIG_ENDIAN:
3770 #ifndef TARGET_WORDS_BIGENDIAN
3771 swapendian_init(io_index);
3772 #endif
3773 break;
3774 case DEVICE_LITTLE_ENDIAN:
3775 #ifdef TARGET_WORDS_BIGENDIAN
3776 swapendian_init(io_index);
3777 #endif
3778 break;
3779 case DEVICE_NATIVE_ENDIAN:
3780 default:
3781 break;
3784 return (io_index << IO_MEM_SHIFT);
3787 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3788 CPUWriteMemoryFunc * const *mem_write,
3789 void *opaque, enum device_endian endian)
3791 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
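/* Usage sketch (illustrative): a device registers byte/word/long handlers and
 * later maps the returned index with cpu_register_physical_memory_log().
 * The device state, register layout and all names below are hypothetical;
 * NULL slots are filled with the unassigned handlers as described above. */
typedef struct ExampleDev {
    uint32_t ctrl;
} ExampleDev;

static uint32_t example_read_ctrl(void *opaque, target_phys_addr_t addr)
{
    ExampleDev *d = opaque;
    return d->ctrl;
}

static void example_write_ctrl(void *opaque, target_phys_addr_t addr,
                               uint32_t val)
{
    ExampleDev *d = opaque;
    d->ctrl = val;
}

static CPUReadMemoryFunc * const example_mmio_read[3] = {
    NULL, NULL, example_read_ctrl,      /* 32-bit access only */
};

static CPUWriteMemoryFunc * const example_mmio_write[3] = {
    NULL, NULL, example_write_ctrl,
};

static int example_register_mmio(ExampleDev *d)
{
    return cpu_register_io_memory(example_mmio_read, example_mmio_write, d,
                                  DEVICE_LITTLE_ENDIAN);
}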
3794 void cpu_unregister_io_memory(int io_table_address)
3796 int i;
3797 int io_index = io_table_address >> IO_MEM_SHIFT;
3799 swapendian_del(io_index);
3801 for (i=0;i < 3; i++) {
3802 io_mem_read[io_index][i] = unassigned_mem_read[i];
3803 io_mem_write[io_index][i] = unassigned_mem_write[i];
3805 io_mem_opaque[io_index] = NULL;
3806 io_mem_used[io_index] = 0;
3809 static void io_mem_init(void)
3811 int i;
3813 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3814 unassigned_mem_write, NULL,
3815 DEVICE_NATIVE_ENDIAN);
3816 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3817 unassigned_mem_write, NULL,
3818 DEVICE_NATIVE_ENDIAN);
3819 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3820 notdirty_mem_write, NULL,
3821 DEVICE_NATIVE_ENDIAN);
3822 for (i=0; i<5; i++)
3823 io_mem_used[i] = 1;
3825 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3826 watch_mem_write, NULL,
3827 DEVICE_NATIVE_ENDIAN);
3830 static void memory_map_init(void)
3832 system_memory = g_malloc(sizeof(*system_memory));
3833 memory_region_init(system_memory, "system", INT64_MAX);
3834 set_system_memory_map(system_memory);
3836 system_io = g_malloc(sizeof(*system_io));
3837 memory_region_init(system_io, "io", 65536);
3838 set_system_io_map(system_io);
3841 MemoryRegion *get_system_memory(void)
3843 return system_memory;
3846 MemoryRegion *get_system_io(void)
3848 return system_io;
3851 #endif /* !defined(CONFIG_USER_ONLY) */
3853 /* physical memory access (slow version, mainly for debug) */
3854 #if defined(CONFIG_USER_ONLY)
3855 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3856 uint8_t *buf, int len, int is_write)
3858 int l, flags;
3859 target_ulong page;
3860 void * p;
3862 while (len > 0) {
3863 page = addr & TARGET_PAGE_MASK;
3864 l = (page + TARGET_PAGE_SIZE) - addr;
3865 if (l > len)
3866 l = len;
3867 flags = page_get_flags(page);
3868 if (!(flags & PAGE_VALID))
3869 return -1;
3870 if (is_write) {
3871 if (!(flags & PAGE_WRITE))
3872 return -1;
3873 /* XXX: this code should not depend on lock_user */
3874 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3875 return -1;
3876 memcpy(p, buf, l);
3877 unlock_user(p, addr, l);
3878 } else {
3879 if (!(flags & PAGE_READ))
3880 return -1;
3881 /* XXX: this code should not depend on lock_user */
3882 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3883 return -1;
3884 memcpy(buf, p, l);
3885 unlock_user(p, addr, 0);
3887 len -= l;
3888 buf += l;
3889 addr += l;
3891 return 0;
3894 #else
3895 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3896 int len, int is_write)
3898 int l, io_index;
3899 uint8_t *ptr;
3900 uint32_t val;
3901 target_phys_addr_t page;
3902 ram_addr_t pd;
3903 PhysPageDesc *p;
3905 while (len > 0) {
3906 page = addr & TARGET_PAGE_MASK;
3907 l = (page + TARGET_PAGE_SIZE) - addr;
3908 if (l > len)
3909 l = len;
3910 p = phys_page_find(page >> TARGET_PAGE_BITS);
3911 if (!p) {
3912 pd = IO_MEM_UNASSIGNED;
3913 } else {
3914 pd = p->phys_offset;
3917 if (is_write) {
3918 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3919 target_phys_addr_t addr1 = addr;
3920 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3921 if (p)
3922 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3923 /* XXX: could force cpu_single_env to NULL to avoid
3924 potential bugs */
3925 if (l >= 4 && ((addr1 & 3) == 0)) {
3926 /* 32 bit write access */
3927 val = ldl_p(buf);
3928 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3929 l = 4;
3930 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3931 /* 16 bit write access */
3932 val = lduw_p(buf);
3933 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3934 l = 2;
3935 } else {
3936 /* 8 bit write access */
3937 val = ldub_p(buf);
3938 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3939 l = 1;
3941 } else {
3942 ram_addr_t addr1;
3943 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3944 /* RAM case */
3945 ptr = qemu_get_ram_ptr(addr1);
3946 memcpy(ptr, buf, l);
3947 if (!cpu_physical_memory_is_dirty(addr1)) {
3948 /* invalidate code */
3949 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3950 /* set dirty bit */
3951 cpu_physical_memory_set_dirty_flags(
3952 addr1, (0xff & ~CODE_DIRTY_FLAG));
3954 /* qemu doesn't execute guest code directly, but kvm does,
3955 so flush the instruction caches */
3956 if (kvm_enabled())
3957 flush_icache_range((unsigned long)ptr,
3958 ((unsigned long)ptr)+l);
3959 qemu_put_ram_ptr(ptr);
3961 } else {
3962 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3963 !(pd & IO_MEM_ROMD)) {
3964 target_phys_addr_t addr1 = addr;
3965 /* I/O case */
3966 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3967 if (p)
3968 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3969 if (l >= 4 && ((addr1 & 3) == 0)) {
3970 /* 32 bit read access */
3971 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3972 stl_p(buf, val);
3973 l = 4;
3974 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3975 /* 16 bit read access */
3976 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3977 stw_p(buf, val);
3978 l = 2;
3979 } else {
3980 /* 8 bit read access */
3981 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3982 stb_p(buf, val);
3983 l = 1;
3985 } else {
3986 /* RAM case */
3987 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3988 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3989 qemu_put_ram_ptr(ptr);
3992 len -= l;
3993 buf += l;
3994 addr += l;
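/* Usage sketch (illustrative): reading guest-physical memory through the slow
 * path above via the cpu_physical_memory_read() wrapper used elsewhere in
 * this tree.  The helper name and the little-endian interpretation are
 * hypothetical. */
static uint32_t example_peek_le32(target_phys_addr_t addr)
{
    uint8_t buf[4];

    cpu_physical_memory_read(addr, buf, sizeof(buf));
    return ldl_le_p(buf);       /* interpret the bytes as little endian */
}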
3998 /* used for ROM loading : can write in RAM and ROM */
3999 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
4000 const uint8_t *buf, int len)
4002 int l;
4003 uint8_t *ptr;
4004 target_phys_addr_t page;
4005 unsigned long pd;
4006 PhysPageDesc *p;
4008 while (len > 0) {
4009 page = addr & TARGET_PAGE_MASK;
4010 l = (page + TARGET_PAGE_SIZE) - addr;
4011 if (l > len)
4012 l = len;
4013 p = phys_page_find(page >> TARGET_PAGE_BITS);
4014 if (!p) {
4015 pd = IO_MEM_UNASSIGNED;
4016 } else {
4017 pd = p->phys_offset;
4020 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4021 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4022 !(pd & IO_MEM_ROMD)) {
4023 /* do nothing */
4024 } else {
4025 unsigned long addr1;
4026 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4027 /* ROM/RAM case */
4028 ptr = qemu_get_ram_ptr(addr1);
4029 memcpy(ptr, buf, l);
4030 qemu_put_ram_ptr(ptr);
4032 len -= l;
4033 buf += l;
4034 addr += l;
4038 typedef struct {
4039 void *buffer;
4040 target_phys_addr_t addr;
4041 target_phys_addr_t len;
4042 } BounceBuffer;
4044 static BounceBuffer bounce;
4046 typedef struct MapClient {
4047 void *opaque;
4048 void (*callback)(void *opaque);
4049 QLIST_ENTRY(MapClient) link;
4050 } MapClient;
4052 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4053 = QLIST_HEAD_INITIALIZER(map_client_list);
4055 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4057 MapClient *client = g_malloc(sizeof(*client));
4059 client->opaque = opaque;
4060 client->callback = callback;
4061 QLIST_INSERT_HEAD(&map_client_list, client, link);
4062 return client;
4065 void cpu_unregister_map_client(void *_client)
4067 MapClient *client = (MapClient *)_client;
4069 QLIST_REMOVE(client, link);
4070 g_free(client);
4073 static void cpu_notify_map_clients(void)
4075 MapClient *client;
4077 while (!QLIST_EMPTY(&map_client_list)) {
4078 client = QLIST_FIRST(&map_client_list);
4079 client->callback(client->opaque);
4080 cpu_unregister_map_client(client);
4084 /* Map a physical memory region into a host virtual address.
4085 * May map a subset of the requested range, given by and returned in *plen.
4086 * May return NULL if resources needed to perform the mapping are exhausted.
4087 * Use only for reads OR writes - not for read-modify-write operations.
4088 * Use cpu_register_map_client() to know when retrying the map operation is
4089 * likely to succeed.
4091 void *cpu_physical_memory_map(target_phys_addr_t addr,
4092 target_phys_addr_t *plen,
4093 int is_write)
4095 target_phys_addr_t len = *plen;
4096 target_phys_addr_t todo = 0;
4097 int l;
4098 target_phys_addr_t page;
4099 unsigned long pd;
4100 PhysPageDesc *p;
4101 ram_addr_t raddr = RAM_ADDR_MAX;
4102 ram_addr_t rlen;
4103 void *ret;
4105 while (len > 0) {
4106 page = addr & TARGET_PAGE_MASK;
4107 l = (page + TARGET_PAGE_SIZE) - addr;
4108 if (l > len)
4109 l = len;
4110 p = phys_page_find(page >> TARGET_PAGE_BITS);
4111 if (!p) {
4112 pd = IO_MEM_UNASSIGNED;
4113 } else {
4114 pd = p->phys_offset;
4117 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4118 if (todo || bounce.buffer) {
4119 break;
4121 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4122 bounce.addr = addr;
4123 bounce.len = l;
4124 if (!is_write) {
4125 cpu_physical_memory_read(addr, bounce.buffer, l);
4128 *plen = l;
4129 return bounce.buffer;
4131 if (!todo) {
4132 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4135 len -= l;
4136 addr += l;
4137 todo += l;
4139 rlen = todo;
4140 ret = qemu_ram_ptr_length(raddr, &rlen);
4141 *plen = rlen;
4142 return ret;
/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
 * Will also mark the memory as dirty if is_write == 1.  access_len gives
 * the amount of memory that was actually read or written by the caller.
 * (A minimal map/unmap sketch follows the function body below.)
 */
void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
                               int is_write, target_phys_addr_t access_len)
{
    unsigned long flush_len = (unsigned long)access_len;

    if (buffer != bounce.buffer) {
        if (is_write) {
            ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
            while (access_len) {
                unsigned l;
                l = TARGET_PAGE_SIZE;
                if (l > access_len)
                    l = access_len;
                if (!cpu_physical_memory_is_dirty(addr1)) {
                    /* invalidate code */
                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
                    /* set dirty bit */
                    cpu_physical_memory_set_dirty_flags(
                        addr1, (0xff & ~CODE_DIRTY_FLAG));
                }
                addr1 += l;
                access_len -= l;
            }
            dma_flush_range((unsigned long)buffer,
                            (unsigned long)buffer + flush_len);
        }
        if (xen_enabled()) {
            xen_invalidate_map_cache_entry(buffer);
        }
        return;
    }
    /* Bounce buffer case: for writes, copy the data back into guest memory
       now, then release the buffer and wake up any waiting map clients. */
    if (is_write) {
        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
    }
    qemu_vfree(bounce.buffer);
    bounce.buffer = NULL;
    cpu_notify_map_clients();
}
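/* Illustrative sketch (not part of the original file): a complete
 * map -> access -> unmap cycle for reading guest memory.  Only the two
 * functions above are used; the helper name and memcpy destination are
 * assumptions made for the example.
 */
#if 0
static void copy_from_guest(target_phys_addr_t gpa, void *dst, size_t size)
{
    target_phys_addr_t plen = size;
    void *host = cpu_physical_memory_map(gpa, &plen, 0 /* is_write == 0 */);

    if (host) {
        /* plen may be smaller than size if the region was not contiguous. */
        memcpy(dst, host, plen);
        /* access_len == plen: exactly that many bytes were read. */
        cpu_physical_memory_unmap(host, plen, 0, plen);
    }
}
#endif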
/* warning: addr must be aligned */
static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint32_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldl_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldl_be_p(ptr);
            break;
        default:
            val = ldl_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t ldl_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t ldl_le_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t ldl_be_phys(target_phys_addr_t addr)
{
    return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned */
static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
                                         enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;

        /* XXX This is broken when device endian != cpu endian.
               Fix and add "endian" variable check */
#ifdef TARGET_WORDS_BIGENDIAN
        val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
        val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
#else
        val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
        val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = ldq_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = ldq_be_p(ptr);
            break;
        default:
            val = ldq_p(ptr);
            break;
        }
    }
    return val;
}

uint64_t ldq_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint64_t ldq_le_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint64_t ldq_be_phys(target_phys_addr_t addr)
{
    return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
uint32_t ldub_phys(target_phys_addr_t addr)
{
    uint8_t val;
    cpu_physical_memory_read(addr, &val, 1);
    return val;
}
/* warning: addr must be aligned */
static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    uint64_t val;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
        !(pd & IO_MEM_ROMD)) {
        /* I/O case */
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
    } else {
        /* RAM case */
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            val = lduw_le_p(ptr);
            break;
        case DEVICE_BIG_ENDIAN:
            val = lduw_be_p(ptr);
            break;
        default:
            val = lduw_p(ptr);
            break;
        }
    }
    return val;
}

uint32_t lduw_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
}

uint32_t lduw_le_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
}

uint32_t lduw_be_phys(target_phys_addr_t addr)
{
    return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
}
/* warning: addr must be aligned.  The ram page is not marked as dirty
   and the code inside is not invalidated.  It is useful if the dirty
   bits are used to track modified PTEs (see the sketch after
   stq_phys_notdirty() below). */
void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        ptr = qemu_get_ram_ptr(addr1);
        stl_p(ptr, val);

        if (unlikely(in_migration)) {
            if (!cpu_physical_memory_is_dirty(addr1)) {
                /* invalidate code */
                tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                /* set dirty bit */
                cpu_physical_memory_set_dirty_flags(
                    addr1, (0xff & ~CODE_DIRTY_FLAG));
            }
        }
    }
}
void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#ifdef TARGET_WORDS_BIGENDIAN
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
#else
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
#endif
    } else {
        ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
            (addr & ~TARGET_PAGE_MASK);
        stq_p(ptr, val);
    }
}
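/* Illustrative sketch (not part of the original file) of the use case the
 * comment above stl_phys_notdirty() describes: a target MMU helper updating
 * accessed/dirty bits in a guest page-table entry without marking the RAM
 * page that holds the PTE as dirty.  The PTE layout and bit values are
 * hypothetical.
 */
#if 0
#define HYP_PTE_ACCESSED  0x20
#define HYP_PTE_DIRTY     0x40

static void hyp_update_pte_flags(target_phys_addr_t pte_addr, int is_store)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= HYP_PTE_ACCESSED;
    if (is_store) {
        pte |= HYP_PTE_DIRTY;
    }
    /* Use the _notdirty variant so the CPU's own A/D updates do not
       perturb dirty tracking of the page containing the PTE. */
    stl_phys_notdirty(pte_addr, pte);
}
#endif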
/* warning: addr must be aligned */
static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap32(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap32(val);
        }
#endif
        io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stl_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stl_be_p(ptr, val);
            break;
        default:
            stl_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stl_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stl_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stl_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stb_phys(target_phys_addr_t addr, uint32_t val)
{
    uint8_t v = val;
    cpu_physical_memory_write(addr, &v, 1);
}
/* warning: addr must be aligned */
static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
                                     enum device_endian endian)
{
    int io_index;
    uint8_t *ptr;
    unsigned long pd;
    PhysPageDesc *p;

    p = phys_page_find(addr >> TARGET_PAGE_BITS);
    if (!p) {
        pd = IO_MEM_UNASSIGNED;
    } else {
        pd = p->phys_offset;
    }

    if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
        io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
        if (p)
            addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
#if defined(TARGET_WORDS_BIGENDIAN)
        if (endian == DEVICE_LITTLE_ENDIAN) {
            val = bswap16(val);
        }
#else
        if (endian == DEVICE_BIG_ENDIAN) {
            val = bswap16(val);
        }
#endif
        io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
    } else {
        unsigned long addr1;
        addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
        /* RAM case */
        ptr = qemu_get_ram_ptr(addr1);
        switch (endian) {
        case DEVICE_LITTLE_ENDIAN:
            stw_le_p(ptr, val);
            break;
        case DEVICE_BIG_ENDIAN:
            stw_be_p(ptr, val);
            break;
        default:
            stw_p(ptr, val);
            break;
        }
        if (!cpu_physical_memory_is_dirty(addr1)) {
            /* invalidate code */
            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
            /* set dirty bit */
            cpu_physical_memory_set_dirty_flags(addr1,
                (0xff & ~CODE_DIRTY_FLAG));
        }
    }
}

void stw_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
}

void stw_le_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
}

void stw_be_phys(target_phys_addr_t addr, uint32_t val)
{
    stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
}
/* XXX: optimize */
void stq_phys(target_phys_addr_t addr, uint64_t val)
{
    val = tswap64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_le_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_le64(val);
    cpu_physical_memory_write(addr, &val, 8);
}

void stq_be_phys(target_phys_addr_t addr, uint64_t val)
{
    val = cpu_to_be64(val);
    cpu_physical_memory_write(addr, &val, 8);
}
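/* Illustrative sketch (not part of the original file): how device code
 * typically uses the fixed-endianness accessors above.  The descriptor
 * address, field offset and status bit are made-up example values; the
 * point is that ldl_le_phys()/stl_le_phys() read and write a little-endian
 * layout regardless of the target CPU's native byte order.
 */
#if 0
static void example_complete_le_descriptor(target_phys_addr_t desc_pa)
{
    /* 32-bit little-endian "status" field at offset 4 of the descriptor. */
    uint32_t status = ldl_le_phys(desc_pa + 4);

    status |= 0x1;                  /* e.g. mark the descriptor as done */
    stl_le_phys(desc_pa + 4, status);
}
#endif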
/* virtual memory access for debug (includes writing to ROM) */
int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
                        uint8_t *buf, int len, int is_write)
{
    int l;
    target_phys_addr_t phys_addr;
    target_ulong page;

    while (len > 0) {
        page = addr & TARGET_PAGE_MASK;
        phys_addr = cpu_get_phys_page_debug(env, page);
        /* if no physical page mapped, return an error */
        if (phys_addr == -1)
            return -1;
        l = (page + TARGET_PAGE_SIZE) - addr;
        if (l > len)
            l = len;
        phys_addr += (addr & ~TARGET_PAGE_MASK);
        if (is_write)
            cpu_physical_memory_write_rom(phys_addr, buf, l);
        else
            cpu_physical_memory_rw(phys_addr, buf, l, is_write);
        len -= l;
        buf += l;
        addr += l;
    }
    return 0;
}
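/* Illustrative sketch (not part of the original file): a debugger-style
 * helper built on cpu_memory_rw_debug() that reads a NUL-terminated string
 * from guest virtual memory.  The helper name and buffer handling are
 * assumptions made for the example.
 */
#if 0
static int example_read_guest_string(CPUState *env, target_ulong vaddr,
                                     char *out, int max_len)
{
    int i;

    for (i = 0; i < max_len - 1; i++) {
        uint8_t byte;

        /* One byte at a time; returns -1 if the page is unmapped. */
        if (cpu_memory_rw_debug(env, vaddr + i, &byte, 1, 0) < 0) {
            return -1;
        }
        out[i] = byte;
        if (byte == 0) {
            return i;
        }
    }
    out[i] = 0;
    return i;
}
#endif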
#endif
/* In deterministic execution mode, instructions doing device I/O
   must be at the end of the TB. */
void cpu_io_recompile(CPUState *env, void *retaddr)
{
    TranslationBlock *tb;
    uint32_t n, cflags;
    target_ulong pc, cs_base;
    uint64_t flags;

    tb = tb_find_pc((unsigned long)retaddr);
    if (!tb) {
        cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
                  retaddr);
    }
    n = env->icount_decr.u16.low + tb->icount;
    cpu_restore_state(tb, env, (unsigned long)retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred. */
    n = n - env->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn. */
    n++;
    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch. */
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
        env->active_tc.PC -= 4;
        env->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
        && n > 1) {
        env->pc -= 2;
        env->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
    }
#endif
    /* This should never happen. */
    if (n > CF_COUNT_MASK)
        cpu_abort(env, "TB too big during recompile");

    cflags = n | CF_LAST_IO;
    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok. */
    tb_gen_code(env, pc, cs_base, flags, cflags);
    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
       the first in the TB) then we end up generating a whole new TB and
       repeating the fault, which is horribly inefficient.
       Better would be to execute just this insn uncached, or generate a
       second new TB. */
    cpu_resume_from_signal(env, NULL);
}
#if !defined(CONFIG_USER_ONLY)

void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
    int i, target_code_size, max_target_code_size;
    int direct_jmp_count, direct_jmp2_count, cross_page;
    TranslationBlock *tb;

    target_code_size = 0;
    max_target_code_size = 0;
    cross_page = 0;
    direct_jmp_count = 0;
    direct_jmp2_count = 0;
    for (i = 0; i < nb_tbs; i++) {
        tb = &tbs[i];
        target_code_size += tb->size;
        if (tb->size > max_target_code_size)
            max_target_code_size = tb->size;
        if (tb->page_addr[1] != -1)
            cross_page++;
        if (tb->tb_next_offset[0] != 0xffff) {
            direct_jmp_count++;
            if (tb->tb_next_offset[1] != 0xffff) {
                direct_jmp2_count++;
            }
        }
    }
    /* XXX: avoid using doubles ? */
    cpu_fprintf(f, "Translation buffer state:\n");
    cpu_fprintf(f, "gen code size       %td/%ld\n",
                code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
    cpu_fprintf(f, "TB count            %d/%d\n",
                nb_tbs, code_gen_max_blocks);
    cpu_fprintf(f, "TB avg target size  %d max=%d bytes\n",
                nb_tbs ? target_code_size / nb_tbs : 0,
                max_target_code_size);
    cpu_fprintf(f, "TB avg host size    %td bytes (expansion ratio: %0.1f)\n",
                nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
                target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
    cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
                cross_page,
                nb_tbs ? (cross_page * 100) / nb_tbs : 0);
    cpu_fprintf(f, "direct jump count   %d (%d%%) (2 jumps=%d %d%%)\n",
                direct_jmp_count,
                nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
                direct_jmp2_count,
                nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
    cpu_fprintf(f, "\nStatistics:\n");
    cpu_fprintf(f, "TB flush count      %d\n", tb_flush_count);
    cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
    cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
#ifdef CONFIG_PROFILER
    tcg_dump_info(f, cpu_fprintf);
#endif
}
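/* Illustrative sketch (not part of the original file): this dump is normally
 * reached from the monitor ("info jit").  The direct call below only
 * illustrates the signature, using stdio's fprintf as the fprintf_function
 * callback; the wrapper name is hypothetical.
 */
#if 0
static void example_show_jit_stats(void)
{
    dump_exec_info(stdout, fprintf);
}
#endif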
#define MMUSUFFIX _cmmu
#define GETPC() NULL
#define env cpu_single_env
#define SOFTMMU_CODE_ACCESS

#define SHIFT 0
#include "softmmu_template.h"

#define SHIFT 1
#include "softmmu_template.h"

#define SHIFT 2
#include "softmmu_template.h"

#define SHIFT 3
#include "softmmu_template.h"

#undef env

#endif