[qemu.git] / exec.c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "exec-all.h"
30 #include "tcg.h"
31 #include "hw/hw.h"
32 #include "hw/qdev.h"
33 #include "osdep.h"
34 #include "kvm.h"
35 #include "hw/xen.h"
36 #include "qemu-timer.h"
37 #if defined(CONFIG_USER_ONLY)
38 #include <qemu.h>
39 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
40 #include <sys/param.h>
41 #if __FreeBSD_version >= 700104
42 #define HAVE_KINFO_GETVMMAP
43 #define sigqueue sigqueue_freebsd /* avoid redefinition */
44 #include <sys/time.h>
45 #include <sys/proc.h>
46 #include <machine/profile.h>
47 #define _KERNEL
48 #include <sys/user.h>
49 #undef _KERNEL
50 #undef sigqueue
51 #include <libutil.h>
52 #endif
53 #endif
54 #else /* !CONFIG_USER_ONLY */
55 #include "xen-mapcache.h"
56 #include "trace.h"
57 #endif
59 //#define DEBUG_TB_INVALIDATE
60 //#define DEBUG_FLUSH
61 //#define DEBUG_TLB
62 //#define DEBUG_UNASSIGNED
64 /* make various TB consistency checks */
65 //#define DEBUG_TB_CHECK
66 //#define DEBUG_TLB_CHECK
68 //#define DEBUG_IOPORT
69 //#define DEBUG_SUBPAGE
71 #if !defined(CONFIG_USER_ONLY)
72 /* TB consistency checks only implemented for usermode emulation. */
73 #undef DEBUG_TB_CHECK
74 #endif
76 #define SMC_BITMAP_USE_THRESHOLD 10
78 static TranslationBlock *tbs;
79 static int code_gen_max_blocks;
80 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
81 static int nb_tbs;
82 /* any access to the tbs or the page table must use this lock */
83 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
85 #if defined(__arm__) || defined(__sparc_v9__)
86 /* The prologue must be reachable with a direct jump. ARM and Sparc64
87 have limited branch ranges (possibly also PPC) so place it in a
88 section close to code segment. */
89 #define code_gen_section \
90 __attribute__((__section__(".gen_code"))) \
91 __attribute__((aligned (32)))
92 #elif defined(_WIN32)
93 /* Maximum alignment for Win32 is 16. */
94 #define code_gen_section \
95 __attribute__((aligned (16)))
96 #else
97 #define code_gen_section \
98 __attribute__((aligned (32)))
99 #endif
101 uint8_t code_gen_prologue[1024] code_gen_section;
102 static uint8_t *code_gen_buffer;
103 static unsigned long code_gen_buffer_size;
104 /* threshold to flush the translated code buffer */
105 static unsigned long code_gen_buffer_max_size;
106 static uint8_t *code_gen_ptr;
108 #if !defined(CONFIG_USER_ONLY)
109 int phys_ram_fd;
110 static int in_migration;
112 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
113 #endif
115 CPUState *first_cpu;
116 /* current CPU in the current thread. It is only valid inside
117 cpu_exec() */
118 CPUState *cpu_single_env;
119 /* 0 = Do not count executed instructions.
120 1 = Precise instruction counting.
121 2 = Adaptive rate instruction counting. */
122 int use_icount = 0;
123 /* Current instruction counter. While executing translated code this may
124 include some instructions that have not yet been executed. */
125 int64_t qemu_icount;
127 typedef struct PageDesc {
128 /* list of TBs intersecting this ram page */
129 TranslationBlock *first_tb;
130 /* in order to optimize self modifying code, we count the number
131 of lookups we do to a given page to use a bitmap */
132 unsigned int code_write_count;
133 uint8_t *code_bitmap;
134 #if defined(CONFIG_USER_ONLY)
135 unsigned long flags;
136 #endif
137 } PageDesc;
139 /* In system mode we want L1_MAP to be based on ram offsets,
140 while in user mode we want it to be based on virtual addresses. */
141 #if !defined(CONFIG_USER_ONLY)
142 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
143 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
144 #else
145 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
146 #endif
147 #else
148 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
149 #endif
151 /* Size of the L2 (and L3, etc) page tables. */
152 #define L2_BITS 10
153 #define L2_SIZE (1 << L2_BITS)
155 /* The bits remaining after N lower levels of page tables. */
156 #define P_L1_BITS_REM \
157 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
158 #define V_L1_BITS_REM \
159 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
161 /* Size of the L1 page table. Avoid silly small sizes. */
162 #if P_L1_BITS_REM < 4
163 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
164 #else
165 #define P_L1_BITS P_L1_BITS_REM
166 #endif
168 #if V_L1_BITS_REM < 4
169 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
170 #else
171 #define V_L1_BITS V_L1_BITS_REM
172 #endif
174 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
175 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
177 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
178 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
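/* Example of how these macros carve up a page index (illustrative only;
   assumes a 32-bit user-mode guest with 4 KiB pages, i.e.
   L1_MAP_ADDR_SPACE_BITS == 32 and TARGET_PAGE_BITS == 12):
   V_L1_BITS_REM = (32 - 12) % 10 = 0, so V_L1_BITS = 10, V_L1_SIZE = 1024
   and V_L1_SHIFT = 32 - 12 - 10 = 10.  A page index then splits into
   10 bits selecting the l1_map slot and 10 bits indexing the PageDesc
   array at the bottom level, giving a two-level map. */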
180 unsigned long qemu_real_host_page_size;
181 unsigned long qemu_host_page_bits;
182 unsigned long qemu_host_page_size;
183 unsigned long qemu_host_page_mask;
185 /* This is a multi-level map on the virtual address space.
186 The bottom level has pointers to PageDesc. */
187 static void *l1_map[V_L1_SIZE];
189 #if !defined(CONFIG_USER_ONLY)
190 typedef struct PhysPageDesc {
191 /* offset in host memory of the page + io_index in the low bits */
192 ram_addr_t phys_offset;
193 ram_addr_t region_offset;
194 } PhysPageDesc;
196 /* This is a multi-level map on the physical address space.
197 The bottom level has pointers to PhysPageDesc. */
198 static void *l1_phys_map[P_L1_SIZE];
200 static void io_mem_init(void);
202 /* io memory support */
203 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
204 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
205 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
206 static char io_mem_used[IO_MEM_NB_ENTRIES];
207 static int io_mem_watch;
208 #endif
210 /* log support */
211 #ifdef WIN32
212 static const char *logfilename = "qemu.log";
213 #else
214 static const char *logfilename = "/tmp/qemu.log";
215 #endif
216 FILE *logfile;
217 int loglevel;
218 static int log_append = 0;
220 /* statistics */
221 #if !defined(CONFIG_USER_ONLY)
222 static int tlb_flush_count;
223 #endif
224 static int tb_flush_count;
225 static int tb_phys_invalidate_count;
227 #ifdef _WIN32
228 static void map_exec(void *addr, long size)
230 DWORD old_protect;
231 VirtualProtect(addr, size,
232 PAGE_EXECUTE_READWRITE, &old_protect);
235 #else
236 static void map_exec(void *addr, long size)
238 unsigned long start, end, page_size;
240 page_size = getpagesize();
241 start = (unsigned long)addr;
242 start &= ~(page_size - 1);
244 end = (unsigned long)addr + size;
245 end += page_size - 1;
246 end &= ~(page_size - 1);
248 mprotect((void *)start, end - start,
249 PROT_READ | PROT_WRITE | PROT_EXEC);
251 #endif
253 static void page_init(void)
255 /* NOTE: we can always suppose that qemu_host_page_size >=
256 TARGET_PAGE_SIZE */
257 #ifdef _WIN32
259 SYSTEM_INFO system_info;
261 GetSystemInfo(&system_info);
262 qemu_real_host_page_size = system_info.dwPageSize;
264 #else
265 qemu_real_host_page_size = getpagesize();
266 #endif
267 if (qemu_host_page_size == 0)
268 qemu_host_page_size = qemu_real_host_page_size;
269 if (qemu_host_page_size < TARGET_PAGE_SIZE)
270 qemu_host_page_size = TARGET_PAGE_SIZE;
271 qemu_host_page_bits = 0;
272 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
273 qemu_host_page_bits++;
274 qemu_host_page_mask = ~(qemu_host_page_size - 1);
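    /* For example, with a 4096-byte host page the loop above leaves
       qemu_host_page_bits == 12 and qemu_host_page_mask == ~0xfff,
       i.e. the mask that rounds an address down to its host page. */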
276 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
278 #ifdef HAVE_KINFO_GETVMMAP
279 struct kinfo_vmentry *freep;
280 int i, cnt;
282 freep = kinfo_getvmmap(getpid(), &cnt);
283 if (freep) {
284 mmap_lock();
285 for (i = 0; i < cnt; i++) {
286 unsigned long startaddr, endaddr;
288 startaddr = freep[i].kve_start;
289 endaddr = freep[i].kve_end;
290 if (h2g_valid(startaddr)) {
291 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
293 if (h2g_valid(endaddr)) {
294 endaddr = h2g(endaddr);
295 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
296 } else {
297 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
298 endaddr = ~0ul;
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
300 #endif
304 free(freep);
305 mmap_unlock();
307 #else
308 FILE *f;
310 last_brk = (unsigned long)sbrk(0);
312 f = fopen("/compat/linux/proc/self/maps", "r");
313 if (f) {
314 mmap_lock();
316 do {
317 unsigned long startaddr, endaddr;
318 int n;
320 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
322 if (n == 2 && h2g_valid(startaddr)) {
323 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
325 if (h2g_valid(endaddr)) {
326 endaddr = h2g(endaddr);
327 } else {
328 endaddr = ~0ul;
330 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
332 } while (!feof(f));
334 fclose(f);
335 mmap_unlock();
337 #endif
339 #endif
342 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
344 PageDesc *pd;
345 void **lp;
346 int i;
348 #if defined(CONFIG_USER_ONLY)
349 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
350 # define ALLOC(P, SIZE) \
351 do { \
352 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
353 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
354 } while (0)
355 #else
356 # define ALLOC(P, SIZE) \
357 do { P = qemu_mallocz(SIZE); } while (0)
358 #endif
360 /* Level 1. Always allocated. */
361 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
363 /* Level 2..N-1. */
364 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
365 void **p = *lp;
367 if (p == NULL) {
368 if (!alloc) {
369 return NULL;
371 ALLOC(p, sizeof(void *) * L2_SIZE);
372 *lp = p;
375 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
378 pd = *lp;
379 if (pd == NULL) {
380 if (!alloc) {
381 return NULL;
383 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
384 *lp = pd;
387 #undef ALLOC
389 return pd + (index & (L2_SIZE - 1));
392 static inline PageDesc *page_find(tb_page_addr_t index)
394 return page_find_alloc(index, 0);
397 #if !defined(CONFIG_USER_ONLY)
398 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
400 PhysPageDesc *pd;
401 void **lp;
402 int i;
404 /* Level 1. Always allocated. */
405 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
407 /* Level 2..N-1. */
408 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
409 void **p = *lp;
410 if (p == NULL) {
411 if (!alloc) {
412 return NULL;
414 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
416 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
419 pd = *lp;
420 if (pd == NULL) {
421 int i;
423 if (!alloc) {
424 return NULL;
427 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
429 for (i = 0; i < L2_SIZE; i++) {
430 pd[i].phys_offset = IO_MEM_UNASSIGNED;
431 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
435 return pd + (index & (L2_SIZE - 1));
438 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
440 return phys_page_find_alloc(index, 0);
443 static void tlb_protect_code(ram_addr_t ram_addr);
444 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
445 target_ulong vaddr);
446 #define mmap_lock() do { } while(0)
447 #define mmap_unlock() do { } while(0)
448 #endif
450 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
452 #if defined(CONFIG_USER_ONLY)
453 /* Currently it is not recommended to allocate big chunks of data in
454    user mode. It will change when a dedicated libc is used */
455 #define USE_STATIC_CODE_GEN_BUFFER
456 #endif
458 #ifdef USE_STATIC_CODE_GEN_BUFFER
459 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
460 __attribute__((aligned (CODE_GEN_ALIGN)));
461 #endif
463 static void code_gen_alloc(unsigned long tb_size)
465 #ifdef USE_STATIC_CODE_GEN_BUFFER
466 code_gen_buffer = static_code_gen_buffer;
467 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
468 map_exec(code_gen_buffer, code_gen_buffer_size);
469 #else
470 code_gen_buffer_size = tb_size;
471 if (code_gen_buffer_size == 0) {
472 #if defined(CONFIG_USER_ONLY)
473 /* in user mode, phys_ram_size is not meaningful */
474 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
475 #else
476 /* XXX: needs adjustments */
477 code_gen_buffer_size = (unsigned long)(ram_size / 4);
478 #endif
480 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
481 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
482 /* The code gen buffer location may have constraints depending on
483 the host cpu and OS */
484 #if defined(__linux__)
486 int flags;
487 void *start = NULL;
489 flags = MAP_PRIVATE | MAP_ANONYMOUS;
490 #if defined(__x86_64__)
491 flags |= MAP_32BIT;
492 /* Cannot map more than that */
493 if (code_gen_buffer_size > (800 * 1024 * 1024))
494 code_gen_buffer_size = (800 * 1024 * 1024);
495 #elif defined(__sparc_v9__)
496 // Map the buffer below 2G, so we can use direct calls and branches
497 flags |= MAP_FIXED;
498 start = (void *) 0x60000000UL;
499 if (code_gen_buffer_size > (512 * 1024 * 1024))
500 code_gen_buffer_size = (512 * 1024 * 1024);
501 #elif defined(__arm__)
502 /* Map the buffer below 32M, so we can use direct calls and branches */
503 flags |= MAP_FIXED;
504 start = (void *) 0x01000000UL;
505 if (code_gen_buffer_size > 16 * 1024 * 1024)
506 code_gen_buffer_size = 16 * 1024 * 1024;
507 #elif defined(__s390x__)
508 /* Map the buffer so that we can use direct calls and branches. */
509 /* We have a +- 4GB range on the branches; leave some slop. */
510 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
511 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
513 start = (void *)0x90000000UL;
514 #endif
515 code_gen_buffer = mmap(start, code_gen_buffer_size,
516 PROT_WRITE | PROT_READ | PROT_EXEC,
517 flags, -1, 0);
518 if (code_gen_buffer == MAP_FAILED) {
519 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
520 exit(1);
523 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
524 || defined(__DragonFly__) || defined(__OpenBSD__)
526 int flags;
527 void *addr = NULL;
528 flags = MAP_PRIVATE | MAP_ANONYMOUS;
529 #if defined(__x86_64__)
530 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
531 * 0x40000000 is free */
532 flags |= MAP_FIXED;
533 addr = (void *)0x40000000;
534 /* Cannot map more than that */
535 if (code_gen_buffer_size > (800 * 1024 * 1024))
536 code_gen_buffer_size = (800 * 1024 * 1024);
537 #elif defined(__sparc_v9__)
538 // Map the buffer below 2G, so we can use direct calls and branches
539 flags |= MAP_FIXED;
540 addr = (void *) 0x60000000UL;
541 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
542 code_gen_buffer_size = (512 * 1024 * 1024);
544 #endif
545 code_gen_buffer = mmap(addr, code_gen_buffer_size,
546 PROT_WRITE | PROT_READ | PROT_EXEC,
547 flags, -1, 0);
548 if (code_gen_buffer == MAP_FAILED) {
549 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
550 exit(1);
553 #else
554 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
555 map_exec(code_gen_buffer, code_gen_buffer_size);
556 #endif
557 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
558 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
559 code_gen_buffer_max_size = code_gen_buffer_size -
560 (TCG_MAX_OP_SIZE * OPC_MAX_SIZE);
561 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
562 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
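    /* Note: code_gen_buffer_max_size deliberately stops short of the end of
       the buffer by TCG_MAX_OP_SIZE * OPC_MAX_SIZE bytes, so the block
       currently being translated cannot run off the end before the flush
       threshold is noticed; the TB array is sized from the same buffer
       using CODE_GEN_AVG_BLOCK_SIZE as an estimate of the average
       translated block size. */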
565 /* Must be called before using the QEMU cpus. 'tb_size' is the size
566 (in bytes) allocated to the translation buffer. Zero means default
567 size. */
568 void cpu_exec_init_all(unsigned long tb_size)
570 cpu_gen_init();
571 code_gen_alloc(tb_size);
572 code_gen_ptr = code_gen_buffer;
573 page_init();
574 #if !defined(CONFIG_USER_ONLY)
575 io_mem_init();
576 #endif
577 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
578 /* There's no guest base to take into account, so go ahead and
579 initialize the prologue now. */
580 tcg_prologue_init(&tcg_ctx);
581 #endif
584 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
586 static int cpu_common_post_load(void *opaque, int version_id)
588 CPUState *env = opaque;
590 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
591 version_id is increased. */
592 env->interrupt_request &= ~0x01;
593 tlb_flush(env, 1);
595 return 0;
598 static const VMStateDescription vmstate_cpu_common = {
599 .name = "cpu_common",
600 .version_id = 1,
601 .minimum_version_id = 1,
602 .minimum_version_id_old = 1,
603 .post_load = cpu_common_post_load,
604 .fields = (VMStateField []) {
605 VMSTATE_UINT32(halted, CPUState),
606 VMSTATE_UINT32(interrupt_request, CPUState),
607 VMSTATE_END_OF_LIST()
610 #endif
612 CPUState *qemu_get_cpu(int cpu)
614 CPUState *env = first_cpu;
616 while (env) {
617 if (env->cpu_index == cpu)
618 break;
619 env = env->next_cpu;
622 return env;
625 void cpu_exec_init(CPUState *env)
627 CPUState **penv;
628 int cpu_index;
630 #if defined(CONFIG_USER_ONLY)
631 cpu_list_lock();
632 #endif
633 env->next_cpu = NULL;
634 penv = &first_cpu;
635 cpu_index = 0;
636 while (*penv != NULL) {
637 penv = &(*penv)->next_cpu;
638 cpu_index++;
640 env->cpu_index = cpu_index;
641 env->numa_node = 0;
642 QTAILQ_INIT(&env->breakpoints);
643 QTAILQ_INIT(&env->watchpoints);
644 #ifndef CONFIG_USER_ONLY
645 env->thread_id = qemu_get_thread_id();
646 #endif
647 *penv = env;
648 #if defined(CONFIG_USER_ONLY)
649 cpu_list_unlock();
650 #endif
651 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
652 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
653 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
654 cpu_save, cpu_load, env);
655 #endif
658 /* Allocate a new translation block. Flush the translation buffer if
659 too many translation blocks or too much generated code. */
660 static TranslationBlock *tb_alloc(target_ulong pc)
662 TranslationBlock *tb;
664 if (nb_tbs >= code_gen_max_blocks ||
665 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
666 return NULL;
667 tb = &tbs[nb_tbs++];
668 tb->pc = pc;
669 tb->cflags = 0;
670 return tb;
673 void tb_free(TranslationBlock *tb)
675     /* In practice this is mostly used for single-use temporary TBs.
676 Ignore the hard cases and just back up if this TB happens to
677 be the last one generated. */
678 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
679 code_gen_ptr = tb->tc_ptr;
680 nb_tbs--;
684 static inline void invalidate_page_bitmap(PageDesc *p)
686 if (p->code_bitmap) {
687 qemu_free(p->code_bitmap);
688 p->code_bitmap = NULL;
690 p->code_write_count = 0;
693 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
695 static void page_flush_tb_1 (int level, void **lp)
697 int i;
699 if (*lp == NULL) {
700 return;
702 if (level == 0) {
703 PageDesc *pd = *lp;
704 for (i = 0; i < L2_SIZE; ++i) {
705 pd[i].first_tb = NULL;
706 invalidate_page_bitmap(pd + i);
708 } else {
709 void **pp = *lp;
710 for (i = 0; i < L2_SIZE; ++i) {
711 page_flush_tb_1 (level - 1, pp + i);
716 static void page_flush_tb(void)
718 int i;
719 for (i = 0; i < V_L1_SIZE; i++) {
720 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
724 /* flush all the translation blocks */
725 /* XXX: tb_flush is currently not thread safe */
726 void tb_flush(CPUState *env1)
728 CPUState *env;
729 #if defined(DEBUG_FLUSH)
730 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
731 (unsigned long)(code_gen_ptr - code_gen_buffer),
732 nb_tbs, nb_tbs > 0 ?
733 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
734 #endif
735 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
736 cpu_abort(env1, "Internal error: code buffer overflow\n");
738 nb_tbs = 0;
740 for(env = first_cpu; env != NULL; env = env->next_cpu) {
741 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
744 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
745 page_flush_tb();
747 code_gen_ptr = code_gen_buffer;
748 /* XXX: flush processor icache at this point if cache flush is
749 expensive */
750 tb_flush_count++;
753 #ifdef DEBUG_TB_CHECK
755 static void tb_invalidate_check(target_ulong address)
757 TranslationBlock *tb;
758 int i;
759 address &= TARGET_PAGE_MASK;
760 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
761 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
762 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
763 address >= tb->pc + tb->size)) {
764 printf("ERROR invalidate: address=" TARGET_FMT_lx
765 " PC=%08lx size=%04x\n",
766 address, (long)tb->pc, tb->size);
772 /* verify that all the pages have correct rights for code */
773 static void tb_page_check(void)
775 TranslationBlock *tb;
776 int i, flags1, flags2;
778 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
779 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
780 flags1 = page_get_flags(tb->pc);
781 flags2 = page_get_flags(tb->pc + tb->size - 1);
782 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
783 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
784 (long)tb->pc, tb->size, flags1, flags2);
790 #endif
792 /* invalidate one TB */
793 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
794 int next_offset)
796 TranslationBlock *tb1;
797 for(;;) {
798 tb1 = *ptb;
799 if (tb1 == tb) {
800 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
801 break;
803 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
807 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
809 TranslationBlock *tb1;
810 unsigned int n1;
812 for(;;) {
813 tb1 = *ptb;
814 n1 = (long)tb1 & 3;
815 tb1 = (TranslationBlock *)((long)tb1 & ~3);
816 if (tb1 == tb) {
817 *ptb = tb1->page_next[n1];
818 break;
820 ptb = &tb1->page_next[n1];
824 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
826 TranslationBlock *tb1, **ptb;
827 unsigned int n1;
829 ptb = &tb->jmp_next[n];
830 tb1 = *ptb;
831 if (tb1) {
832 /* find tb(n) in circular list */
833 for(;;) {
834 tb1 = *ptb;
835 n1 = (long)tb1 & 3;
836 tb1 = (TranslationBlock *)((long)tb1 & ~3);
837 if (n1 == n && tb1 == tb)
838 break;
839 if (n1 == 2) {
840 ptb = &tb1->jmp_first;
841 } else {
842 ptb = &tb1->jmp_next[n1];
845 /* now we can suppress tb(n) from the list */
846 *ptb = tb->jmp_next[n];
848 tb->jmp_next[n] = NULL;
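/* Pointer-tagging convention used by the page_next[] and jmp_next[] lists:
   the low two bits of each stored TranslationBlock pointer are a tag.
   Values 0 and 1 name which slot (page or jump index n) of the pointed-to
   TB continues the list, while 2 marks the owning TB itself, i.e. the end
   of the circular jump list (see jmp_first being set to (long)tb | 2).
   The real pointer is recovered with "& ~3". */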
852 /* reset the jump entry 'n' of a TB so that it is not chained to
853 another TB */
854 static inline void tb_reset_jump(TranslationBlock *tb, int n)
856 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
859 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
861 CPUState *env;
862 PageDesc *p;
863 unsigned int h, n1;
864 tb_page_addr_t phys_pc;
865 TranslationBlock *tb1, *tb2;
867 /* remove the TB from the hash list */
868 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
869 h = tb_phys_hash_func(phys_pc);
870 tb_remove(&tb_phys_hash[h], tb,
871 offsetof(TranslationBlock, phys_hash_next));
873 /* remove the TB from the page list */
874 if (tb->page_addr[0] != page_addr) {
875 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
876 tb_page_remove(&p->first_tb, tb);
877 invalidate_page_bitmap(p);
879 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
880 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
881 tb_page_remove(&p->first_tb, tb);
882 invalidate_page_bitmap(p);
885 tb_invalidated_flag = 1;
887 /* remove the TB from the hash list */
888 h = tb_jmp_cache_hash_func(tb->pc);
889 for(env = first_cpu; env != NULL; env = env->next_cpu) {
890 if (env->tb_jmp_cache[h] == tb)
891 env->tb_jmp_cache[h] = NULL;
894 /* suppress this TB from the two jump lists */
895 tb_jmp_remove(tb, 0);
896 tb_jmp_remove(tb, 1);
898 /* suppress any remaining jumps to this TB */
899 tb1 = tb->jmp_first;
900 for(;;) {
901 n1 = (long)tb1 & 3;
902 if (n1 == 2)
903 break;
904 tb1 = (TranslationBlock *)((long)tb1 & ~3);
905 tb2 = tb1->jmp_next[n1];
906 tb_reset_jump(tb1, n1);
907 tb1->jmp_next[n1] = NULL;
908 tb1 = tb2;
910 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
912 tb_phys_invalidate_count++;
915 static inline void set_bits(uint8_t *tab, int start, int len)
917 int end, mask, end1;
919 end = start + len;
920 tab += start >> 3;
921 mask = 0xff << (start & 7);
922 if ((start & ~7) == (end & ~7)) {
923 if (start < end) {
924 mask &= ~(0xff << (end & 7));
925 *tab |= mask;
927 } else {
928 *tab++ |= mask;
929 start = (start + 8) & ~7;
930 end1 = end & ~7;
931 while (start < end1) {
932 *tab++ = 0xff;
933 start += 8;
935 if (start < end) {
936 mask = ~(0xff << (end & 7));
937 *tab |= mask;
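/* Worked example for set_bits(): set_bits(tab, 3, 7) marks bits 3..9.
   The first byte gets mask 0xff << 3 == 0xf8 (bits 3-7), start advances
   to 8, and the trailing partial byte gets ~(0xff << 2) == 0x03
   (bits 8-9), for seven bits in total. */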
942 static void build_page_bitmap(PageDesc *p)
944 int n, tb_start, tb_end;
945 TranslationBlock *tb;
947 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
949 tb = p->first_tb;
950 while (tb != NULL) {
951 n = (long)tb & 3;
952 tb = (TranslationBlock *)((long)tb & ~3);
953 /* NOTE: this is subtle as a TB may span two physical pages */
954 if (n == 0) {
955 /* NOTE: tb_end may be after the end of the page, but
956 it is not a problem */
957 tb_start = tb->pc & ~TARGET_PAGE_MASK;
958 tb_end = tb_start + tb->size;
959 if (tb_end > TARGET_PAGE_SIZE)
960 tb_end = TARGET_PAGE_SIZE;
961 } else {
962 tb_start = 0;
963 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
965 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
966 tb = tb->page_next[n];
970 TranslationBlock *tb_gen_code(CPUState *env,
971 target_ulong pc, target_ulong cs_base,
972 int flags, int cflags)
974 TranslationBlock *tb;
975 uint8_t *tc_ptr;
976 tb_page_addr_t phys_pc, phys_page2;
977 target_ulong virt_page2;
978 int code_gen_size;
980 phys_pc = get_page_addr_code(env, pc);
981 tb = tb_alloc(pc);
982 if (!tb) {
983 /* flush must be done */
984 tb_flush(env);
985 /* cannot fail at this point */
986 tb = tb_alloc(pc);
987 /* Don't forget to invalidate previous TB info. */
988 tb_invalidated_flag = 1;
990 tc_ptr = code_gen_ptr;
991 tb->tc_ptr = tc_ptr;
992 tb->cs_base = cs_base;
993 tb->flags = flags;
994 tb->cflags = cflags;
995 cpu_gen_code(env, tb, &code_gen_size);
996 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
998 /* check next page if needed */
999 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1000 phys_page2 = -1;
1001 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1002 phys_page2 = get_page_addr_code(env, virt_page2);
1004 tb_link_page(tb, phys_pc, phys_page2);
1005 return tb;
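/* A TB may cross a page boundary: virt_page2 above is the page holding the
   last byte of the translated code (pc + tb->size - 1), and phys_page2
   stays -1 when the whole TB fits in a single page, which is what
   tb_link_page uses to decide whether to register the TB on one page
   list or two. */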
1008 /* invalidate all TBs which intersect with the target physical page
1009 starting in range [start;end[. NOTE: start and end must refer to
1010 the same physical page. 'is_cpu_write_access' should be true if called
1011 from a real cpu write access: the virtual CPU will exit the current
1012 TB if code is modified inside this TB. */
1013 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1014 int is_cpu_write_access)
1016 TranslationBlock *tb, *tb_next, *saved_tb;
1017 CPUState *env = cpu_single_env;
1018 tb_page_addr_t tb_start, tb_end;
1019 PageDesc *p;
1020 int n;
1021 #ifdef TARGET_HAS_PRECISE_SMC
1022 int current_tb_not_found = is_cpu_write_access;
1023 TranslationBlock *current_tb = NULL;
1024 int current_tb_modified = 0;
1025 target_ulong current_pc = 0;
1026 target_ulong current_cs_base = 0;
1027 int current_flags = 0;
1028 #endif /* TARGET_HAS_PRECISE_SMC */
1030 p = page_find(start >> TARGET_PAGE_BITS);
1031 if (!p)
1032 return;
1033 if (!p->code_bitmap &&
1034 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1035 is_cpu_write_access) {
1036 /* build code bitmap */
1037 build_page_bitmap(p);
1040 /* we remove all the TBs in the range [start, end[ */
1041 /* XXX: see if in some cases it could be faster to invalidate all the code */
1042 tb = p->first_tb;
1043 while (tb != NULL) {
1044 n = (long)tb & 3;
1045 tb = (TranslationBlock *)((long)tb & ~3);
1046 tb_next = tb->page_next[n];
1047 /* NOTE: this is subtle as a TB may span two physical pages */
1048 if (n == 0) {
1049 /* NOTE: tb_end may be after the end of the page, but
1050 it is not a problem */
1051 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1052 tb_end = tb_start + tb->size;
1053 } else {
1054 tb_start = tb->page_addr[1];
1055 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1057 if (!(tb_end <= start || tb_start >= end)) {
1058 #ifdef TARGET_HAS_PRECISE_SMC
1059 if (current_tb_not_found) {
1060 current_tb_not_found = 0;
1061 current_tb = NULL;
1062 if (env->mem_io_pc) {
1063 /* now we have a real cpu fault */
1064 current_tb = tb_find_pc(env->mem_io_pc);
1067 if (current_tb == tb &&
1068 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1069 /* If we are modifying the current TB, we must stop
1070 its execution. We could be more precise by checking
1071 that the modification is after the current PC, but it
1072 would require a specialized function to partially
1073 restore the CPU state */
1075 current_tb_modified = 1;
1076 cpu_restore_state(current_tb, env, env->mem_io_pc);
1077 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1078 &current_flags);
1080 #endif /* TARGET_HAS_PRECISE_SMC */
1081 /* we need to do that to handle the case where a signal
1082 occurs while doing tb_phys_invalidate() */
1083 saved_tb = NULL;
1084 if (env) {
1085 saved_tb = env->current_tb;
1086 env->current_tb = NULL;
1088 tb_phys_invalidate(tb, -1);
1089 if (env) {
1090 env->current_tb = saved_tb;
1091 if (env->interrupt_request && env->current_tb)
1092 cpu_interrupt(env, env->interrupt_request);
1095 tb = tb_next;
1097 #if !defined(CONFIG_USER_ONLY)
1098 /* if no code remaining, no need to continue to use slow writes */
1099 if (!p->first_tb) {
1100 invalidate_page_bitmap(p);
1101 if (is_cpu_write_access) {
1102 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1105 #endif
1106 #ifdef TARGET_HAS_PRECISE_SMC
1107 if (current_tb_modified) {
1108 /* we generate a block containing just the instruction
1109 modifying the memory. It will ensure that it cannot modify
1110 itself */
1111 env->current_tb = NULL;
1112 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1113 cpu_resume_from_signal(env, NULL);
1115 #endif
1118 /* len must be <= 8 and start must be a multiple of len */
1119 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1121 PageDesc *p;
1122 int offset, b;
1123 #if 0
1124 if (1) {
1125 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1126 cpu_single_env->mem_io_vaddr, len,
1127 cpu_single_env->eip,
1128 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1130 #endif
1131 p = page_find(start >> TARGET_PAGE_BITS);
1132 if (!p)
1133 return;
1134 if (p->code_bitmap) {
1135 offset = start & ~TARGET_PAGE_MASK;
1136 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1137 if (b & ((1 << len) - 1))
1138 goto do_invalidate;
1139 } else {
1140 do_invalidate:
1141 tb_invalidate_phys_page_range(start, start + len, 1);
1145 #if !defined(CONFIG_SOFTMMU)
1146 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1147 unsigned long pc, void *puc)
1149 TranslationBlock *tb;
1150 PageDesc *p;
1151 int n;
1152 #ifdef TARGET_HAS_PRECISE_SMC
1153 TranslationBlock *current_tb = NULL;
1154 CPUState *env = cpu_single_env;
1155 int current_tb_modified = 0;
1156 target_ulong current_pc = 0;
1157 target_ulong current_cs_base = 0;
1158 int current_flags = 0;
1159 #endif
1161 addr &= TARGET_PAGE_MASK;
1162 p = page_find(addr >> TARGET_PAGE_BITS);
1163 if (!p)
1164 return;
1165 tb = p->first_tb;
1166 #ifdef TARGET_HAS_PRECISE_SMC
1167 if (tb && pc != 0) {
1168 current_tb = tb_find_pc(pc);
1170 #endif
1171 while (tb != NULL) {
1172 n = (long)tb & 3;
1173 tb = (TranslationBlock *)((long)tb & ~3);
1174 #ifdef TARGET_HAS_PRECISE_SMC
1175 if (current_tb == tb &&
1176 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1177 /* If we are modifying the current TB, we must stop
1178 its execution. We could be more precise by checking
1179 that the modification is after the current PC, but it
1180 would require a specialized function to partially
1181 restore the CPU state */
1183 current_tb_modified = 1;
1184 cpu_restore_state(current_tb, env, pc);
1185 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1186 &current_flags);
1188 #endif /* TARGET_HAS_PRECISE_SMC */
1189 tb_phys_invalidate(tb, addr);
1190 tb = tb->page_next[n];
1192 p->first_tb = NULL;
1193 #ifdef TARGET_HAS_PRECISE_SMC
1194 if (current_tb_modified) {
1195 /* we generate a block containing just the instruction
1196 modifying the memory. It will ensure that it cannot modify
1197 itself */
1198 env->current_tb = NULL;
1199 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1200 cpu_resume_from_signal(env, puc);
1202 #endif
1204 #endif
1206 /* add the tb in the target page and protect it if necessary */
1207 static inline void tb_alloc_page(TranslationBlock *tb,
1208 unsigned int n, tb_page_addr_t page_addr)
1210 PageDesc *p;
1211 TranslationBlock *last_first_tb;
1213 tb->page_addr[n] = page_addr;
1214 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1215 tb->page_next[n] = p->first_tb;
1216 last_first_tb = p->first_tb;
1217 p->first_tb = (TranslationBlock *)((long)tb | n);
1218 invalidate_page_bitmap(p);
1220 #if defined(TARGET_HAS_SMC) || 1
1222 #if defined(CONFIG_USER_ONLY)
1223 if (p->flags & PAGE_WRITE) {
1224 target_ulong addr;
1225 PageDesc *p2;
1226 int prot;
1228 /* force the host page as non writable (writes will have a
1229 page fault + mprotect overhead) */
1230 page_addr &= qemu_host_page_mask;
1231 prot = 0;
1232 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1233 addr += TARGET_PAGE_SIZE) {
1235 p2 = page_find (addr >> TARGET_PAGE_BITS);
1236 if (!p2)
1237 continue;
1238 prot |= p2->flags;
1239 p2->flags &= ~PAGE_WRITE;
1241 mprotect(g2h(page_addr), qemu_host_page_size,
1242 (prot & PAGE_BITS) & ~PAGE_WRITE);
1243 #ifdef DEBUG_TB_INVALIDATE
1244 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1245 page_addr);
1246 #endif
1248 #else
1249 /* if some code is already present, then the pages are already
1250 protected. So we handle the case where only the first TB is
1251 allocated in a physical page */
1252 if (!last_first_tb) {
1253 tlb_protect_code(page_addr);
1255 #endif
1257 #endif /* TARGET_HAS_SMC */
1260 /* add a new TB and link it to the physical page tables. phys_page2 is
1261 (-1) to indicate that only one page contains the TB. */
1262 void tb_link_page(TranslationBlock *tb,
1263 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1265 unsigned int h;
1266 TranslationBlock **ptb;
1268 /* Grab the mmap lock to stop another thread invalidating this TB
1269 before we are done. */
1270 mmap_lock();
1271 /* add in the physical hash table */
1272 h = tb_phys_hash_func(phys_pc);
1273 ptb = &tb_phys_hash[h];
1274 tb->phys_hash_next = *ptb;
1275 *ptb = tb;
1277 /* add in the page list */
1278 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1279 if (phys_page2 != -1)
1280 tb_alloc_page(tb, 1, phys_page2);
1281 else
1282 tb->page_addr[1] = -1;
1284 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1285 tb->jmp_next[0] = NULL;
1286 tb->jmp_next[1] = NULL;
1288 /* init original jump addresses */
1289 if (tb->tb_next_offset[0] != 0xffff)
1290 tb_reset_jump(tb, 0);
1291 if (tb->tb_next_offset[1] != 0xffff)
1292 tb_reset_jump(tb, 1);
1294 #ifdef DEBUG_TB_CHECK
1295 tb_page_check();
1296 #endif
1297 mmap_unlock();
1300 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1301 tb[1].tc_ptr. Return NULL if not found */
1302 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1304 int m_min, m_max, m;
1305 unsigned long v;
1306 TranslationBlock *tb;
1308 if (nb_tbs <= 0)
1309 return NULL;
1310 if (tc_ptr < (unsigned long)code_gen_buffer ||
1311 tc_ptr >= (unsigned long)code_gen_ptr)
1312 return NULL;
1313 /* binary search (cf Knuth) */
1314 m_min = 0;
1315 m_max = nb_tbs - 1;
1316 while (m_min <= m_max) {
1317 m = (m_min + m_max) >> 1;
1318 tb = &tbs[m];
1319 v = (unsigned long)tb->tc_ptr;
1320 if (v == tc_ptr)
1321 return tb;
1322 else if (tc_ptr < v) {
1323 m_max = m - 1;
1324 } else {
1325 m_min = m + 1;
1328 return &tbs[m_max];
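/* The binary search above relies on TBs being carved out of
   code_gen_buffer in allocation order, so tbs[0..nb_tbs-1] have
   monotonically increasing tc_ptr values; when tc_ptr falls inside a
   block rather than on its first byte, the search ends with m_max naming
   the TB that contains it. */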
1331 static void tb_reset_jump_recursive(TranslationBlock *tb);
1333 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1335 TranslationBlock *tb1, *tb_next, **ptb;
1336 unsigned int n1;
1338 tb1 = tb->jmp_next[n];
1339 if (tb1 != NULL) {
1340 /* find head of list */
1341 for(;;) {
1342 n1 = (long)tb1 & 3;
1343 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1344 if (n1 == 2)
1345 break;
1346 tb1 = tb1->jmp_next[n1];
1348         /* we are now sure that tb jumps to tb1 */
1349 tb_next = tb1;
1351 /* remove tb from the jmp_first list */
1352 ptb = &tb_next->jmp_first;
1353 for(;;) {
1354 tb1 = *ptb;
1355 n1 = (long)tb1 & 3;
1356 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1357 if (n1 == n && tb1 == tb)
1358 break;
1359 ptb = &tb1->jmp_next[n1];
1361 *ptb = tb->jmp_next[n];
1362 tb->jmp_next[n] = NULL;
1364 /* suppress the jump to next tb in generated code */
1365 tb_reset_jump(tb, n);
1367 /* suppress jumps in the tb on which we could have jumped */
1368 tb_reset_jump_recursive(tb_next);
1372 static void tb_reset_jump_recursive(TranslationBlock *tb)
1374 tb_reset_jump_recursive2(tb, 0);
1375 tb_reset_jump_recursive2(tb, 1);
1378 #if defined(TARGET_HAS_ICE)
1379 #if defined(CONFIG_USER_ONLY)
1380 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1382 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1384 #else
1385 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1387 target_phys_addr_t addr;
1388 target_ulong pd;
1389 ram_addr_t ram_addr;
1390 PhysPageDesc *p;
1392 addr = cpu_get_phys_page_debug(env, pc);
1393 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1394 if (!p) {
1395 pd = IO_MEM_UNASSIGNED;
1396 } else {
1397 pd = p->phys_offset;
1399 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1400 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1402 #endif
1403 #endif /* TARGET_HAS_ICE */
1405 #if defined(CONFIG_USER_ONLY)
1406 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1411 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1412 int flags, CPUWatchpoint **watchpoint)
1414 return -ENOSYS;
1416 #else
1417 /* Add a watchpoint. */
1418 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1419 int flags, CPUWatchpoint **watchpoint)
1421 target_ulong len_mask = ~(len - 1);
1422 CPUWatchpoint *wp;
1424 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1425 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1426 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1427 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1428 return -EINVAL;
1430 wp = qemu_malloc(sizeof(*wp));
1432 wp->vaddr = addr;
1433 wp->len_mask = len_mask;
1434 wp->flags = flags;
1436 /* keep all GDB-injected watchpoints in front */
1437 if (flags & BP_GDB)
1438 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1439 else
1440 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1442 tlb_flush_page(env, addr);
1444 if (watchpoint)
1445 *watchpoint = wp;
1446 return 0;
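    /* len_mask illustration: for a power-of-two len such as 4, ~(len - 1)
       is ~3, so "addr & ~len_mask" is non-zero exactly when the watchpoint
       is not aligned to its own length, which is what the sanity check
       above rejects. */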
1449 /* Remove a specific watchpoint. */
1450 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1451 int flags)
1453 target_ulong len_mask = ~(len - 1);
1454 CPUWatchpoint *wp;
1456 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1457 if (addr == wp->vaddr && len_mask == wp->len_mask
1458 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1459 cpu_watchpoint_remove_by_ref(env, wp);
1460 return 0;
1463 return -ENOENT;
1466 /* Remove a specific watchpoint by reference. */
1467 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1469 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1471 tlb_flush_page(env, watchpoint->vaddr);
1473 qemu_free(watchpoint);
1476 /* Remove all matching watchpoints. */
1477 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1479 CPUWatchpoint *wp, *next;
1481 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1482 if (wp->flags & mask)
1483 cpu_watchpoint_remove_by_ref(env, wp);
1486 #endif
1488 /* Add a breakpoint. */
1489 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1490 CPUBreakpoint **breakpoint)
1492 #if defined(TARGET_HAS_ICE)
1493 CPUBreakpoint *bp;
1495 bp = qemu_malloc(sizeof(*bp));
1497 bp->pc = pc;
1498 bp->flags = flags;
1500 /* keep all GDB-injected breakpoints in front */
1501 if (flags & BP_GDB)
1502 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1503 else
1504 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1506 breakpoint_invalidate(env, pc);
1508 if (breakpoint)
1509 *breakpoint = bp;
1510 return 0;
1511 #else
1512 return -ENOSYS;
1513 #endif
1516 /* Remove a specific breakpoint. */
1517 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1519 #if defined(TARGET_HAS_ICE)
1520 CPUBreakpoint *bp;
1522 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1523 if (bp->pc == pc && bp->flags == flags) {
1524 cpu_breakpoint_remove_by_ref(env, bp);
1525 return 0;
1528 return -ENOENT;
1529 #else
1530 return -ENOSYS;
1531 #endif
1534 /* Remove a specific breakpoint by reference. */
1535 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1537 #if defined(TARGET_HAS_ICE)
1538 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1540 breakpoint_invalidate(env, breakpoint->pc);
1542 qemu_free(breakpoint);
1543 #endif
1546 /* Remove all matching breakpoints. */
1547 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1549 #if defined(TARGET_HAS_ICE)
1550 CPUBreakpoint *bp, *next;
1552 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1553 if (bp->flags & mask)
1554 cpu_breakpoint_remove_by_ref(env, bp);
1556 #endif
1559 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1560 CPU loop after each instruction */
1561 void cpu_single_step(CPUState *env, int enabled)
1563 #if defined(TARGET_HAS_ICE)
1564 if (env->singlestep_enabled != enabled) {
1565 env->singlestep_enabled = enabled;
1566 if (kvm_enabled())
1567 kvm_update_guest_debug(env, 0);
1568 else {
1569 /* must flush all the translated code to avoid inconsistencies */
1570 /* XXX: only flush what is necessary */
1571 tb_flush(env);
1574 #endif
1577 /* enable or disable low levels log */
1578 void cpu_set_log(int log_flags)
1580 loglevel = log_flags;
1581 if (loglevel && !logfile) {
1582 logfile = fopen(logfilename, log_append ? "a" : "w");
1583 if (!logfile) {
1584 perror(logfilename);
1585 _exit(1);
1587 #if !defined(CONFIG_SOFTMMU)
1588 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1590 static char logfile_buf[4096];
1591 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1593 #elif !defined(_WIN32)
1594 /* Win32 doesn't support line-buffering and requires size >= 2 */
1595 setvbuf(logfile, NULL, _IOLBF, 0);
1596 #endif
1597 log_append = 1;
1599 if (!loglevel && logfile) {
1600 fclose(logfile);
1601 logfile = NULL;
1605 void cpu_set_log_filename(const char *filename)
1607 logfilename = strdup(filename);
1608 if (logfile) {
1609 fclose(logfile);
1610 logfile = NULL;
1612 cpu_set_log(loglevel);
1615 static void cpu_unlink_tb(CPUState *env)
1617 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1618 problem and hope the cpu will stop of its own accord. For userspace
1619 emulation this often isn't actually as bad as it sounds. Often
1620 signals are used primarily to interrupt blocking syscalls. */
1621 TranslationBlock *tb;
1622 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1624 spin_lock(&interrupt_lock);
1625 tb = env->current_tb;
1626 /* if the cpu is currently executing code, we must unlink it and
1627 all the potentially executing TB */
1628 if (tb) {
1629 env->current_tb = NULL;
1630 tb_reset_jump_recursive(tb);
1632 spin_unlock(&interrupt_lock);
1635 #ifndef CONFIG_USER_ONLY
1636 /* mask must never be zero, except for A20 change call */
1637 static void tcg_handle_interrupt(CPUState *env, int mask)
1639 int old_mask;
1641 old_mask = env->interrupt_request;
1642 env->interrupt_request |= mask;
1645 * If called from iothread context, wake the target cpu in
1646      * case it's halted.
1648 if (!qemu_cpu_is_self(env)) {
1649 qemu_cpu_kick(env);
1650 return;
1653 if (use_icount) {
1654 env->icount_decr.u16.high = 0xffff;
1655 if (!can_do_io(env)
1656 && (mask & ~old_mask) != 0) {
1657 cpu_abort(env, "Raised interrupt while not in I/O function");
1659 } else {
1660 cpu_unlink_tb(env);
1664 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1666 #else /* CONFIG_USER_ONLY */
1668 void cpu_interrupt(CPUState *env, int mask)
1670 env->interrupt_request |= mask;
1671 cpu_unlink_tb(env);
1673 #endif /* CONFIG_USER_ONLY */
1675 void cpu_reset_interrupt(CPUState *env, int mask)
1677 env->interrupt_request &= ~mask;
1680 void cpu_exit(CPUState *env)
1682 env->exit_request = 1;
1683 cpu_unlink_tb(env);
1686 const CPULogItem cpu_log_items[] = {
1687 { CPU_LOG_TB_OUT_ASM, "out_asm",
1688 "show generated host assembly code for each compiled TB" },
1689 { CPU_LOG_TB_IN_ASM, "in_asm",
1690 "show target assembly code for each compiled TB" },
1691 { CPU_LOG_TB_OP, "op",
1692 "show micro ops for each compiled TB" },
1693 { CPU_LOG_TB_OP_OPT, "op_opt",
1694 "show micro ops "
1695 #ifdef TARGET_I386
1696 "before eflags optimization and "
1697 #endif
1698 "after liveness analysis" },
1699 { CPU_LOG_INT, "int",
1700 "show interrupts/exceptions in short format" },
1701 { CPU_LOG_EXEC, "exec",
1702 "show trace before each executed TB (lots of logs)" },
1703 { CPU_LOG_TB_CPU, "cpu",
1704 "show CPU state before block translation" },
1705 #ifdef TARGET_I386
1706 { CPU_LOG_PCALL, "pcall",
1707 "show protected mode far calls/returns/exceptions" },
1708 { CPU_LOG_RESET, "cpu_reset",
1709 "show CPU state before CPU resets" },
1710 #endif
1711 #ifdef DEBUG_IOPORT
1712 { CPU_LOG_IOPORT, "ioport",
1713 "show all i/o ports accesses" },
1714 #endif
1715 { 0, NULL, NULL },
1718 #ifndef CONFIG_USER_ONLY
1719 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1720 = QLIST_HEAD_INITIALIZER(memory_client_list);
1722 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1723 ram_addr_t size,
1724 ram_addr_t phys_offset,
1725 bool log_dirty)
1727 CPUPhysMemoryClient *client;
1728 QLIST_FOREACH(client, &memory_client_list, list) {
1729 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1733 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1734 target_phys_addr_t end)
1736 CPUPhysMemoryClient *client;
1737 QLIST_FOREACH(client, &memory_client_list, list) {
1738 int r = client->sync_dirty_bitmap(client, start, end);
1739 if (r < 0)
1740 return r;
1742 return 0;
1745 static int cpu_notify_migration_log(int enable)
1747 CPUPhysMemoryClient *client;
1748 QLIST_FOREACH(client, &memory_client_list, list) {
1749 int r = client->migration_log(client, enable);
1750 if (r < 0)
1751 return r;
1753 return 0;
1756 struct last_map {
1757 target_phys_addr_t start_addr;
1758 ram_addr_t size;
1759 ram_addr_t phys_offset;
1762 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1763 * address. Each intermediate table provides the next L2_BITs of guest
1764  * physical address space. The number of levels varies based on host and
1765 * guest configuration, making it efficient to build the final guest
1766 * physical address by seeding the L1 offset and shifting and adding in
1767 * each L2 offset as we recurse through them. */
1768 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1769 void **lp, target_phys_addr_t addr,
1770 struct last_map *map)
1772 int i;
1774 if (*lp == NULL) {
1775 return;
1777 if (level == 0) {
1778 PhysPageDesc *pd = *lp;
1779 addr <<= L2_BITS + TARGET_PAGE_BITS;
1780 for (i = 0; i < L2_SIZE; ++i) {
1781 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1782 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1784 if (map->size &&
1785 start_addr == map->start_addr + map->size &&
1786 pd[i].phys_offset == map->phys_offset + map->size) {
1788 map->size += TARGET_PAGE_SIZE;
1789 continue;
1790 } else if (map->size) {
1791 client->set_memory(client, map->start_addr,
1792 map->size, map->phys_offset, false);
1795 map->start_addr = start_addr;
1796 map->size = TARGET_PAGE_SIZE;
1797 map->phys_offset = pd[i].phys_offset;
1800 } else {
1801 void **pp = *lp;
1802 for (i = 0; i < L2_SIZE; ++i) {
1803 phys_page_for_each_1(client, level - 1, pp + i,
1804 (addr << L2_BITS) | i, map);
1809 static void phys_page_for_each(CPUPhysMemoryClient *client)
1811 int i;
1812 struct last_map map = { };
1814 for (i = 0; i < P_L1_SIZE; ++i) {
1815 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1816 l1_phys_map + i, i, &map);
1818 if (map.size) {
1819 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1820 false);
1824 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1826 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1827 phys_page_for_each(client);
1830 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1832 QLIST_REMOVE(client, list);
1834 #endif
1836 static int cmp1(const char *s1, int n, const char *s2)
1838 if (strlen(s2) != n)
1839 return 0;
1840 return memcmp(s1, s2, n) == 0;
1843 /* takes a comma separated list of log masks. Return 0 if error. */
1844 int cpu_str_to_log_mask(const char *str)
1846 const CPULogItem *item;
1847 int mask;
1848 const char *p, *p1;
1850 p = str;
1851 mask = 0;
1852 for(;;) {
1853 p1 = strchr(p, ',');
1854 if (!p1)
1855 p1 = p + strlen(p);
1856 if(cmp1(p,p1-p,"all")) {
1857 for(item = cpu_log_items; item->mask != 0; item++) {
1858 mask |= item->mask;
1860 } else {
1861 for(item = cpu_log_items; item->mask != 0; item++) {
1862 if (cmp1(p, p1 - p, item->name))
1863 goto found;
1865 return 0;
1867 found:
1868 mask |= item->mask;
1869 if (*p1 != ',')
1870 break;
1871 p = p1 + 1;
1873 return mask;
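/* Usage sketch (names taken from cpu_log_items above): passing
   "in_asm,cpu" returns CPU_LOG_TB_IN_ASM | CPU_LOG_TB_CPU, the single
   word "all" ORs in every item's mask, and an unknown name makes the
   function return 0 to signal an error. */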
1876 void cpu_abort(CPUState *env, const char *fmt, ...)
1878 va_list ap;
1879 va_list ap2;
1881 va_start(ap, fmt);
1882 va_copy(ap2, ap);
1883 fprintf(stderr, "qemu: fatal: ");
1884 vfprintf(stderr, fmt, ap);
1885 fprintf(stderr, "\n");
1886 #ifdef TARGET_I386
1887 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1888 #else
1889 cpu_dump_state(env, stderr, fprintf, 0);
1890 #endif
1891 if (qemu_log_enabled()) {
1892 qemu_log("qemu: fatal: ");
1893 qemu_log_vprintf(fmt, ap2);
1894 qemu_log("\n");
1895 #ifdef TARGET_I386
1896 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1897 #else
1898 log_cpu_state(env, 0);
1899 #endif
1900 qemu_log_flush();
1901 qemu_log_close();
1903 va_end(ap2);
1904 va_end(ap);
1905 #if defined(CONFIG_USER_ONLY)
1907 struct sigaction act;
1908 sigfillset(&act.sa_mask);
1909 act.sa_handler = SIG_DFL;
1910 sigaction(SIGABRT, &act, NULL);
1912 #endif
1913 abort();
1916 CPUState *cpu_copy(CPUState *env)
1918 CPUState *new_env = cpu_init(env->cpu_model_str);
1919 CPUState *next_cpu = new_env->next_cpu;
1920 int cpu_index = new_env->cpu_index;
1921 #if defined(TARGET_HAS_ICE)
1922 CPUBreakpoint *bp;
1923 CPUWatchpoint *wp;
1924 #endif
1926 memcpy(new_env, env, sizeof(CPUState));
1928 /* Preserve chaining and index. */
1929 new_env->next_cpu = next_cpu;
1930 new_env->cpu_index = cpu_index;
1932 /* Clone all break/watchpoints.
1933 Note: Once we support ptrace with hw-debug register access, make sure
1934 BP_CPU break/watchpoints are handled correctly on clone. */
1935 QTAILQ_INIT(&env->breakpoints);
1936 QTAILQ_INIT(&env->watchpoints);
1937 #if defined(TARGET_HAS_ICE)
1938 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1939 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1941 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1942 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1943 wp->flags, NULL);
1945 #endif
1947 return new_env;
1950 #if !defined(CONFIG_USER_ONLY)
1952 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1954 unsigned int i;
1956 /* Discard jump cache entries for any tb which might potentially
1957 overlap the flushed page. */
1958 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1959 memset (&env->tb_jmp_cache[i], 0,
1960 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1962 i = tb_jmp_cache_hash_page(addr);
1963 memset (&env->tb_jmp_cache[i], 0,
1964 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1967 static CPUTLBEntry s_cputlb_empty_entry = {
1968 .addr_read = -1,
1969 .addr_write = -1,
1970 .addr_code = -1,
1971 .addend = -1,
1974 /* NOTE: if flush_global is true, also flush global entries (not
1975 implemented yet) */
1976 void tlb_flush(CPUState *env, int flush_global)
1978 int i;
1980 #if defined(DEBUG_TLB)
1981 printf("tlb_flush:\n");
1982 #endif
1983 /* must reset current TB so that interrupts cannot modify the
1984 links while we are modifying them */
1985 env->current_tb = NULL;
1987 for(i = 0; i < CPU_TLB_SIZE; i++) {
1988 int mmu_idx;
1989 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1990 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1994 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1996 env->tlb_flush_addr = -1;
1997 env->tlb_flush_mask = 0;
1998 tlb_flush_count++;
2001 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2003 if (addr == (tlb_entry->addr_read &
2004 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2005 addr == (tlb_entry->addr_write &
2006 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2007 addr == (tlb_entry->addr_code &
2008 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2009 *tlb_entry = s_cputlb_empty_entry;
2013 void tlb_flush_page(CPUState *env, target_ulong addr)
2015 int i;
2016 int mmu_idx;
2018 #if defined(DEBUG_TLB)
2019 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2020 #endif
2021 /* Check if we need to flush due to large pages. */
2022 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2023 #if defined(DEBUG_TLB)
2024 printf("tlb_flush_page: forced full flush ("
2025 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2026 env->tlb_flush_addr, env->tlb_flush_mask);
2027 #endif
2028 tlb_flush(env, 1);
2029 return;
2031 /* must reset current TB so that interrupts cannot modify the
2032 links while we are modifying them */
2033 env->current_tb = NULL;
2035 addr &= TARGET_PAGE_MASK;
2036 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2037 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2038 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2040 tlb_flush_jmp_cache(env, addr);
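    /* The software TLB is direct-mapped per MMU mode: the entry index is
       simply (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1), so flushing
       a page only has to touch one slot in each mode's table, plus the
       jump cache entries cleared above. */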
2043 /* update the TLBs so that writes to code in the virtual page 'addr'
2044 can be detected */
2045 static void tlb_protect_code(ram_addr_t ram_addr)
2047 cpu_physical_memory_reset_dirty(ram_addr,
2048 ram_addr + TARGET_PAGE_SIZE,
2049 CODE_DIRTY_FLAG);
2052 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2053 tested for self modifying code */
2054 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2055 target_ulong vaddr)
2057 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2060 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2061 unsigned long start, unsigned long length)
2063 unsigned long addr;
2064 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2065 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2066 if ((addr - start) < length) {
2067 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2072 /* Note: start and end must be within the same ram block. */
2073 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2074 int dirty_flags)
2076 CPUState *env;
2077 unsigned long length, start1;
2078 int i;
2080 start &= TARGET_PAGE_MASK;
2081 end = TARGET_PAGE_ALIGN(end);
2083 length = end - start;
2084 if (length == 0)
2085 return;
2086 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2088 /* we modify the TLB cache so that the dirty bit will be set again
2089 when accessing the range */
2090 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2091 /* Check that we don't span multiple blocks - this breaks the
2092 address comparisons below. */
2093 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2094 != (end - 1) - start) {
2095 abort();
2098 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2099 int mmu_idx;
2100 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2101 for(i = 0; i < CPU_TLB_SIZE; i++)
2102 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2103 start1, length);
2108 int cpu_physical_memory_set_dirty_tracking(int enable)
2110 int ret = 0;
2111 in_migration = enable;
2112 ret = cpu_notify_migration_log(!!enable);
2113 return ret;
2116 int cpu_physical_memory_get_dirty_tracking(void)
2118 return in_migration;
2121 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2122 target_phys_addr_t end_addr)
2124 int ret;
2126 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2127 return ret;
2130 int cpu_physical_log_start(target_phys_addr_t start_addr,
2131 ram_addr_t size)
2133 CPUPhysMemoryClient *client;
2134 QLIST_FOREACH(client, &memory_client_list, list) {
2135 if (client->log_start) {
2136 int r = client->log_start(client, start_addr, size);
2137 if (r < 0) {
2138 return r;
2142 return 0;
2145 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2146 ram_addr_t size)
2148 CPUPhysMemoryClient *client;
2149 QLIST_FOREACH(client, &memory_client_list, list) {
2150 if (client->log_stop) {
2151 int r = client->log_stop(client, start_addr, size);
2152 if (r < 0) {
2153 return r;
2157 return 0;
2160 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2162 ram_addr_t ram_addr;
2163 void *p;
2165 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2166 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2167 + tlb_entry->addend);
2168 ram_addr = qemu_ram_addr_from_host_nofail(p);
2169 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2170 tlb_entry->addr_write |= TLB_NOTDIRTY;
2175 /* update the TLB according to the current state of the dirty bits */
2176 void cpu_tlb_update_dirty(CPUState *env)
2178 int i;
2179 int mmu_idx;
2180 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2181 for(i = 0; i < CPU_TLB_SIZE; i++)
2182 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2186 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2188 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2189 tlb_entry->addr_write = vaddr;
2192 /* update the TLB corresponding to virtual page vaddr
2193 so that it is no longer dirty */
2194 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2196 int i;
2197 int mmu_idx;
2199 vaddr &= TARGET_PAGE_MASK;
2200 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2201 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2202 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2205 /* Our TLB does not support large pages, so remember the area covered by
2206 large pages and trigger a full TLB flush if these are invalidated. */
2207 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2208 target_ulong size)
2210 target_ulong mask = ~(size - 1);
2212 if (env->tlb_flush_addr == (target_ulong)-1) {
2213 env->tlb_flush_addr = vaddr & mask;
2214 env->tlb_flush_mask = mask;
2215 return;
2217 /* Extend the existing region to include the new page.
2218 This is a compromise between unnecessary flushes and the cost
2219 of maintaining a full variable size TLB. */
2220 mask &= env->tlb_flush_mask;
2221 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2222 mask <<= 1;
2224 env->tlb_flush_addr &= mask;
2225 env->tlb_flush_mask = mask;
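/* Illustrative note (an addition, not part of the original source): assuming
   4 KiB target pages, if a 2 MiB page at 0x00200000 is already recorded
   (tlb_flush_addr = 0x00200000, tlb_flush_mask = ~0x1fffff) and a second
   2 MiB page is added at 0x00600000, the loop above widens the mask:
       0x00200000 ^ 0x00600000 = 0x00400000  ->  mask becomes ~0x7fffff
   so the tracked region grows to 0x00000000..0x007fffff, and any
   tlb_flush_page() inside it falls back to a full tlb_flush(). */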
2228 /* Add a new TLB entry. At most one entry for a given virtual address
2229 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2230 supplied size is only used by tlb_flush_page. */
2231 void tlb_set_page(CPUState *env, target_ulong vaddr,
2232 target_phys_addr_t paddr, int prot,
2233 int mmu_idx, target_ulong size)
2235 PhysPageDesc *p;
2236 unsigned long pd;
2237 unsigned int index;
2238 target_ulong address;
2239 target_ulong code_address;
2240 unsigned long addend;
2241 CPUTLBEntry *te;
2242 CPUWatchpoint *wp;
2243 target_phys_addr_t iotlb;
2245 assert(size >= TARGET_PAGE_SIZE);
2246 if (size != TARGET_PAGE_SIZE) {
2247 tlb_add_large_page(env, vaddr, size);
2249 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2250 if (!p) {
2251 pd = IO_MEM_UNASSIGNED;
2252 } else {
2253 pd = p->phys_offset;
2255 #if defined(DEBUG_TLB)
2256 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2257 " prot=%x idx=%d pd=0x%08lx\n",
2258 vaddr, paddr, prot, mmu_idx, pd);
2259 #endif
2261 address = vaddr;
2262 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2263 /* IO memory case (romd handled later) */
2264 address |= TLB_MMIO;
2266 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2267 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2268 /* Normal RAM. */
2269 iotlb = pd & TARGET_PAGE_MASK;
2270 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2271 iotlb |= IO_MEM_NOTDIRTY;
2272 else
2273 iotlb |= IO_MEM_ROM;
2274 } else {
2275 /* IO handlers are currently passed a physical address.
2276 It would be nice to pass an offset from the base address
2277 of that region. This would avoid having to special case RAM,
2278 and avoid full address decoding in every device.
2279 We can't use the high bits of pd for this because
2280 IO_MEM_ROMD uses these as a ram address. */
2281 iotlb = (pd & ~TARGET_PAGE_MASK);
2282 if (p) {
2283 iotlb += p->region_offset;
2284 } else {
2285 iotlb += paddr;
2289 code_address = address;
2290 /* Make accesses to pages with watchpoints go via the
2291 watchpoint trap routines. */
2292 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2293 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2294 /* Avoid trapping reads of pages with a write breakpoint. */
2295 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2296 iotlb = io_mem_watch + paddr;
2297 address |= TLB_MMIO;
2298 break;
2303 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2304 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2305 te = &env->tlb_table[mmu_idx][index];
2306 te->addend = addend - vaddr;
2307 if (prot & PAGE_READ) {
2308 te->addr_read = address;
2309 } else {
2310 te->addr_read = -1;
2313 if (prot & PAGE_EXEC) {
2314 te->addr_code = code_address;
2315 } else {
2316 te->addr_code = -1;
2318 if (prot & PAGE_WRITE) {
2319 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2320 (pd & IO_MEM_ROMD)) {
2321 /* Write access calls the I/O callback. */
2322 te->addr_write = address | TLB_MMIO;
2323 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2324 !cpu_physical_memory_is_dirty(pd)) {
2325 te->addr_write = address | TLB_NOTDIRTY;
2326 } else {
2327 te->addr_write = address;
2329 } else {
2330 te->addr_write = -1;
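/* Illustrative note (an addition, not part of the original source): a
   target's tlb_fill() handler would typically install a mapping roughly
   like this, once it has translated vaddr to paddr and computed prot:

       tlb_set_page(env, vaddr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
                    PAGE_READ | PAGE_WRITE | PAGE_EXEC, mmu_idx,
                    TARGET_PAGE_SIZE);

   Passing a size larger than TARGET_PAGE_SIZE only records the large-page
   region via tlb_add_large_page(); the entry itself still maps one page. */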
2334 #else
2336 void tlb_flush(CPUState *env, int flush_global)
2340 void tlb_flush_page(CPUState *env, target_ulong addr)
2345 * Walks guest process memory "regions" one by one
2346 * and calls callback function 'fn' for each region.
2349 struct walk_memory_regions_data
2351 walk_memory_regions_fn fn;
2352 void *priv;
2353 unsigned long start;
2354 int prot;
2357 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2358 abi_ulong end, int new_prot)
2360 if (data->start != -1ul) {
2361 int rc = data->fn(data->priv, data->start, end, data->prot);
2362 if (rc != 0) {
2363 return rc;
2367 data->start = (new_prot ? end : -1ul);
2368 data->prot = new_prot;
2370 return 0;
2373 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2374 abi_ulong base, int level, void **lp)
2376 abi_ulong pa;
2377 int i, rc;
2379 if (*lp == NULL) {
2380 return walk_memory_regions_end(data, base, 0);
2383 if (level == 0) {
2384 PageDesc *pd = *lp;
2385 for (i = 0; i < L2_SIZE; ++i) {
2386 int prot = pd[i].flags;
2388 pa = base | (i << TARGET_PAGE_BITS);
2389 if (prot != data->prot) {
2390 rc = walk_memory_regions_end(data, pa, prot);
2391 if (rc != 0) {
2392 return rc;
2396 } else {
2397 void **pp = *lp;
2398 for (i = 0; i < L2_SIZE; ++i) {
2399 pa = base | ((abi_ulong)i <<
2400 (TARGET_PAGE_BITS + L2_BITS * level));
2401 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2402 if (rc != 0) {
2403 return rc;
2408 return 0;
2411 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2413 struct walk_memory_regions_data data;
2414 unsigned long i;
2416 data.fn = fn;
2417 data.priv = priv;
2418 data.start = -1ul;
2419 data.prot = 0;
2421 for (i = 0; i < V_L1_SIZE; i++) {
2422 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2423 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2424 if (rc != 0) {
2425 return rc;
2429 return walk_memory_regions_end(&data, 0, 0);
2432 static int dump_region(void *priv, abi_ulong start,
2433 abi_ulong end, unsigned long prot)
2435 FILE *f = (FILE *)priv;
2437 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2438 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2439 start, end, end - start,
2440 ((prot & PAGE_READ) ? 'r' : '-'),
2441 ((prot & PAGE_WRITE) ? 'w' : '-'),
2442 ((prot & PAGE_EXEC) ? 'x' : '-'));
2444 return (0);
2447 /* dump memory mappings */
2448 void page_dump(FILE *f)
2450 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2451 "start", "end", "size", "prot");
2452 walk_memory_regions(f, dump_region);
2455 int page_get_flags(target_ulong address)
2457 PageDesc *p;
2459 p = page_find(address >> TARGET_PAGE_BITS);
2460 if (!p)
2461 return 0;
2462 return p->flags;
2465 /* Modify the flags of a page and invalidate the code if necessary.
2466 The flag PAGE_WRITE_ORG is positioned automatically depending
2467 on PAGE_WRITE. The mmap_lock should already be held. */
2468 void page_set_flags(target_ulong start, target_ulong end, int flags)
2470 target_ulong addr, len;
2472 /* This function should never be called with addresses outside the
2473 guest address space. If this assert fires, it probably indicates
2474 a missing call to h2g_valid. */
2475 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2476 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2477 #endif
2478 assert(start < end);
2480 start = start & TARGET_PAGE_MASK;
2481 end = TARGET_PAGE_ALIGN(end);
2483 if (flags & PAGE_WRITE) {
2484 flags |= PAGE_WRITE_ORG;
2487 for (addr = start, len = end - start;
2488 len != 0;
2489 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2490 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2492 /* If the write protection bit is set, then we invalidate
2493 the code inside. */
2494 if (!(p->flags & PAGE_WRITE) &&
2495 (flags & PAGE_WRITE) &&
2496 p->first_tb) {
2497 tb_invalidate_phys_page(addr, 0, NULL);
2499 p->flags = flags;
2503 int page_check_range(target_ulong start, target_ulong len, int flags)
2505 PageDesc *p;
2506 target_ulong end;
2507 target_ulong addr;
2509 /* This function should never be called with addresses outside the
2510 guest address space. If this assert fires, it probably indicates
2511 a missing call to h2g_valid. */
2512 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2513 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2514 #endif
2516 if (len == 0) {
2517 return 0;
2519 if (start + len - 1 < start) {
2520 /* We've wrapped around. */
2521 return -1;
2524 end = TARGET_PAGE_ALIGN(start+len); /* must do before we lose bits in the next step */
2525 start = start & TARGET_PAGE_MASK;
2527 for (addr = start, len = end - start;
2528 len != 0;
2529 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2530 p = page_find(addr >> TARGET_PAGE_BITS);
2531 if( !p )
2532 return -1;
2533 if( !(p->flags & PAGE_VALID) )
2534 return -1;
2536 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2537 return -1;
2538 if (flags & PAGE_WRITE) {
2539 if (!(p->flags & PAGE_WRITE_ORG))
2540 return -1;
2541 /* unprotect the page if it was put read-only because it
2542 contains translated code */
2543 if (!(p->flags & PAGE_WRITE)) {
2544 if (!page_unprotect(addr, 0, NULL))
2545 return -1;
2547 return 0;
2550 return 0;
2553 /* called from signal handler: invalidate the code and unprotect the
2554 page. Return TRUE if the fault was successfully handled. */
2555 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2557 unsigned int prot;
2558 PageDesc *p;
2559 target_ulong host_start, host_end, addr;
2561 /* Technically this isn't safe inside a signal handler. However we
2562 know this only ever happens in a synchronous SEGV handler, so in
2563 practice it seems to be ok. */
2564 mmap_lock();
2566 p = page_find(address >> TARGET_PAGE_BITS);
2567 if (!p) {
2568 mmap_unlock();
2569 return 0;
2572 /* if the page was really writable, then we change its
2573 protection back to writable */
2574 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2575 host_start = address & qemu_host_page_mask;
2576 host_end = host_start + qemu_host_page_size;
2578 prot = 0;
2579 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2580 p = page_find(addr >> TARGET_PAGE_BITS);
2581 p->flags |= PAGE_WRITE;
2582 prot |= p->flags;
2584 /* and since the content will be modified, we must invalidate
2585 the corresponding translated code. */
2586 tb_invalidate_phys_page(addr, pc, puc);
2587 #ifdef DEBUG_TB_CHECK
2588 tb_invalidate_check(addr);
2589 #endif
2591 mprotect((void *)g2h(host_start), qemu_host_page_size,
2592 prot & PAGE_BITS);
2594 mmap_unlock();
2595 return 1;
2597 mmap_unlock();
2598 return 0;
2601 static inline void tlb_set_dirty(CPUState *env,
2602 unsigned long addr, target_ulong vaddr)
2605 #endif /* defined(CONFIG_USER_ONLY) */
2607 #if !defined(CONFIG_USER_ONLY)
2609 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2610 typedef struct subpage_t {
2611 target_phys_addr_t base;
2612 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2613 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2614 } subpage_t;
2616 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2617 ram_addr_t memory, ram_addr_t region_offset);
2618 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2619 ram_addr_t orig_memory,
2620 ram_addr_t region_offset);
2621 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2622 need_subpage) \
2623 do { \
2624 if (addr > start_addr) \
2625 start_addr2 = 0; \
2626 else { \
2627 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2628 if (start_addr2 > 0) \
2629 need_subpage = 1; \
2632 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2633 end_addr2 = TARGET_PAGE_SIZE - 1; \
2634 else { \
2635 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2636 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2637 need_subpage = 1; \
2639 } while (0)
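/* Illustrative note (an addition, not part of the original source): with
   4 KiB pages, registering start_addr = 0x1200 and orig_size = 0x800 against
   the page at addr = 0x1000 yields start_addr2 = 0x200 and end_addr2 = 0x9ff,
   so need_subpage is set and a subpage_t handles the partial coverage. */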
2641 /* register physical memory.
2642 For RAM, 'size' must be a multiple of the target page size.
2643 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2644 io memory page. The address used when calling the IO function is
2645 the offset from the start of the region, plus region_offset. Both
2646 start_addr and region_offset are rounded down to a page boundary
2647 before calculating this offset. This should not be a problem unless
2648 the low bits of start_addr and region_offset differ. */
2649 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2650 ram_addr_t size,
2651 ram_addr_t phys_offset,
2652 ram_addr_t region_offset,
2653 bool log_dirty)
2655 target_phys_addr_t addr, end_addr;
2656 PhysPageDesc *p;
2657 CPUState *env;
2658 ram_addr_t orig_size = size;
2659 subpage_t *subpage;
2661 assert(size);
2662 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2664 if (phys_offset == IO_MEM_UNASSIGNED) {
2665 region_offset = start_addr;
2667 region_offset &= TARGET_PAGE_MASK;
2668 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2669 end_addr = start_addr + (target_phys_addr_t)size;
2671 addr = start_addr;
2672 do {
2673 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2674 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2675 ram_addr_t orig_memory = p->phys_offset;
2676 target_phys_addr_t start_addr2, end_addr2;
2677 int need_subpage = 0;
2679 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2680 need_subpage);
2681 if (need_subpage) {
2682 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2683 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2684 &p->phys_offset, orig_memory,
2685 p->region_offset);
2686 } else {
2687 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2688 >> IO_MEM_SHIFT];
2690 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2691 region_offset);
2692 p->region_offset = 0;
2693 } else {
2694 p->phys_offset = phys_offset;
2695 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2696 (phys_offset & IO_MEM_ROMD))
2697 phys_offset += TARGET_PAGE_SIZE;
2699 } else {
2700 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2701 p->phys_offset = phys_offset;
2702 p->region_offset = region_offset;
2703 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2704 (phys_offset & IO_MEM_ROMD)) {
2705 phys_offset += TARGET_PAGE_SIZE;
2706 } else {
2707 target_phys_addr_t start_addr2, end_addr2;
2708 int need_subpage = 0;
2710 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2711 end_addr2, need_subpage);
2713 if (need_subpage) {
2714 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2715 &p->phys_offset, IO_MEM_UNASSIGNED,
2716 addr & TARGET_PAGE_MASK);
2717 subpage_register(subpage, start_addr2, end_addr2,
2718 phys_offset, region_offset);
2719 p->region_offset = 0;
2723 region_offset += TARGET_PAGE_SIZE;
2724 addr += TARGET_PAGE_SIZE;
2725 } while (addr != end_addr);
2727 /* since each CPU stores ram addresses in its TLB cache, we must
2728 reset the modified entries */
2729 /* XXX: slow ! */
2730 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2731 tlb_flush(env, 1);
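/* Illustrative note (an addition, not part of the original source): a board
   model of this era would typically pair this with qemu_ram_alloc(); the
   names below are hypothetical:

       ram_addr_t off = qemu_ram_alloc(NULL, "board.ram", ram_size);
       cpu_register_physical_memory_log(0, ram_size, off | IO_MEM_RAM,
                                        0, false);

   MMIO regions instead pass the index returned by cpu_register_io_memory()
   as phys_offset, so (phys_offset & ~TARGET_PAGE_MASK) selects a handler. */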
2735 /* XXX: temporary until new memory mapping API */
2736 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2738 PhysPageDesc *p;
2740 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2741 if (!p)
2742 return IO_MEM_UNASSIGNED;
2743 return p->phys_offset;
2746 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2748 if (kvm_enabled())
2749 kvm_coalesce_mmio_region(addr, size);
2752 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2754 if (kvm_enabled())
2755 kvm_uncoalesce_mmio_region(addr, size);
2758 void qemu_flush_coalesced_mmio_buffer(void)
2760 if (kvm_enabled())
2761 kvm_flush_coalesced_mmio_buffer();
2764 #if defined(__linux__) && !defined(TARGET_S390X)
2766 #include <sys/vfs.h>
2768 #define HUGETLBFS_MAGIC 0x958458f6
2770 static long gethugepagesize(const char *path)
2772 struct statfs fs;
2773 int ret;
2775 do {
2776 ret = statfs(path, &fs);
2777 } while (ret != 0 && errno == EINTR);
2779 if (ret != 0) {
2780 perror(path);
2781 return 0;
2784 if (fs.f_type != HUGETLBFS_MAGIC)
2785 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2787 return fs.f_bsize;
2790 static void *file_ram_alloc(RAMBlock *block,
2791 ram_addr_t memory,
2792 const char *path)
2794 char *filename;
2795 void *area;
2796 int fd;
2797 #ifdef MAP_POPULATE
2798 int flags;
2799 #endif
2800 unsigned long hpagesize;
2802 hpagesize = gethugepagesize(path);
2803 if (!hpagesize) {
2804 return NULL;
2807 if (memory < hpagesize) {
2808 return NULL;
2811 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2812 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2813 return NULL;
2816 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2817 return NULL;
2820 fd = mkstemp(filename);
2821 if (fd < 0) {
2822 perror("unable to create backing store for hugepages");
2823 free(filename);
2824 return NULL;
2826 unlink(filename);
2827 free(filename);
2829 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2832 * ftruncate is not supported by hugetlbfs in older
2833 * hosts, so don't bother bailing out on errors.
2834 * If anything goes wrong with it under other filesystems,
2835 * mmap will fail.
2837 if (ftruncate(fd, memory))
2838 perror("ftruncate");
2840 #ifdef MAP_POPULATE
2841 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2842 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2843 * to sidestep this quirk.
2845 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2846 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2847 #else
2848 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2849 #endif
2850 if (area == MAP_FAILED) {
2851 perror("file_ram_alloc: can't mmap RAM pages");
2852 close(fd);
2853 return (NULL);
2855 block->fd = fd;
2856 return area;
2858 #endif
2860 static ram_addr_t find_ram_offset(ram_addr_t size)
2862 RAMBlock *block, *next_block;
2863 ram_addr_t offset = 0, mingap = ULONG_MAX;
2865 if (QLIST_EMPTY(&ram_list.blocks))
2866 return 0;
2868 QLIST_FOREACH(block, &ram_list.blocks, next) {
2869 ram_addr_t end, next = ULONG_MAX;
2871 end = block->offset + block->length;
2873 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2874 if (next_block->offset >= end) {
2875 next = MIN(next, next_block->offset);
2878 if (next - end >= size && next - end < mingap) {
2879 offset = end;
2880 mingap = next - end;
2883 return offset;
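/* Illustrative note (an addition, not part of the original source):
   find_ram_offset() returns the start of the smallest existing gap that can
   hold 'size'. With blocks covering [0, 0x100000) and [0x300000, 0x400000),
   a request for 0x80000 bytes returns 0x100000 (the 0x200000-byte hole),
   while a request for 0x300000 bytes skips that hole and returns 0x400000,
   the space after the last block. */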
2886 static ram_addr_t last_ram_offset(void)
2888 RAMBlock *block;
2889 ram_addr_t last = 0;
2891 QLIST_FOREACH(block, &ram_list.blocks, next)
2892 last = MAX(last, block->offset + block->length);
2894 return last;
2897 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2898 ram_addr_t size, void *host)
2900 RAMBlock *new_block, *block;
2902 size = TARGET_PAGE_ALIGN(size);
2903 new_block = qemu_mallocz(sizeof(*new_block));
2905 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2906 char *id = dev->parent_bus->info->get_dev_path(dev);
2907 if (id) {
2908 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2909 qemu_free(id);
2912 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2914 QLIST_FOREACH(block, &ram_list.blocks, next) {
2915 if (!strcmp(block->idstr, new_block->idstr)) {
2916 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2917 new_block->idstr);
2918 abort();
2922 new_block->offset = find_ram_offset(size);
2923 if (host) {
2924 new_block->host = host;
2925 new_block->flags |= RAM_PREALLOC_MASK;
2926 } else {
2927 if (mem_path) {
2928 #if defined (__linux__) && !defined(TARGET_S390X)
2929 new_block->host = file_ram_alloc(new_block, size, mem_path);
2930 if (!new_block->host) {
2931 new_block->host = qemu_vmalloc(size);
2932 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2934 #else
2935 fprintf(stderr, "-mem-path option unsupported\n");
2936 exit(1);
2937 #endif
2938 } else {
2939 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2940 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2941 a system-defined value, which is at least 256GB. Larger systems
2942 have larger values. We put the guest between the end of data
2943 segment (system break) and this value. We use 32GB as a base to
2944 have enough room for the system break to grow. */
2945 new_block->host = mmap((void*)0x800000000, size,
2946 PROT_EXEC|PROT_READ|PROT_WRITE,
2947 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2948 if (new_block->host == MAP_FAILED) {
2949 fprintf(stderr, "Allocating RAM failed\n");
2950 abort();
2952 #else
2953 if (xen_mapcache_enabled()) {
2954 xen_ram_alloc(new_block->offset, size);
2955 } else {
2956 new_block->host = qemu_vmalloc(size);
2958 #endif
2959 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2962 new_block->length = size;
2964 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2966 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2967 last_ram_offset() >> TARGET_PAGE_BITS);
2968 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2969 0xff, size >> TARGET_PAGE_BITS);
2971 if (kvm_enabled())
2972 kvm_setup_guest_memory(new_block->host, size);
2974 return new_block->offset;
2977 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2979 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
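/* Illustrative note (an addition, not part of the original source): device
   models usually allocate their backing RAM with a name scoped to the
   device; the device and name below are hypothetical:

       ram_addr_t vram_offset = qemu_ram_alloc(&s->busdev.qdev, "mydev.vram",
                                               0x100000);
       uint8_t *vram = qemu_get_ram_ptr(vram_offset);

   The returned offset is what gets registered in the physical memory map;
   the host pointer is only for memory the device itself owns. */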
2982 void qemu_ram_free_from_ptr(ram_addr_t addr)
2984 RAMBlock *block;
2986 QLIST_FOREACH(block, &ram_list.blocks, next) {
2987 if (addr == block->offset) {
2988 QLIST_REMOVE(block, next);
2989 qemu_free(block);
2990 return;
2995 void qemu_ram_free(ram_addr_t addr)
2997 RAMBlock *block;
2999 QLIST_FOREACH(block, &ram_list.blocks, next) {
3000 if (addr == block->offset) {
3001 QLIST_REMOVE(block, next);
3002 if (block->flags & RAM_PREALLOC_MASK) {
3004 } else if (mem_path) {
3005 #if defined (__linux__) && !defined(TARGET_S390X)
3006 if (block->fd) {
3007 munmap(block->host, block->length);
3008 close(block->fd);
3009 } else {
3010 qemu_vfree(block->host);
3012 #else
3013 abort();
3014 #endif
3015 } else {
3016 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3017 munmap(block->host, block->length);
3018 #else
3019 if (xen_mapcache_enabled()) {
3020 qemu_invalidate_entry(block->host);
3021 } else {
3022 qemu_vfree(block->host);
3024 #endif
3026 qemu_free(block);
3027 return;
3033 #ifndef _WIN32
3034 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3036 RAMBlock *block;
3037 ram_addr_t offset;
3038 int flags;
3039 void *area, *vaddr;
3041 QLIST_FOREACH(block, &ram_list.blocks, next) {
3042 offset = addr - block->offset;
3043 if (offset < block->length) {
3044 vaddr = block->host + offset;
3045 if (block->flags & RAM_PREALLOC_MASK) {
3047 } else {
3048 flags = MAP_FIXED;
3049 munmap(vaddr, length);
3050 if (mem_path) {
3051 #if defined(__linux__) && !defined(TARGET_S390X)
3052 if (block->fd) {
3053 #ifdef MAP_POPULATE
3054 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3055 MAP_PRIVATE;
3056 #else
3057 flags |= MAP_PRIVATE;
3058 #endif
3059 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3060 flags, block->fd, offset);
3061 } else {
3062 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3063 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3064 flags, -1, 0);
3066 #else
3067 abort();
3068 #endif
3069 } else {
3070 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3071 flags |= MAP_SHARED | MAP_ANONYMOUS;
3072 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3073 flags, -1, 0);
3074 #else
3075 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3076 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3077 flags, -1, 0);
3078 #endif
3080 if (area != vaddr) {
3081 fprintf(stderr, "Could not remap addr: %lx@%lx\n",
3082 length, addr);
3083 exit(1);
3085 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3087 return;
3091 #endif /* !_WIN32 */
3093 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3094 With the exception of the softmmu code in this file, this should
3095 only be used for local memory (e.g. video ram) that the device owns,
3096 and knows it isn't going to access beyond the end of the block.
3098 It should not be used for general purpose DMA.
3099 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3101 void *qemu_get_ram_ptr(ram_addr_t addr)
3103 RAMBlock *block;
3105 QLIST_FOREACH(block, &ram_list.blocks, next) {
3106 if (addr - block->offset < block->length) {
3107 /* Move this entry to the start of the list. */
3108 if (block != QLIST_FIRST(&ram_list.blocks)) {
3109 QLIST_REMOVE(block, next);
3110 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3112 if (xen_mapcache_enabled()) {
3113 /* We need to check whether the requested address is in RAM
3114 * because we don't want to map the entire guest memory in QEMU.
3115 * In that case, just map up to the end of the page.
3117 if (block->offset == 0) {
3118 return qemu_map_cache(addr, 0, 0);
3119 } else if (block->host == NULL) {
3120 block->host = qemu_map_cache(block->offset, block->length, 1);
3123 return block->host + (addr - block->offset);
3127 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3128 abort();
3130 return NULL;
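/* Illustrative note (an addition, not part of the original source): callers
   that only need a transient mapping pair the lookup with qemu_put_ram_ptr(),
   as cpu_physical_memory_rw() below does:

       void *p = qemu_get_ram_ptr(addr1);
       memcpy(p, buf, l);
       qemu_put_ram_ptr(p);

   In this version qemu_put_ram_ptr() only emits a trace event, but it marks
   the point where a map-cache based implementation could drop the mapping. */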
3133 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3134 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3136 void *qemu_safe_ram_ptr(ram_addr_t addr)
3138 RAMBlock *block;
3140 QLIST_FOREACH(block, &ram_list.blocks, next) {
3141 if (addr - block->offset < block->length) {
3142 if (xen_mapcache_enabled()) {
3143 /* We need to check whether the requested address is in RAM
3144 * because we don't want to map the entire guest memory in QEMU.
3145 * In that case, just map up to the end of the page.
3147 if (block->offset == 0) {
3148 return qemu_map_cache(addr, 0, 0);
3149 } else if (block->host == NULL) {
3150 block->host = qemu_map_cache(block->offset, block->length, 1);
3153 return block->host + (addr - block->offset);
3157 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3158 abort();
3160 return NULL;
3163 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3164 * but takes a size argument */
3165 void *qemu_ram_ptr_length(target_phys_addr_t addr, target_phys_addr_t *size)
3167 if (xen_mapcache_enabled())
3168 return qemu_map_cache(addr, *size, 1);
3169 else {
3170 RAMBlock *block;
3172 QLIST_FOREACH(block, &ram_list.blocks, next) {
3173 if (addr - block->offset < block->length) {
3174 if (addr - block->offset + *size > block->length)
3175 *size = block->length - addr + block->offset;
3176 return block->host + (addr - block->offset);
3180 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3181 abort();
3183 *size = 0;
3184 return NULL;
3188 void qemu_put_ram_ptr(void *addr)
3190 trace_qemu_put_ram_ptr(addr);
3193 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3195 RAMBlock *block;
3196 uint8_t *host = ptr;
3198 if (xen_mapcache_enabled()) {
3199 *ram_addr = qemu_ram_addr_from_mapcache(ptr);
3200 return 0;
3203 QLIST_FOREACH(block, &ram_list.blocks, next) {
3204 /* This case occurs when the block is not mapped. */
3205 if (block->host == NULL) {
3206 continue;
3208 if (host - block->host < block->length) {
3209 *ram_addr = block->offset + (host - block->host);
3210 return 0;
3214 return -1;
3217 /* Some of the softmmu routines need to translate from a host pointer
3218 (typically a TLB entry) back to a ram offset. */
3219 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3221 ram_addr_t ram_addr;
3223 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3224 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3225 abort();
3227 return ram_addr;
3230 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3232 #ifdef DEBUG_UNASSIGNED
3233 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3234 #endif
3235 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3236 do_unassigned_access(addr, 0, 0, 0, 1);
3237 #endif
3238 return 0;
3241 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3243 #ifdef DEBUG_UNASSIGNED
3244 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3245 #endif
3246 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3247 do_unassigned_access(addr, 0, 0, 0, 2);
3248 #endif
3249 return 0;
3252 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3254 #ifdef DEBUG_UNASSIGNED
3255 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3256 #endif
3257 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3258 do_unassigned_access(addr, 0, 0, 0, 4);
3259 #endif
3260 return 0;
3263 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3265 #ifdef DEBUG_UNASSIGNED
3266 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3267 #endif
3268 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3269 do_unassigned_access(addr, 1, 0, 0, 1);
3270 #endif
3273 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3275 #ifdef DEBUG_UNASSIGNED
3276 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3277 #endif
3278 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3279 do_unassigned_access(addr, 1, 0, 0, 2);
3280 #endif
3283 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3285 #ifdef DEBUG_UNASSIGNED
3286 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3287 #endif
3288 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3289 do_unassigned_access(addr, 1, 0, 0, 4);
3290 #endif
3293 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3294 unassigned_mem_readb,
3295 unassigned_mem_readw,
3296 unassigned_mem_readl,
3299 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3300 unassigned_mem_writeb,
3301 unassigned_mem_writew,
3302 unassigned_mem_writel,
3305 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3306 uint32_t val)
3308 int dirty_flags;
3309 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3310 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3311 #if !defined(CONFIG_USER_ONLY)
3312 tb_invalidate_phys_page_fast(ram_addr, 1);
3313 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3314 #endif
3316 stb_p(qemu_get_ram_ptr(ram_addr), val);
3317 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3318 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3319 /* we remove the notdirty callback only if the code has been
3320 flushed */
3321 if (dirty_flags == 0xff)
3322 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3325 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3326 uint32_t val)
3328 int dirty_flags;
3329 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3330 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3331 #if !defined(CONFIG_USER_ONLY)
3332 tb_invalidate_phys_page_fast(ram_addr, 2);
3333 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3334 #endif
3336 stw_p(qemu_get_ram_ptr(ram_addr), val);
3337 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3338 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3339 /* we remove the notdirty callback only if the code has been
3340 flushed */
3341 if (dirty_flags == 0xff)
3342 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3345 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3346 uint32_t val)
3348 int dirty_flags;
3349 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3350 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3351 #if !defined(CONFIG_USER_ONLY)
3352 tb_invalidate_phys_page_fast(ram_addr, 4);
3353 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3354 #endif
3356 stl_p(qemu_get_ram_ptr(ram_addr), val);
3357 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3358 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3359 /* we remove the notdirty callback only if the code has been
3360 flushed */
3361 if (dirty_flags == 0xff)
3362 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3365 static CPUReadMemoryFunc * const error_mem_read[3] = {
3366 NULL, /* never used */
3367 NULL, /* never used */
3368 NULL, /* never used */
3371 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3372 notdirty_mem_writeb,
3373 notdirty_mem_writew,
3374 notdirty_mem_writel,
3377 /* Generate a debug exception if a watchpoint has been hit. */
3378 static void check_watchpoint(int offset, int len_mask, int flags)
3380 CPUState *env = cpu_single_env;
3381 target_ulong pc, cs_base;
3382 TranslationBlock *tb;
3383 target_ulong vaddr;
3384 CPUWatchpoint *wp;
3385 int cpu_flags;
3387 if (env->watchpoint_hit) {
3388 /* We re-entered the check after replacing the TB. Now raise
3389 * the debug interrupt so that it will trigger after the
3390 * current instruction. */
3391 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3392 return;
3394 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3395 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3396 if ((vaddr == (wp->vaddr & len_mask) ||
3397 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3398 wp->flags |= BP_WATCHPOINT_HIT;
3399 if (!env->watchpoint_hit) {
3400 env->watchpoint_hit = wp;
3401 tb = tb_find_pc(env->mem_io_pc);
3402 if (!tb) {
3403 cpu_abort(env, "check_watchpoint: could not find TB for "
3404 "pc=%p", (void *)env->mem_io_pc);
3406 cpu_restore_state(tb, env, env->mem_io_pc);
3407 tb_phys_invalidate(tb, -1);
3408 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3409 env->exception_index = EXCP_DEBUG;
3410 } else {
3411 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3412 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3414 cpu_resume_from_signal(env, NULL);
3416 } else {
3417 wp->flags &= ~BP_WATCHPOINT_HIT;
3422 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3423 so these check for a hit then pass through to the normal out-of-line
3424 phys routines. */
3425 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3427 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3428 return ldub_phys(addr);
3431 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3433 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3434 return lduw_phys(addr);
3437 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3439 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3440 return ldl_phys(addr);
3443 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3444 uint32_t val)
3446 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3447 stb_phys(addr, val);
3450 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3451 uint32_t val)
3453 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3454 stw_phys(addr, val);
3457 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3458 uint32_t val)
3460 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3461 stl_phys(addr, val);
3464 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3465 watch_mem_readb,
3466 watch_mem_readw,
3467 watch_mem_readl,
3470 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3471 watch_mem_writeb,
3472 watch_mem_writew,
3473 watch_mem_writel,
3476 static inline uint32_t subpage_readlen (subpage_t *mmio,
3477 target_phys_addr_t addr,
3478 unsigned int len)
3480 unsigned int idx = SUBPAGE_IDX(addr);
3481 #if defined(DEBUG_SUBPAGE)
3482 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3483 mmio, len, addr, idx);
3484 #endif
3486 addr += mmio->region_offset[idx];
3487 idx = mmio->sub_io_index[idx];
3488 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3491 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3492 uint32_t value, unsigned int len)
3494 unsigned int idx = SUBPAGE_IDX(addr);
3495 #if defined(DEBUG_SUBPAGE)
3496 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3497 __func__, mmio, len, addr, idx, value);
3498 #endif
3500 addr += mmio->region_offset[idx];
3501 idx = mmio->sub_io_index[idx];
3502 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3505 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3507 return subpage_readlen(opaque, addr, 0);
3510 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3511 uint32_t value)
3513 subpage_writelen(opaque, addr, value, 0);
3516 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3518 return subpage_readlen(opaque, addr, 1);
3521 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3522 uint32_t value)
3524 subpage_writelen(opaque, addr, value, 1);
3527 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3529 return subpage_readlen(opaque, addr, 2);
3532 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3533 uint32_t value)
3535 subpage_writelen(opaque, addr, value, 2);
3538 static CPUReadMemoryFunc * const subpage_read[] = {
3539 &subpage_readb,
3540 &subpage_readw,
3541 &subpage_readl,
3544 static CPUWriteMemoryFunc * const subpage_write[] = {
3545 &subpage_writeb,
3546 &subpage_writew,
3547 &subpage_writel,
3550 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3551 ram_addr_t memory, ram_addr_t region_offset)
3553 int idx, eidx;
3555 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3556 return -1;
3557 idx = SUBPAGE_IDX(start);
3558 eidx = SUBPAGE_IDX(end);
3559 #if defined(DEBUG_SUBPAGE)
3560 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3561 mmio, start, end, idx, eidx, memory);
3562 #endif
3563 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3564 memory = IO_MEM_UNASSIGNED;
3565 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3566 for (; idx <= eidx; idx++) {
3567 mmio->sub_io_index[idx] = memory;
3568 mmio->region_offset[idx] = region_offset;
3571 return 0;
3574 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3575 ram_addr_t orig_memory,
3576 ram_addr_t region_offset)
3578 subpage_t *mmio;
3579 int subpage_memory;
3581 mmio = qemu_mallocz(sizeof(subpage_t));
3583 mmio->base = base;
3584 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3585 DEVICE_NATIVE_ENDIAN);
3586 #if defined(DEBUG_SUBPAGE)
3587 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3588 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3589 #endif
3590 *phys = subpage_memory | IO_MEM_SUBPAGE;
3591 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3593 return mmio;
3596 static int get_free_io_mem_idx(void)
3598 int i;
3600 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3601 if (!io_mem_used[i]) {
3602 io_mem_used[i] = 1;
3603 return i;
3605 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3606 return -1;
3610 * Usually, devices operate in little endian mode. There are devices out
3611 * there that operate in big endian too. Each device gets byte swapped
3612 * mmio if plugged onto a CPU that does the other endianness.
3614 * CPU Device swap?
3616 * little little no
3617 * little big yes
3618 * big little yes
3619 * big big no
3622 typedef struct SwapEndianContainer {
3623 CPUReadMemoryFunc *read[3];
3624 CPUWriteMemoryFunc *write[3];
3625 void *opaque;
3626 } SwapEndianContainer;
3628 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3630 uint32_t val;
3631 SwapEndianContainer *c = opaque;
3632 val = c->read[0](c->opaque, addr);
3633 return val;
3636 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3638 uint32_t val;
3639 SwapEndianContainer *c = opaque;
3640 val = bswap16(c->read[1](c->opaque, addr));
3641 return val;
3644 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3646 uint32_t val;
3647 SwapEndianContainer *c = opaque;
3648 val = bswap32(c->read[2](c->opaque, addr));
3649 return val;
3652 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3653 swapendian_mem_readb,
3654 swapendian_mem_readw,
3655 swapendian_mem_readl
3658 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3659 uint32_t val)
3661 SwapEndianContainer *c = opaque;
3662 c->write[0](c->opaque, addr, val);
3665 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3666 uint32_t val)
3668 SwapEndianContainer *c = opaque;
3669 c->write[1](c->opaque, addr, bswap16(val));
3672 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3673 uint32_t val)
3675 SwapEndianContainer *c = opaque;
3676 c->write[2](c->opaque, addr, bswap32(val));
3679 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3680 swapendian_mem_writeb,
3681 swapendian_mem_writew,
3682 swapendian_mem_writel
3685 static void swapendian_init(int io_index)
3687 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3688 int i;
3690 /* Swap mmio for big endian targets */
3691 c->opaque = io_mem_opaque[io_index];
3692 for (i = 0; i < 3; i++) {
3693 c->read[i] = io_mem_read[io_index][i];
3694 c->write[i] = io_mem_write[io_index][i];
3696 io_mem_read[io_index][i] = swapendian_readfn[i];
3697 io_mem_write[io_index][i] = swapendian_writefn[i];
3699 io_mem_opaque[io_index] = c;
3702 static void swapendian_del(int io_index)
3704 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3705 qemu_free(io_mem_opaque[io_index]);
3709 /* mem_read and mem_write are arrays of functions containing the
3710 function to access byte (index 0), word (index 1) and dword (index
3711 2). Functions can be omitted with a NULL function pointer.
3712 If io_index is non zero, the corresponding io zone is
3713 modified. If it is zero, a new io zone is allocated. The return
3714 value can be used with cpu_register_physical_memory(). (-1) is
3715 returned if error. */
3716 static int cpu_register_io_memory_fixed(int io_index,
3717 CPUReadMemoryFunc * const *mem_read,
3718 CPUWriteMemoryFunc * const *mem_write,
3719 void *opaque, enum device_endian endian)
3721 int i;
3723 if (io_index <= 0) {
3724 io_index = get_free_io_mem_idx();
3725 if (io_index == -1)
3726 return io_index;
3727 } else {
3728 io_index >>= IO_MEM_SHIFT;
3729 if (io_index >= IO_MEM_NB_ENTRIES)
3730 return -1;
3733 for (i = 0; i < 3; ++i) {
3734 io_mem_read[io_index][i]
3735 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3737 for (i = 0; i < 3; ++i) {
3738 io_mem_write[io_index][i]
3739 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3741 io_mem_opaque[io_index] = opaque;
3743 switch (endian) {
3744 case DEVICE_BIG_ENDIAN:
3745 #ifndef TARGET_WORDS_BIGENDIAN
3746 swapendian_init(io_index);
3747 #endif
3748 break;
3749 case DEVICE_LITTLE_ENDIAN:
3750 #ifdef TARGET_WORDS_BIGENDIAN
3751 swapendian_init(io_index);
3752 #endif
3753 break;
3754 case DEVICE_NATIVE_ENDIAN:
3755 default:
3756 break;
3759 return (io_index << IO_MEM_SHIFT);
3762 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3763 CPUWriteMemoryFunc * const *mem_write,
3764 void *opaque, enum device_endian endian)
3766 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
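/* Illustrative note (an addition, not part of the original source): a device
   typically registers its MMIO callbacks like this; the handler arrays and
   state pointer are hypothetical:

       static CPUReadMemoryFunc * const mydev_read[3] = {
           mydev_readb, mydev_readw, mydev_readl,
       };
       static CPUWriteMemoryFunc * const mydev_write[3] = {
           mydev_writeb, mydev_writew, mydev_writel,
       };

       int io = cpu_register_io_memory(mydev_read, mydev_write, s,
                                       DEVICE_LITTLE_ENDIAN);
       cpu_register_physical_memory_log(base, 0x1000, io, 0, false);

   With DEVICE_LITTLE_ENDIAN on a big-endian target build, swapendian_init()
   above transparently byte swaps the 16- and 32-bit accesses. */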
3769 void cpu_unregister_io_memory(int io_table_address)
3771 int i;
3772 int io_index = io_table_address >> IO_MEM_SHIFT;
3774 swapendian_del(io_index);
3776 for (i=0;i < 3; i++) {
3777 io_mem_read[io_index][i] = unassigned_mem_read[i];
3778 io_mem_write[io_index][i] = unassigned_mem_write[i];
3780 io_mem_opaque[io_index] = NULL;
3781 io_mem_used[io_index] = 0;
3784 static void io_mem_init(void)
3786 int i;
3788 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3789 unassigned_mem_write, NULL,
3790 DEVICE_NATIVE_ENDIAN);
3791 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3792 unassigned_mem_write, NULL,
3793 DEVICE_NATIVE_ENDIAN);
3794 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3795 notdirty_mem_write, NULL,
3796 DEVICE_NATIVE_ENDIAN);
3797 for (i=0; i<5; i++)
3798 io_mem_used[i] = 1;
3800 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3801 watch_mem_write, NULL,
3802 DEVICE_NATIVE_ENDIAN);
3805 #endif /* !defined(CONFIG_USER_ONLY) */
3807 /* physical memory access (slow version, mainly for debug) */
3808 #if defined(CONFIG_USER_ONLY)
3809 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3810 uint8_t *buf, int len, int is_write)
3812 int l, flags;
3813 target_ulong page;
3814 void * p;
3816 while (len > 0) {
3817 page = addr & TARGET_PAGE_MASK;
3818 l = (page + TARGET_PAGE_SIZE) - addr;
3819 if (l > len)
3820 l = len;
3821 flags = page_get_flags(page);
3822 if (!(flags & PAGE_VALID))
3823 return -1;
3824 if (is_write) {
3825 if (!(flags & PAGE_WRITE))
3826 return -1;
3827 /* XXX: this code should not depend on lock_user */
3828 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3829 return -1;
3830 memcpy(p, buf, l);
3831 unlock_user(p, addr, l);
3832 } else {
3833 if (!(flags & PAGE_READ))
3834 return -1;
3835 /* XXX: this code should not depend on lock_user */
3836 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3837 return -1;
3838 memcpy(buf, p, l);
3839 unlock_user(p, addr, 0);
3841 len -= l;
3842 buf += l;
3843 addr += l;
3845 return 0;
3848 #else
3849 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3850 int len, int is_write)
3852 int l, io_index;
3853 uint8_t *ptr;
3854 uint32_t val;
3855 target_phys_addr_t page;
3856 unsigned long pd;
3857 PhysPageDesc *p;
3859 while (len > 0) {
3860 page = addr & TARGET_PAGE_MASK;
3861 l = (page + TARGET_PAGE_SIZE) - addr;
3862 if (l > len)
3863 l = len;
3864 p = phys_page_find(page >> TARGET_PAGE_BITS);
3865 if (!p) {
3866 pd = IO_MEM_UNASSIGNED;
3867 } else {
3868 pd = p->phys_offset;
3871 if (is_write) {
3872 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3873 target_phys_addr_t addr1 = addr;
3874 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3875 if (p)
3876 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3877 /* XXX: could force cpu_single_env to NULL to avoid
3878 potential bugs */
3879 if (l >= 4 && ((addr1 & 3) == 0)) {
3880 /* 32 bit write access */
3881 val = ldl_p(buf);
3882 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3883 l = 4;
3884 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3885 /* 16 bit write access */
3886 val = lduw_p(buf);
3887 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3888 l = 2;
3889 } else {
3890 /* 8 bit write access */
3891 val = ldub_p(buf);
3892 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3893 l = 1;
3895 } else {
3896 unsigned long addr1;
3897 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3898 /* RAM case */
3899 ptr = qemu_get_ram_ptr(addr1);
3900 memcpy(ptr, buf, l);
3901 if (!cpu_physical_memory_is_dirty(addr1)) {
3902 /* invalidate code */
3903 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3904 /* set dirty bit */
3905 cpu_physical_memory_set_dirty_flags(
3906 addr1, (0xff & ~CODE_DIRTY_FLAG));
3908 qemu_put_ram_ptr(ptr);
3910 } else {
3911 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3912 !(pd & IO_MEM_ROMD)) {
3913 target_phys_addr_t addr1 = addr;
3914 /* I/O case */
3915 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3916 if (p)
3917 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3918 if (l >= 4 && ((addr1 & 3) == 0)) {
3919 /* 32 bit read access */
3920 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3921 stl_p(buf, val);
3922 l = 4;
3923 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3924 /* 16 bit read access */
3925 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3926 stw_p(buf, val);
3927 l = 2;
3928 } else {
3929 /* 8 bit read access */
3930 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3931 stb_p(buf, val);
3932 l = 1;
3934 } else {
3935 /* RAM case */
3936 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3937 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3938 qemu_put_ram_ptr(ptr);
3941 len -= l;
3942 buf += l;
3943 addr += l;
3947 /* used for ROM loading : can write in RAM and ROM */
3948 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3949 const uint8_t *buf, int len)
3951 int l;
3952 uint8_t *ptr;
3953 target_phys_addr_t page;
3954 unsigned long pd;
3955 PhysPageDesc *p;
3957 while (len > 0) {
3958 page = addr & TARGET_PAGE_MASK;
3959 l = (page + TARGET_PAGE_SIZE) - addr;
3960 if (l > len)
3961 l = len;
3962 p = phys_page_find(page >> TARGET_PAGE_BITS);
3963 if (!p) {
3964 pd = IO_MEM_UNASSIGNED;
3965 } else {
3966 pd = p->phys_offset;
3969 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3970 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3971 !(pd & IO_MEM_ROMD)) {
3972 /* do nothing */
3973 } else {
3974 unsigned long addr1;
3975 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3976 /* ROM/RAM case */
3977 ptr = qemu_get_ram_ptr(addr1);
3978 memcpy(ptr, buf, l);
3979 qemu_put_ram_ptr(ptr);
3981 len -= l;
3982 buf += l;
3983 addr += l;
3987 typedef struct {
3988 void *buffer;
3989 target_phys_addr_t addr;
3990 target_phys_addr_t len;
3991 } BounceBuffer;
3993 static BounceBuffer bounce;
3995 typedef struct MapClient {
3996 void *opaque;
3997 void (*callback)(void *opaque);
3998 QLIST_ENTRY(MapClient) link;
3999 } MapClient;
4001 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4002 = QLIST_HEAD_INITIALIZER(map_client_list);
4004 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4006 MapClient *client = qemu_malloc(sizeof(*client));
4008 client->opaque = opaque;
4009 client->callback = callback;
4010 QLIST_INSERT_HEAD(&map_client_list, client, link);
4011 return client;
4014 void cpu_unregister_map_client(void *_client)
4016 MapClient *client = (MapClient *)_client;
4018 QLIST_REMOVE(client, link);
4019 qemu_free(client);
4022 static void cpu_notify_map_clients(void)
4024 MapClient *client;
4026 while (!QLIST_EMPTY(&map_client_list)) {
4027 client = QLIST_FIRST(&map_client_list);
4028 client->callback(client->opaque);
4029 cpu_unregister_map_client(client);
4033 /* Map a physical memory region into a host virtual address.
4034 * May map a subset of the requested range, given by and returned in *plen.
4035 * May return NULL if resources needed to perform the mapping are exhausted.
4036 * Use only for reads OR writes - not for read-modify-write operations.
4037 * Use cpu_register_map_client() to know when retrying the map operation is
4038 * likely to succeed.
4040 void *cpu_physical_memory_map(target_phys_addr_t addr,
4041 target_phys_addr_t *plen,
4042 int is_write)
4044 target_phys_addr_t len = *plen;
4045 target_phys_addr_t todo = 0;
4046 int l;
4047 target_phys_addr_t page;
4048 unsigned long pd;
4049 PhysPageDesc *p;
4050 target_phys_addr_t addr1 = addr;
4052 while (len > 0) {
4053 page = addr & TARGET_PAGE_MASK;
4054 l = (page + TARGET_PAGE_SIZE) - addr;
4055 if (l > len)
4056 l = len;
4057 p = phys_page_find(page >> TARGET_PAGE_BITS);
4058 if (!p) {
4059 pd = IO_MEM_UNASSIGNED;
4060 } else {
4061 pd = p->phys_offset;
4064 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4065 if (todo || bounce.buffer) {
4066 break;
4068 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4069 bounce.addr = addr;
4070 bounce.len = l;
4071 if (!is_write) {
4072 cpu_physical_memory_read(addr, bounce.buffer, l);
4075 *plen = l;
4076 return bounce.buffer;
4079 len -= l;
4080 addr += l;
4081 todo += l;
4083 *plen = todo;
4084 return qemu_ram_ptr_length(addr1, plen);
4087 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4088 * Will also mark the memory as dirty if is_write == 1. access_len gives
4089 * the amount of memory that was actually read or written by the caller.
4091 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4092 int is_write, target_phys_addr_t access_len)
4094 if (buffer != bounce.buffer) {
4095 if (is_write) {
4096 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4097 while (access_len) {
4098 unsigned l;
4099 l = TARGET_PAGE_SIZE;
4100 if (l > access_len)
4101 l = access_len;
4102 if (!cpu_physical_memory_is_dirty(addr1)) {
4103 /* invalidate code */
4104 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4105 /* set dirty bit */
4106 cpu_physical_memory_set_dirty_flags(
4107 addr1, (0xff & ~CODE_DIRTY_FLAG));
4109 addr1 += l;
4110 access_len -= l;
4113 if (xen_mapcache_enabled()) {
4114 qemu_invalidate_entry(buffer);
4116 return;
4118 if (is_write) {
4119 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4121 qemu_vfree(bounce.buffer);
4122 bounce.buffer = NULL;
4123 cpu_notify_map_clients();
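/* Illustrative note (an addition, not part of the original source): DMA-style
   users pair map and unmap, retrying via cpu_register_map_client() when the
   bounce buffer is busy. A minimal sketch of a device reading guest memory
   (consume() is a hypothetical consumer):

       target_phys_addr_t plen = len;
       void *p = cpu_physical_memory_map(guest_addr, &plen, 0);
       if (p) {
           consume(p, plen);
           cpu_physical_memory_unmap(p, plen, 0, plen);
       }

   Note that plen may come back smaller than requested, so real callers loop. */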
4126 /* warning: addr must be aligned */
4127 uint32_t ldl_phys(target_phys_addr_t addr)
4129 int io_index;
4130 uint8_t *ptr;
4131 uint32_t val;
4132 unsigned long pd;
4133 PhysPageDesc *p;
4135 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4136 if (!p) {
4137 pd = IO_MEM_UNASSIGNED;
4138 } else {
4139 pd = p->phys_offset;
4142 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4143 !(pd & IO_MEM_ROMD)) {
4144 /* I/O case */
4145 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4146 if (p)
4147 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4148 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4149 } else {
4150 /* RAM case */
4151 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4152 (addr & ~TARGET_PAGE_MASK);
4153 val = ldl_p(ptr);
4155 return val;
4158 /* warning: addr must be aligned */
4159 uint64_t ldq_phys(target_phys_addr_t addr)
4161 int io_index;
4162 uint8_t *ptr;
4163 uint64_t val;
4164 unsigned long pd;
4165 PhysPageDesc *p;
4167 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4168 if (!p) {
4169 pd = IO_MEM_UNASSIGNED;
4170 } else {
4171 pd = p->phys_offset;
4174 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4175 !(pd & IO_MEM_ROMD)) {
4176 /* I/O case */
4177 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4178 if (p)
4179 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4180 #ifdef TARGET_WORDS_BIGENDIAN
4181 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4182 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4183 #else
4184 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4185 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4186 #endif
4187 } else {
4188 /* RAM case */
4189 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4190 (addr & ~TARGET_PAGE_MASK);
4191 val = ldq_p(ptr);
4193 return val;
4194 }
4195
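/* Editor's note: for I/O (non-RAM) pages the 64-bit load above is synthesized
 * from two 32-bit device reads, and TARGET_WORDS_BIGENDIAN decides which half
 * each read supplies.  Worked example with hypothetical register contents
 * read[addr] = 0x11223344 and read[addr + 4] = 0x55667788:
 *
 *   little-endian target:  val = 0x5566778811223344  (addr supplies bits 31..0)
 *   big-endian target:     val = 0x1122334455667788  (addr supplies bits 63..32)
 */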
4196 /* XXX: optimize */
4197 uint32_t ldub_phys(target_phys_addr_t addr)
4199 uint8_t val;
4200 cpu_physical_memory_read(addr, &val, 1);
4201 return val;
4204 /* warning: addr must be aligned */
4205 uint32_t lduw_phys(target_phys_addr_t addr)
4207 int io_index;
4208 uint8_t *ptr;
4209 uint64_t val;
4210 unsigned long pd;
4211 PhysPageDesc *p;
4213 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4214 if (!p) {
4215 pd = IO_MEM_UNASSIGNED;
4216 } else {
4217 pd = p->phys_offset;
4220 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4221 !(pd & IO_MEM_ROMD)) {
4222 /* I/O case */
4223 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4224 if (p)
4225 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4226 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4227 } else {
4228 /* RAM case */
4229 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4230 (addr & ~TARGET_PAGE_MASK);
4231 val = lduw_p(ptr);
4233 return val;
4236 /* warning: addr must be aligned. The ram page is not marked as dirty
4237 and the code inside is not invalidated. It is useful if the dirty
4238 bits are used to track modified PTEs */
4239 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4241 int io_index;
4242 uint8_t *ptr;
4243 unsigned long pd;
4244 PhysPageDesc *p;
4246 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4247 if (!p) {
4248 pd = IO_MEM_UNASSIGNED;
4249 } else {
4250 pd = p->phys_offset;
4253 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4254 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4255 if (p)
4256 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4257 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4258 } else {
4259 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4260 ptr = qemu_get_ram_ptr(addr1);
4261 stl_p(ptr, val);
4263 if (unlikely(in_migration)) {
4264 if (!cpu_physical_memory_is_dirty(addr1)) {
4265 /* invalidate code */
4266 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4267 /* set dirty bit */
4268 cpu_physical_memory_set_dirty_flags(
4269 addr1, (0xff & ~CODE_DIRTY_FLAG));
4275 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4277 int io_index;
4278 uint8_t *ptr;
4279 unsigned long pd;
4280 PhysPageDesc *p;
4282 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4283 if (!p) {
4284 pd = IO_MEM_UNASSIGNED;
4285 } else {
4286 pd = p->phys_offset;
4289 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4290 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4291 if (p)
4292 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4293 #ifdef TARGET_WORDS_BIGENDIAN
4294 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4295 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4296 #else
4297 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4298 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4299 #endif
4300 } else {
4301 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4302 (addr & ~TARGET_PAGE_MASK);
4303 stq_p(ptr, val);
4304 }
4305 }
4306
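/* Editorial sketch, not part of the original exec.c: the "notdirty" stores
 * above are meant for callers such as a target MMU helper that sets
 * accessed/modified flags in a guest page table entry during a page walk.
 * As the comment on stl_phys_notdirty() says, the page is deliberately not
 * marked dirty, so the dirty bits remain usable for detecting guest writes
 * to the page tables.  The flag layout and helper name here are hypothetical.
 */
enum { EXAMPLE_PTE_ACCESSED = 1u << 5, EXAMPLE_PTE_MODIFIED = 1u << 6 };

static void example_update_pte_flags(target_phys_addr_t pte_addr, int is_store)
{
    uint32_t pte = ldl_phys(pte_addr);
    uint32_t new_pte = pte | EXAMPLE_PTE_ACCESSED;

    if (is_store) {
        new_pte |= EXAMPLE_PTE_MODIFIED;
    }
    if (new_pte != pte) {
        /* store without the dirty marking / code invalidation of stl_phys() */
        stl_phys_notdirty(pte_addr, new_pte);
    }
}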
4307 /* warning: addr must be aligned */
4308 void stl_phys(target_phys_addr_t addr, uint32_t val)
4310 int io_index;
4311 uint8_t *ptr;
4312 unsigned long pd;
4313 PhysPageDesc *p;
4315 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4316 if (!p) {
4317 pd = IO_MEM_UNASSIGNED;
4318 } else {
4319 pd = p->phys_offset;
4322 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4323 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4324 if (p)
4325 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4326 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4327 } else {
4328 unsigned long addr1;
4329 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4330 /* RAM case */
4331 ptr = qemu_get_ram_ptr(addr1);
4332 stl_p(ptr, val);
4333 if (!cpu_physical_memory_is_dirty(addr1)) {
4334 /* invalidate code */
4335 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4336 /* set dirty bit */
4337 cpu_physical_memory_set_dirty_flags(addr1,
4338 (0xff & ~CODE_DIRTY_FLAG));
4343 /* XXX: optimize */
4344 void stb_phys(target_phys_addr_t addr, uint32_t val)
4346 uint8_t v = val;
4347 cpu_physical_memory_write(addr, &v, 1);
4350 /* warning: addr must be aligned */
4351 void stw_phys(target_phys_addr_t addr, uint32_t val)
4353 int io_index;
4354 uint8_t *ptr;
4355 unsigned long pd;
4356 PhysPageDesc *p;
4358 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4359 if (!p) {
4360 pd = IO_MEM_UNASSIGNED;
4361 } else {
4362 pd = p->phys_offset;
4365 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4366 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4367 if (p)
4368 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4369 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4370 } else {
4371 unsigned long addr1;
4372 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4373 /* RAM case */
4374 ptr = qemu_get_ram_ptr(addr1);
4375 stw_p(ptr, val);
4376 if (!cpu_physical_memory_is_dirty(addr1)) {
4377 /* invalidate code */
4378 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4379 /* set dirty bit */
4380 cpu_physical_memory_set_dirty_flags(addr1,
4381 (0xff & ~CODE_DIRTY_FLAG));
4386 /* XXX: optimize */
4387 void stq_phys(target_phys_addr_t addr, uint64_t val)
4389 val = tswap64(val);
4390 cpu_physical_memory_write(addr, &val, 8);
4393 /* virtual memory access for debug (includes writing to ROM) */
4394 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4395 uint8_t *buf, int len, int is_write)
4397 int l;
4398 target_phys_addr_t phys_addr;
4399 target_ulong page;
4401 while (len > 0) {
4402 page = addr & TARGET_PAGE_MASK;
4403 phys_addr = cpu_get_phys_page_debug(env, page);
4404 /* if no physical page mapped, return an error */
4405 if (phys_addr == -1)
4406 return -1;
4407 l = (page + TARGET_PAGE_SIZE) - addr;
4408 if (l > len)
4409 l = len;
4410 phys_addr += (addr & ~TARGET_PAGE_MASK);
4411 if (is_write)
4412 cpu_physical_memory_write_rom(phys_addr, buf, l);
4413 else
4414 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4415 len -= l;
4416 buf += l;
4417 addr += l;
4418 }
4419 return 0;
4420 }
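/* Editorial sketch, not part of the original exec.c: cpu_memory_rw_debug()
 * operates on guest virtual addresses of a particular CPU and writes even to
 * ROM, which is why debug consumers such as the gdb stub use it instead of
 * cpu_physical_memory_rw().  A hypothetical caller reading a NUL-terminated
 * guest string:
 */
static int example_read_guest_cstring(CPUState *env, target_ulong vaddr,
                                      char *buf, int buf_size)
{
    int i;

    for (i = 0; i < buf_size - 1; i++) {
        uint8_t byte;

        if (cpu_memory_rw_debug(env, vaddr + i, &byte, 1, 0) < 0) {
            return -1;          /* no physical page mapped at this address */
        }
        buf[i] = byte;
        if (byte == 0) {
            return i;           /* length excluding the terminator */
        }
    }
    buf[i] = '\0';
    return i;
}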
4421 #endif
4423 /* in deterministic execution mode, instructions doing device I/Os
4424 must be at the end of the TB */
4425 void cpu_io_recompile(CPUState *env, void *retaddr)
4427 TranslationBlock *tb;
4428 uint32_t n, cflags;
4429 target_ulong pc, cs_base;
4430 uint64_t flags;
4432 tb = tb_find_pc((unsigned long)retaddr);
4433 if (!tb) {
4434 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4435 retaddr);
4437 n = env->icount_decr.u16.low + tb->icount;
4438 cpu_restore_state(tb, env, (unsigned long)retaddr);
4439 /* Calculate how many instructions had been executed before the fault
4440 occurred. */
4441 n = n - env->icount_decr.u16.low;
4442 /* Generate a new TB ending on the I/O insn. */
4443 n++;
4444 /* On MIPS and SH, delay slot instructions can only be restarted if
4445 they were already the first instruction in the TB. If this is not
4446 the first instruction in a TB then re-execute the preceding
4447 branch. */
4448 #if defined(TARGET_MIPS)
4449 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4450 env->active_tc.PC -= 4;
4451 env->icount_decr.u16.low++;
4452 env->hflags &= ~MIPS_HFLAG_BMASK;
4454 #elif defined(TARGET_SH4)
4455 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4456 && n > 1) {
4457 env->pc -= 2;
4458 env->icount_decr.u16.low++;
4459 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4461 #endif
4462 /* This should never happen. */
4463 if (n > CF_COUNT_MASK)
4464 cpu_abort(env, "TB too big during recompile");
4466 cflags = n | CF_LAST_IO;
4467 pc = tb->pc;
4468 cs_base = tb->cs_base;
4469 flags = tb->flags;
4470 tb_phys_invalidate(tb, -1);
4471 /* FIXME: In theory this could raise an exception. In practice
4472 we have already translated the block once so it's probably ok. */
4473 tb_gen_code(env, pc, cs_base, flags, cflags);
4474 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4475 the first in the TB) then we end up generating a whole new TB and
4476 repeating the fault, which is horribly inefficient.
4477 Better would be to execute just this insn uncached, or generate a
4478 second new TB. */
4479 cpu_resume_from_signal(env, NULL);
4480 }
4481
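/* Editor's note: cpu_io_recompile() is not called by device models; it is
 * reached from the softmmu memory helpers when, in icount (deterministic)
 * mode, a device access turns out not to be the last instruction of the
 * current TB.  Simplified sketch of such a call site (names approximate,
 * not the literal softmmu_template.h code):
 */
#if 0
    if (use_icount && !can_do_io(env)) {
        /* retranslate so that the I/O insn terminates its TB, then restart */
        cpu_io_recompile(env, retaddr);
    }
#endif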
4482 #if !defined(CONFIG_USER_ONLY)
4484 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4486 int i, target_code_size, max_target_code_size;
4487 int direct_jmp_count, direct_jmp2_count, cross_page;
4488 TranslationBlock *tb;
4490 target_code_size = 0;
4491 max_target_code_size = 0;
4492 cross_page = 0;
4493 direct_jmp_count = 0;
4494 direct_jmp2_count = 0;
4495 for(i = 0; i < nb_tbs; i++) {
4496 tb = &tbs[i];
4497 target_code_size += tb->size;
4498 if (tb->size > max_target_code_size)
4499 max_target_code_size = tb->size;
4500 if (tb->page_addr[1] != -1)
4501 cross_page++;
4502 if (tb->tb_next_offset[0] != 0xffff) {
4503 direct_jmp_count++;
4504 if (tb->tb_next_offset[1] != 0xffff) {
4505 direct_jmp2_count++;
4506 }
4507 }
4508 }
4509 /* XXX: avoid using doubles ? */
4510 cpu_fprintf(f, "Translation buffer state:\n");
4511 cpu_fprintf(f, "gen code size %td/%ld\n",
4512 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4513 cpu_fprintf(f, "TB count %d/%d\n",
4514 nb_tbs, code_gen_max_blocks);
4515 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4516 nb_tbs ? target_code_size / nb_tbs : 0,
4517 max_target_code_size);
4518 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4519 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4520 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4521 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4522 cross_page,
4523 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4524 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4525 direct_jmp_count,
4526 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4527 direct_jmp2_count,
4528 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4529 cpu_fprintf(f, "\nStatistics:\n");
4530 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4531 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4532 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4533 tcg_dump_info(f, cpu_fprintf);
4534 }
4535
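/* Editorial sketch, not part of the original exec.c: dump_exec_info() is the
 * routine behind the monitor's "info jit" output.  Any (FILE *, fprintf-style
 * callback) pair can be passed, e.g. to print the translation statistics to
 * stderr:
 */
static void example_dump_jit_stats(void)
{
    dump_exec_info(stderr, fprintf);
}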
4536 #define MMUSUFFIX _cmmu
4537 #define GETPC() NULL
4538 #define env cpu_single_env
4539 #define SOFTMMU_CODE_ACCESS
4541 #define SHIFT 0
4542 #include "softmmu_template.h"
4544 #define SHIFT 1
4545 #include "softmmu_template.h"
4547 #define SHIFT 2
4548 #include "softmmu_template.h"
4550 #define SHIFT 3
4551 #include "softmmu_template.h"
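/* Editor's note: each #include above re-enters softmmu_template.h with a new
 * SHIFT value; inside the template the access width is DATA_SIZE = (1 << SHIFT),
 * so these four inclusions instantiate the 1-, 2-, 4- and 8-byte slow-path
 * accessors for code fetches (SOFTMMU_CODE_ACCESS, _cmmu suffix), with
 * cpu_single_env standing in for env as #defined above.
 */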
4553 #undef env
4555 #endif