raw-posix: Always check paio_init result
[qemu.git] / exec.c
blob476b507e5ec9ce13a09fd710a77c50aa8fdd1448
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 //#define DEBUG_TB_INVALIDATE
61 //#define DEBUG_FLUSH
62 //#define DEBUG_TLB
63 //#define DEBUG_UNASSIGNED
65 /* make various TB consistency checks */
66 //#define DEBUG_TB_CHECK
67 //#define DEBUG_TLB_CHECK
69 //#define DEBUG_IOPORT
70 //#define DEBUG_SUBPAGE
72 #if !defined(CONFIG_USER_ONLY)
73 /* TB consistency checks only implemented for usermode emulation. */
74 #undef DEBUG_TB_CHECK
75 #endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
79 static TranslationBlock *tbs;
80 static int code_gen_max_blocks;
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
86 #if defined(__arm__) || defined(__sparc_v9__)
87 /* The prologue must be reachable with a direct jump. ARM and Sparc64
88 have limited branch ranges (possibly also PPC) so place it in a
89 section close to code segment. */
90 #define code_gen_section \
91 __attribute__((__section__(".gen_code"))) \
92 __attribute__((aligned (32)))
93 #elif defined(_WIN32)
94 /* Maximum alignment for Win32 is 16. */
95 #define code_gen_section \
96 __attribute__((aligned (16)))
97 #else
98 #define code_gen_section \
99 __attribute__((aligned (32)))
100 #endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
115 static MemoryRegion *system_memory;
117 #endif
119 CPUState *first_cpu;
120 /* current CPU in the current thread. It is only valid inside
121 cpu_exec() */
122 CPUState *cpu_single_env;
123 /* 0 = Do not count executed instructions.
124 1 = Precise instruction counting.
125 2 = Adaptive rate instruction counting. */
126 int use_icount = 0;
127 /* Current instruction counter. While executing translated code this may
128 include some instructions that have not yet been executed. */
129 int64_t qemu_icount;
131 typedef struct PageDesc {
132 /* list of TBs intersecting this ram page */
133 TranslationBlock *first_tb;
134 /* in order to optimize self modifying code, we count the number
135 of lookups we do to a given page to use a bitmap */
136 unsigned int code_write_count;
137 uint8_t *code_bitmap;
138 #if defined(CONFIG_USER_ONLY)
139 unsigned long flags;
140 #endif
141 } PageDesc;
143 /* In system mode we want L1_MAP to be based on ram offsets,
144 while in user mode we want it to be based on virtual addresses. */
145 #if !defined(CONFIG_USER_ONLY)
146 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
147 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
148 #else
149 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
150 #endif
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
153 #endif
155 /* Size of the L2 (and L3, etc) page tables. */
156 #define L2_BITS 10
157 #define L2_SIZE (1 << L2_BITS)
159 /* The bits remaining after N lower levels of page tables. */
160 #define P_L1_BITS_REM \
161 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
162 #define V_L1_BITS_REM \
163 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165 /* Size of the L1 page table. Avoid silly small sizes. */
166 #if P_L1_BITS_REM < 4
167 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
168 #else
169 #define P_L1_BITS P_L1_BITS_REM
170 #endif
172 #if V_L1_BITS_REM < 4
173 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
174 #else
175 #define V_L1_BITS V_L1_BITS_REM
176 #endif
178 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
179 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
181 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
182 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
184 unsigned long qemu_real_host_page_size;
185 unsigned long qemu_host_page_bits;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
/*
 * Make the host memory range [addr, addr + size) readable, writable and
 * executable.
 *
 * A failure to change the protection is reported on stderr but is not
 * fatal here: the caller keeps running and will only fault if the host
 * really refuses to execute from the range.
 */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD old_protect;

    if (!VirtualProtect(addr, size,
                        PAGE_EXECUTE_READWRITE, &old_protect)) {
        fprintf(stderr, "map_exec: VirtualProtect failed (error %lu)\n",
                (unsigned long)GetLastError());
    }
}
#else
static void map_exec(void *addr, long size)
{
    unsigned long start, end, page_size;

    page_size = getpagesize();

    /* mprotect() works on whole pages: round the start down ... */
    start = (unsigned long)addr;
    start &= ~(page_size - 1);

    /* ... and the end up to the next page boundary. */
    end = (unsigned long)addr + size;
    end += page_size - 1;
    end &= ~(page_size - 1);

    if (mprotect((void *)start, end - start,
                 PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
        perror("map_exec: mprotect");
    }
}
#endif
258 static void page_init(void)
260 /* NOTE: we can always suppose that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_bits = 0;
277 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
278 qemu_host_page_bits++;
279 qemu_host_page_mask = ~(qemu_host_page_size - 1);
281 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
283 #ifdef HAVE_KINFO_GETVMMAP
284 struct kinfo_vmentry *freep;
285 int i, cnt;
287 freep = kinfo_getvmmap(getpid(), &cnt);
288 if (freep) {
289 mmap_lock();
290 for (i = 0; i < cnt; i++) {
291 unsigned long startaddr, endaddr;
293 startaddr = freep[i].kve_start;
294 endaddr = freep[i].kve_end;
295 if (h2g_valid(startaddr)) {
296 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
298 if (h2g_valid(endaddr)) {
299 endaddr = h2g(endaddr);
300 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
301 } else {
302 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
303 endaddr = ~0ul;
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
305 #endif
309 free(freep);
310 mmap_unlock();
312 #else
313 FILE *f;
315 last_brk = (unsigned long)sbrk(0);
317 f = fopen("/compat/linux/proc/self/maps", "r");
318 if (f) {
319 mmap_lock();
321 do {
322 unsigned long startaddr, endaddr;
323 int n;
325 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
327 if (n == 2 && h2g_valid(startaddr)) {
328 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
330 if (h2g_valid(endaddr)) {
331 endaddr = h2g(endaddr);
332 } else {
333 endaddr = ~0ul;
335 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
337 } while (!feof(f));
339 fclose(f);
340 mmap_unlock();
342 #endif
344 #endif
347 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
349 PageDesc *pd;
350 void **lp;
351 int i;
353 #if defined(CONFIG_USER_ONLY)
354 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
355 # define ALLOC(P, SIZE) \
356 do { \
357 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
358 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
359 } while (0)
360 #else
361 # define ALLOC(P, SIZE) \
362 do { P = qemu_mallocz(SIZE); } while (0)
363 #endif
365 /* Level 1. Always allocated. */
366 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
368 /* Level 2..N-1. */
369 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
370 void **p = *lp;
372 if (p == NULL) {
373 if (!alloc) {
374 return NULL;
376 ALLOC(p, sizeof(void *) * L2_SIZE);
377 *lp = p;
380 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
383 pd = *lp;
384 if (pd == NULL) {
385 if (!alloc) {
386 return NULL;
388 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
389 *lp = pd;
392 #undef ALLOC
394 return pd + (index & (L2_SIZE - 1));
397 static inline PageDesc *page_find(tb_page_addr_t index)
399 return page_find_alloc(index, 0);
402 #if !defined(CONFIG_USER_ONLY)
403 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
405 PhysPageDesc *pd;
406 void **lp;
407 int i;
409 /* Level 1. Always allocated. */
410 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
412 /* Level 2..N-1. */
413 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
414 void **p = *lp;
415 if (p == NULL) {
416 if (!alloc) {
417 return NULL;
419 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
421 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
424 pd = *lp;
425 if (pd == NULL) {
426 int i;
428 if (!alloc) {
429 return NULL;
432 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
434 for (i = 0; i < L2_SIZE; i++) {
435 pd[i].phys_offset = IO_MEM_UNASSIGNED;
436 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
440 return pd + (index & (L2_SIZE - 1));
443 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
445 return phys_page_find_alloc(index, 0);
448 static void tlb_protect_code(ram_addr_t ram_addr);
449 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
450 target_ulong vaddr);
451 #define mmap_lock() do { } while(0)
452 #define mmap_unlock() do { } while(0)
453 #endif
455 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
457 #if defined(CONFIG_USER_ONLY)
458 /* Currently it is not recommended to allocate big chunks of data in
459 user mode. It will change when a dedicated libc will be used */
460 #define USE_STATIC_CODE_GEN_BUFFER
461 #endif
463 #ifdef USE_STATIC_CODE_GEN_BUFFER
464 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
465 __attribute__((aligned (CODE_GEN_ALIGN)));
466 #endif
468 static void code_gen_alloc(unsigned long tb_size)
470 #ifdef USE_STATIC_CODE_GEN_BUFFER
471 code_gen_buffer = static_code_gen_buffer;
472 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
473 map_exec(code_gen_buffer, code_gen_buffer_size);
474 #else
475 code_gen_buffer_size = tb_size;
476 if (code_gen_buffer_size == 0) {
477 #if defined(CONFIG_USER_ONLY)
478 /* in user mode, phys_ram_size is not meaningful */
479 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
480 #else
481 /* XXX: needs adjustments */
482 code_gen_buffer_size = (unsigned long)(ram_size / 4);
483 #endif
485 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
486 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
487 /* The code gen buffer location may have constraints depending on
488 the host cpu and OS */
489 #if defined(__linux__)
491 int flags;
492 void *start = NULL;
494 flags = MAP_PRIVATE | MAP_ANONYMOUS;
495 #if defined(__x86_64__)
496 flags |= MAP_32BIT;
497 /* Cannot map more than that */
498 if (code_gen_buffer_size > (800 * 1024 * 1024))
499 code_gen_buffer_size = (800 * 1024 * 1024);
500 #elif defined(__sparc_v9__)
501 // Map the buffer below 2G, so we can use direct calls and branches
502 flags |= MAP_FIXED;
503 start = (void *) 0x60000000UL;
504 if (code_gen_buffer_size > (512 * 1024 * 1024))
505 code_gen_buffer_size = (512 * 1024 * 1024);
506 #elif defined(__arm__)
507 /* Map the buffer below 32M, so we can use direct calls and branches */
508 flags |= MAP_FIXED;
509 start = (void *) 0x01000000UL;
510 if (code_gen_buffer_size > 16 * 1024 * 1024)
511 code_gen_buffer_size = 16 * 1024 * 1024;
512 #elif defined(__s390x__)
513 /* Map the buffer so that we can use direct calls and branches. */
514 /* We have a +- 4GB range on the branches; leave some slop. */
515 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
516 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
518 start = (void *)0x90000000UL;
519 #endif
520 code_gen_buffer = mmap(start, code_gen_buffer_size,
521 PROT_WRITE | PROT_READ | PROT_EXEC,
522 flags, -1, 0);
523 if (code_gen_buffer == MAP_FAILED) {
524 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
525 exit(1);
528 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
529 || defined(__DragonFly__) || defined(__OpenBSD__)
531 int flags;
532 void *addr = NULL;
533 flags = MAP_PRIVATE | MAP_ANONYMOUS;
534 #if defined(__x86_64__)
535 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
536 * 0x40000000 is free */
537 flags |= MAP_FIXED;
538 addr = (void *)0x40000000;
539 /* Cannot map more than that */
540 if (code_gen_buffer_size > (800 * 1024 * 1024))
541 code_gen_buffer_size = (800 * 1024 * 1024);
542 #elif defined(__sparc_v9__)
543 // Map the buffer below 2G, so we can use direct calls and branches
544 flags |= MAP_FIXED;
545 addr = (void *) 0x60000000UL;
546 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
547 code_gen_buffer_size = (512 * 1024 * 1024);
549 #endif
550 code_gen_buffer = mmap(addr, code_gen_buffer_size,
551 PROT_WRITE | PROT_READ | PROT_EXEC,
552 flags, -1, 0);
553 if (code_gen_buffer == MAP_FAILED) {
554 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
555 exit(1);
558 #else
559 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
560 map_exec(code_gen_buffer, code_gen_buffer_size);
561 #endif
562 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
563 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
564 code_gen_buffer_max_size = code_gen_buffer_size -
565 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
566 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
567 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
570 /* Must be called before using the QEMU cpus. 'tb_size' is the size
571 (in bytes) allocated to the translation buffer. Zero means default
572 size. */
573 void cpu_exec_init_all(unsigned long tb_size)
575 cpu_gen_init();
576 code_gen_alloc(tb_size);
577 code_gen_ptr = code_gen_buffer;
578 page_init();
579 #if !defined(CONFIG_USER_ONLY)
580 memory_map_init();
581 io_mem_init();
582 #endif
583 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
584 /* There's no guest base to take into account, so go ahead and
585 initialize the prologue now. */
586 tcg_prologue_init(&tcg_ctx);
587 #endif
590 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
592 static int cpu_common_post_load(void *opaque, int version_id)
594 CPUState *env = opaque;
596 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
597 version_id is increased. */
598 env->interrupt_request &= ~0x01;
599 tlb_flush(env, 1);
601 return 0;
604 static const VMStateDescription vmstate_cpu_common = {
605 .name = "cpu_common",
606 .version_id = 1,
607 .minimum_version_id = 1,
608 .minimum_version_id_old = 1,
609 .post_load = cpu_common_post_load,
610 .fields = (VMStateField []) {
611 VMSTATE_UINT32(halted, CPUState),
612 VMSTATE_UINT32(interrupt_request, CPUState),
613 VMSTATE_END_OF_LIST()
616 #endif
618 CPUState *qemu_get_cpu(int cpu)
620 CPUState *env = first_cpu;
622 while (env) {
623 if (env->cpu_index == cpu)
624 break;
625 env = env->next_cpu;
628 return env;
631 void cpu_exec_init(CPUState *env)
633 CPUState **penv;
634 int cpu_index;
636 #if defined(CONFIG_USER_ONLY)
637 cpu_list_lock();
638 #endif
639 env->next_cpu = NULL;
640 penv = &first_cpu;
641 cpu_index = 0;
642 while (*penv != NULL) {
643 penv = &(*penv)->next_cpu;
644 cpu_index++;
646 env->cpu_index = cpu_index;
647 env->numa_node = 0;
648 QTAILQ_INIT(&env->breakpoints);
649 QTAILQ_INIT(&env->watchpoints);
650 #ifndef CONFIG_USER_ONLY
651 env->thread_id = qemu_get_thread_id();
652 #endif
653 *penv = env;
654 #if defined(CONFIG_USER_ONLY)
655 cpu_list_unlock();
656 #endif
657 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
658 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
659 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
660 cpu_save, cpu_load, env);
661 #endif
664 /* Allocate a new translation block. Flush the translation buffer if
665 too many translation blocks or too much generated code. */
666 static TranslationBlock *tb_alloc(target_ulong pc)
668 TranslationBlock *tb;
670 if (nb_tbs >= code_gen_max_blocks ||
671 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
672 return NULL;
673 tb = &tbs[nb_tbs++];
674 tb->pc = pc;
675 tb->cflags = 0;
676 return tb;
679 void tb_free(TranslationBlock *tb)
681 /* In practice this is mostly used for single use temporary TB
682 Ignore the hard cases and just back up if this TB happens to
683 be the last one generated. */
684 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
685 code_gen_ptr = tb->tc_ptr;
686 nb_tbs--;
690 static inline void invalidate_page_bitmap(PageDesc *p)
692 if (p->code_bitmap) {
693 qemu_free(p->code_bitmap);
694 p->code_bitmap = NULL;
696 p->code_write_count = 0;
699 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
701 static void page_flush_tb_1 (int level, void **lp)
703 int i;
705 if (*lp == NULL) {
706 return;
708 if (level == 0) {
709 PageDesc *pd = *lp;
710 for (i = 0; i < L2_SIZE; ++i) {
711 pd[i].first_tb = NULL;
712 invalidate_page_bitmap(pd + i);
714 } else {
715 void **pp = *lp;
716 for (i = 0; i < L2_SIZE; ++i) {
717 page_flush_tb_1 (level - 1, pp + i);
722 static void page_flush_tb(void)
724 int i;
725 for (i = 0; i < V_L1_SIZE; i++) {
726 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
730 /* flush all the translation blocks */
731 /* XXX: tb_flush is currently not thread safe */
732 void tb_flush(CPUState *env1)
734 CPUState *env;
735 #if defined(DEBUG_FLUSH)
736 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
737 (unsigned long)(code_gen_ptr - code_gen_buffer),
738 nb_tbs, nb_tbs > 0 ?
739 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
740 #endif
741 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
742 cpu_abort(env1, "Internal error: code buffer overflow\n");
744 nb_tbs = 0;
746 for(env = first_cpu; env != NULL; env = env->next_cpu) {
747 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
750 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
751 page_flush_tb();
753 code_gen_ptr = code_gen_buffer;
754 /* XXX: flush processor icache at this point if cache flush is
755 expensive */
756 tb_flush_count++;
759 #ifdef DEBUG_TB_CHECK
761 static void tb_invalidate_check(target_ulong address)
763 TranslationBlock *tb;
764 int i;
765 address &= TARGET_PAGE_MASK;
766 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
767 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
768 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
769 address >= tb->pc + tb->size)) {
770 printf("ERROR invalidate: address=" TARGET_FMT_lx
771 " PC=%08lx size=%04x\n",
772 address, (long)tb->pc, tb->size);
778 /* verify that all the pages have correct rights for code */
779 static void tb_page_check(void)
781 TranslationBlock *tb;
782 int i, flags1, flags2;
784 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
785 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
786 flags1 = page_get_flags(tb->pc);
787 flags2 = page_get_flags(tb->pc + tb->size - 1);
788 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
789 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
790 (long)tb->pc, tb->size, flags1, flags2);
796 #endif
798 /* invalidate one TB */
799 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
800 int next_offset)
802 TranslationBlock *tb1;
803 for(;;) {
804 tb1 = *ptb;
805 if (tb1 == tb) {
806 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
807 break;
809 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
813 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
815 TranslationBlock *tb1;
816 unsigned int n1;
818 for(;;) {
819 tb1 = *ptb;
820 n1 = (long)tb1 & 3;
821 tb1 = (TranslationBlock *)((long)tb1 & ~3);
822 if (tb1 == tb) {
823 *ptb = tb1->page_next[n1];
824 break;
826 ptb = &tb1->page_next[n1];
830 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
832 TranslationBlock *tb1, **ptb;
833 unsigned int n1;
835 ptb = &tb->jmp_next[n];
836 tb1 = *ptb;
837 if (tb1) {
838 /* find tb(n) in circular list */
839 for(;;) {
840 tb1 = *ptb;
841 n1 = (long)tb1 & 3;
842 tb1 = (TranslationBlock *)((long)tb1 & ~3);
843 if (n1 == n && tb1 == tb)
844 break;
845 if (n1 == 2) {
846 ptb = &tb1->jmp_first;
847 } else {
848 ptb = &tb1->jmp_next[n1];
851 /* now we can suppress tb(n) from the list */
852 *ptb = tb->jmp_next[n];
854 tb->jmp_next[n] = NULL;
858 /* reset the jump entry 'n' of a TB so that it is not chained to
859 another TB */
860 static inline void tb_reset_jump(TranslationBlock *tb, int n)
862 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
865 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
867 CPUState *env;
868 PageDesc *p;
869 unsigned int h, n1;
870 tb_page_addr_t phys_pc;
871 TranslationBlock *tb1, *tb2;
873 /* remove the TB from the hash list */
874 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
875 h = tb_phys_hash_func(phys_pc);
876 tb_remove(&tb_phys_hash[h], tb,
877 offsetof(TranslationBlock, phys_hash_next));
879 /* remove the TB from the page list */
880 if (tb->page_addr[0] != page_addr) {
881 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
882 tb_page_remove(&p->first_tb, tb);
883 invalidate_page_bitmap(p);
885 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
886 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
887 tb_page_remove(&p->first_tb, tb);
888 invalidate_page_bitmap(p);
891 tb_invalidated_flag = 1;
893 /* remove the TB from the hash list */
894 h = tb_jmp_cache_hash_func(tb->pc);
895 for(env = first_cpu; env != NULL; env = env->next_cpu) {
896 if (env->tb_jmp_cache[h] == tb)
897 env->tb_jmp_cache[h] = NULL;
900 /* suppress this TB from the two jump lists */
901 tb_jmp_remove(tb, 0);
902 tb_jmp_remove(tb, 1);
904 /* suppress any remaining jumps to this TB */
905 tb1 = tb->jmp_first;
906 for(;;) {
907 n1 = (long)tb1 & 3;
908 if (n1 == 2)
909 break;
910 tb1 = (TranslationBlock *)((long)tb1 & ~3);
911 tb2 = tb1->jmp_next[n1];
912 tb_reset_jump(tb1, n1);
913 tb1->jmp_next[n1] = NULL;
914 tb1 = tb2;
916 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
918 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab', where bit k lives in
   tab[k >> 3] at position (k & 7). */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int stop, first_mask, last_full;
    uint8_t *byte;

    stop = start + len;
    byte = tab + (start >> 3);
    /* bits from 'start' upwards within the first byte */
    first_mask = 0xff << (start & 7);

    if ((start & ~7) == (stop & ~7)) {
        /* the range begins and ends in the same byte */
        if (start < stop) {
            *byte |= first_mask & ~(0xff << (stop & 7));
        }
        return;
    }

    /* leading partial byte */
    *byte++ |= first_mask;

    /* whole bytes in the middle */
    start = (start + 8) & ~7;
    last_full = stop & ~7;
    while (start < last_full) {
        *byte++ = 0xff;
        start += 8;
    }

    /* trailing partial byte */
    if (start < stop) {
        *byte |= ~(0xff << (stop & 7));
    }
}
948 static void build_page_bitmap(PageDesc *p)
950 int n, tb_start, tb_end;
951 TranslationBlock *tb;
953 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
955 tb = p->first_tb;
956 while (tb != NULL) {
957 n = (long)tb & 3;
958 tb = (TranslationBlock *)((long)tb & ~3);
959 /* NOTE: this is subtle as a TB may span two physical pages */
960 if (n == 0) {
961 /* NOTE: tb_end may be after the end of the page, but
962 it is not a problem */
963 tb_start = tb->pc & ~TARGET_PAGE_MASK;
964 tb_end = tb_start + tb->size;
965 if (tb_end > TARGET_PAGE_SIZE)
966 tb_end = TARGET_PAGE_SIZE;
967 } else {
968 tb_start = 0;
969 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
971 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
972 tb = tb->page_next[n];
976 TranslationBlock *tb_gen_code(CPUState *env,
977 target_ulong pc, target_ulong cs_base,
978 int flags, int cflags)
980 TranslationBlock *tb;
981 uint8_t *tc_ptr;
982 tb_page_addr_t phys_pc, phys_page2;
983 target_ulong virt_page2;
984 int code_gen_size;
986 phys_pc = get_page_addr_code(env, pc);
987 tb = tb_alloc(pc);
988 if (!tb) {
989 /* flush must be done */
990 tb_flush(env);
991 /* cannot fail at this point */
992 tb = tb_alloc(pc);
993 /* Don't forget to invalidate previous TB info. */
994 tb_invalidated_flag = 1;
996 tc_ptr = code_gen_ptr;
997 tb->tc_ptr = tc_ptr;
998 tb->cs_base = cs_base;
999 tb->flags = flags;
1000 tb->cflags = cflags;
1001 cpu_gen_code(env, tb, &code_gen_size);
1002 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1004 /* check next page if needed */
1005 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1006 phys_page2 = -1;
1007 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1008 phys_page2 = get_page_addr_code(env, virt_page2);
1010 tb_link_page(tb, phys_pc, phys_page2);
1011 return tb;
1014 /* invalidate all TBs which intersect with the target physical page
1015 starting in range [start;end[. NOTE: start and end must refer to
1016 the same physical page. 'is_cpu_write_access' should be true if called
1017 from a real cpu write access: the virtual CPU will exit the current
1018 TB if code is modified inside this TB. */
1019 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1020 int is_cpu_write_access)
1022 TranslationBlock *tb, *tb_next, *saved_tb;
1023 CPUState *env = cpu_single_env;
1024 tb_page_addr_t tb_start, tb_end;
1025 PageDesc *p;
1026 int n;
1027 #ifdef TARGET_HAS_PRECISE_SMC
1028 int current_tb_not_found = is_cpu_write_access;
1029 TranslationBlock *current_tb = NULL;
1030 int current_tb_modified = 0;
1031 target_ulong current_pc = 0;
1032 target_ulong current_cs_base = 0;
1033 int current_flags = 0;
1034 #endif /* TARGET_HAS_PRECISE_SMC */
1036 p = page_find(start >> TARGET_PAGE_BITS);
1037 if (!p)
1038 return;
1039 if (!p->code_bitmap &&
1040 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1041 is_cpu_write_access) {
1042 /* build code bitmap */
1043 build_page_bitmap(p);
1046 /* we remove all the TBs in the range [start, end[ */
1047 /* XXX: see if in some cases it could be faster to invalidate all the code */
1048 tb = p->first_tb;
1049 while (tb != NULL) {
1050 n = (long)tb & 3;
1051 tb = (TranslationBlock *)((long)tb & ~3);
1052 tb_next = tb->page_next[n];
1053 /* NOTE: this is subtle as a TB may span two physical pages */
1054 if (n == 0) {
1055 /* NOTE: tb_end may be after the end of the page, but
1056 it is not a problem */
1057 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1058 tb_end = tb_start + tb->size;
1059 } else {
1060 tb_start = tb->page_addr[1];
1061 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1063 if (!(tb_end <= start || tb_start >= end)) {
1064 #ifdef TARGET_HAS_PRECISE_SMC
1065 if (current_tb_not_found) {
1066 current_tb_not_found = 0;
1067 current_tb = NULL;
1068 if (env->mem_io_pc) {
1069 /* now we have a real cpu fault */
1070 current_tb = tb_find_pc(env->mem_io_pc);
1073 if (current_tb == tb &&
1074 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1075 /* If we are modifying the current TB, we must stop
1076 its execution. We could be more precise by checking
1077 that the modification is after the current PC, but it
1078 would require a specialized function to partially
1079 restore the CPU state */
1081 current_tb_modified = 1;
1082 cpu_restore_state(current_tb, env, env->mem_io_pc);
1083 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1084 &current_flags);
1086 #endif /* TARGET_HAS_PRECISE_SMC */
1087 /* we need to do that to handle the case where a signal
1088 occurs while doing tb_phys_invalidate() */
1089 saved_tb = NULL;
1090 if (env) {
1091 saved_tb = env->current_tb;
1092 env->current_tb = NULL;
1094 tb_phys_invalidate(tb, -1);
1095 if (env) {
1096 env->current_tb = saved_tb;
1097 if (env->interrupt_request && env->current_tb)
1098 cpu_interrupt(env, env->interrupt_request);
1101 tb = tb_next;
1103 #if !defined(CONFIG_USER_ONLY)
1104 /* if no code remaining, no need to continue to use slow writes */
1105 if (!p->first_tb) {
1106 invalidate_page_bitmap(p);
1107 if (is_cpu_write_access) {
1108 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1111 #endif
1112 #ifdef TARGET_HAS_PRECISE_SMC
1113 if (current_tb_modified) {
1114 /* we generate a block containing just the instruction
1115 modifying the memory. It will ensure that it cannot modify
1116 itself */
1117 env->current_tb = NULL;
1118 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1119 cpu_resume_from_signal(env, NULL);
1121 #endif
1124 /* len must be <= 8 and start must be a multiple of len */
1125 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1127 PageDesc *p;
1128 int offset, b;
1129 #if 0
1130 if (1) {
1131 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1132 cpu_single_env->mem_io_vaddr, len,
1133 cpu_single_env->eip,
1134 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1136 #endif
1137 p = page_find(start >> TARGET_PAGE_BITS);
1138 if (!p)
1139 return;
1140 if (p->code_bitmap) {
1141 offset = start & ~TARGET_PAGE_MASK;
1142 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1143 if (b & ((1 << len) - 1))
1144 goto do_invalidate;
1145 } else {
1146 do_invalidate:
1147 tb_invalidate_phys_page_range(start, start + len, 1);
1151 #if !defined(CONFIG_SOFTMMU)
1152 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1153 unsigned long pc, void *puc)
1155 TranslationBlock *tb;
1156 PageDesc *p;
1157 int n;
1158 #ifdef TARGET_HAS_PRECISE_SMC
1159 TranslationBlock *current_tb = NULL;
1160 CPUState *env = cpu_single_env;
1161 int current_tb_modified = 0;
1162 target_ulong current_pc = 0;
1163 target_ulong current_cs_base = 0;
1164 int current_flags = 0;
1165 #endif
1167 addr &= TARGET_PAGE_MASK;
1168 p = page_find(addr >> TARGET_PAGE_BITS);
1169 if (!p)
1170 return;
1171 tb = p->first_tb;
1172 #ifdef TARGET_HAS_PRECISE_SMC
1173 if (tb && pc != 0) {
1174 current_tb = tb_find_pc(pc);
1176 #endif
1177 while (tb != NULL) {
1178 n = (long)tb & 3;
1179 tb = (TranslationBlock *)((long)tb & ~3);
1180 #ifdef TARGET_HAS_PRECISE_SMC
1181 if (current_tb == tb &&
1182 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1183 /* If we are modifying the current TB, we must stop
1184 its execution. We could be more precise by checking
1185 that the modification is after the current PC, but it
1186 would require a specialized function to partially
1187 restore the CPU state */
1189 current_tb_modified = 1;
1190 cpu_restore_state(current_tb, env, pc);
1191 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1192 &current_flags);
1194 #endif /* TARGET_HAS_PRECISE_SMC */
1195 tb_phys_invalidate(tb, addr);
1196 tb = tb->page_next[n];
1198 p->first_tb = NULL;
1199 #ifdef TARGET_HAS_PRECISE_SMC
1200 if (current_tb_modified) {
1201 /* we generate a block containing just the instruction
1202 modifying the memory. It will ensure that it cannot modify
1203 itself */
1204 env->current_tb = NULL;
1205 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1206 cpu_resume_from_signal(env, puc);
1208 #endif
1210 #endif
1212 /* add the tb in the target page and protect it if necessary */
1213 static inline void tb_alloc_page(TranslationBlock *tb,
1214 unsigned int n, tb_page_addr_t page_addr)
1216 PageDesc *p;
1217 #ifndef CONFIG_USER_ONLY
1218 bool page_already_protected;
1219 #endif
1221 tb->page_addr[n] = page_addr;
1222 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1223 tb->page_next[n] = p->first_tb;
1224 #ifndef CONFIG_USER_ONLY
1225 page_already_protected = p->first_tb != NULL;
1226 #endif
1227 p->first_tb = (TranslationBlock *)((long)tb | n);
1228 invalidate_page_bitmap(p);
1230 #if defined(TARGET_HAS_SMC) || 1
1232 #if defined(CONFIG_USER_ONLY)
1233 if (p->flags & PAGE_WRITE) {
1234 target_ulong addr;
1235 PageDesc *p2;
1236 int prot;
1238 /* force the host page as non writable (writes will have a
1239 page fault + mprotect overhead) */
1240 page_addr &= qemu_host_page_mask;
1241 prot = 0;
1242 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1243 addr += TARGET_PAGE_SIZE) {
1245 p2 = page_find (addr >> TARGET_PAGE_BITS);
1246 if (!p2)
1247 continue;
1248 prot |= p2->flags;
1249 p2->flags &= ~PAGE_WRITE;
1251 mprotect(g2h(page_addr), qemu_host_page_size,
1252 (prot & PAGE_BITS) & ~PAGE_WRITE);
1253 #ifdef DEBUG_TB_INVALIDATE
1254 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1255 page_addr);
1256 #endif
1258 #else
1259 /* if some code is already present, then the pages are already
1260 protected. So we handle the case where only the first TB is
1261 allocated in a physical page */
1262 if (!page_already_protected) {
1263 tlb_protect_code(page_addr);
1265 #endif
1267 #endif /* TARGET_HAS_SMC */
1270 /* add a new TB and link it to the physical page tables. phys_page2 is
1271 (-1) to indicate that only one page contains the TB. */
1272 void tb_link_page(TranslationBlock *tb,
1273 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1275 unsigned int h;
1276 TranslationBlock **ptb;
1278 /* Grab the mmap lock to stop another thread invalidating this TB
1279 before we are done. */
1280 mmap_lock();
1281 /* add in the physical hash table */
1282 h = tb_phys_hash_func(phys_pc);
1283 ptb = &tb_phys_hash[h];
1284 tb->phys_hash_next = *ptb;
1285 *ptb = tb;
1287 /* add in the page list */
1288 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1289 if (phys_page2 != -1)
1290 tb_alloc_page(tb, 1, phys_page2);
1291 else
1292 tb->page_addr[1] = -1;
1294 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1295 tb->jmp_next[0] = NULL;
1296 tb->jmp_next[1] = NULL;
1298 /* init original jump addresses */
1299 if (tb->tb_next_offset[0] != 0xffff)
1300 tb_reset_jump(tb, 0);
1301 if (tb->tb_next_offset[1] != 0xffff)
1302 tb_reset_jump(tb, 1);
1304 #ifdef DEBUG_TB_CHECK
1305 tb_page_check();
1306 #endif
1307 mmap_unlock();
1310 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1311 tb[1].tc_ptr. Return NULL if not found */
1312 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1314 int m_min, m_max, m;
1315 unsigned long v;
1316 TranslationBlock *tb;
1318 if (nb_tbs <= 0)
1319 return NULL;
1320 if (tc_ptr < (unsigned long)code_gen_buffer ||
1321 tc_ptr >= (unsigned long)code_gen_ptr)
1322 return NULL;
1323 /* binary search (cf Knuth) */
1324 m_min = 0;
1325 m_max = nb_tbs - 1;
1326 while (m_min <= m_max) {
1327 m = (m_min + m_max) >> 1;
1328 tb = &tbs[m];
1329 v = (unsigned long)tb->tc_ptr;
1330 if (v == tc_ptr)
1331 return tb;
1332 else if (tc_ptr < v) {
1333 m_max = m - 1;
1334 } else {
1335 m_min = m + 1;
1338 return &tbs[m_max];
1341 static void tb_reset_jump_recursive(TranslationBlock *tb);
1343 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1345 TranslationBlock *tb1, *tb_next, **ptb;
1346 unsigned int n1;
1348 tb1 = tb->jmp_next[n];
1349 if (tb1 != NULL) {
1350 /* find head of list */
1351 for(;;) {
1352 n1 = (long)tb1 & 3;
1353 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1354 if (n1 == 2)
1355 break;
1356 tb1 = tb1->jmp_next[n1];
1358 /* we are now sure now that tb jumps to tb1 */
1359 tb_next = tb1;
1361 /* remove tb from the jmp_first list */
1362 ptb = &tb_next->jmp_first;
1363 for(;;) {
1364 tb1 = *ptb;
1365 n1 = (long)tb1 & 3;
1366 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1367 if (n1 == n && tb1 == tb)
1368 break;
1369 ptb = &tb1->jmp_next[n1];
1371 *ptb = tb->jmp_next[n];
1372 tb->jmp_next[n] = NULL;
1374 /* suppress the jump to next tb in generated code */
1375 tb_reset_jump(tb, n);
1377 /* suppress jumps in the tb on which we could have jumped */
1378 tb_reset_jump_recursive(tb_next);
1382 static void tb_reset_jump_recursive(TranslationBlock *tb)
1384 tb_reset_jump_recursive2(tb, 0);
1385 tb_reset_jump_recursive2(tb, 1);
1388 #if defined(TARGET_HAS_ICE)
1389 #if defined(CONFIG_USER_ONLY)
1390 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1392 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1394 #else
1395 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1397 target_phys_addr_t addr;
1398 target_ulong pd;
1399 ram_addr_t ram_addr;
1400 PhysPageDesc *p;
1402 addr = cpu_get_phys_page_debug(env, pc);
1403 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1404 if (!p) {
1405 pd = IO_MEM_UNASSIGNED;
1406 } else {
1407 pd = p->phys_offset;
1409 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1410 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1412 #endif
1413 #endif /* TARGET_HAS_ICE */
1415 #if defined(CONFIG_USER_ONLY)
1416 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1421 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1422 int flags, CPUWatchpoint **watchpoint)
1424 return -ENOSYS;
1426 #else
1427 /* Add a watchpoint. */
1428 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1429 int flags, CPUWatchpoint **watchpoint)
1431 target_ulong len_mask = ~(len - 1);
1432 CPUWatchpoint *wp;
1434 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1435 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1436 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1437 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1438 return -EINVAL;
1440 wp = qemu_malloc(sizeof(*wp));
1442 wp->vaddr = addr;
1443 wp->len_mask = len_mask;
1444 wp->flags = flags;
1446 /* keep all GDB-injected watchpoints in front */
1447 if (flags & BP_GDB)
1448 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1449 else
1450 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1452 tlb_flush_page(env, addr);
1454 if (watchpoint)
1455 *watchpoint = wp;
1456 return 0;
1459 /* Remove a specific watchpoint. */
1460 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1461 int flags)
1463 target_ulong len_mask = ~(len - 1);
1464 CPUWatchpoint *wp;
1466 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1467 if (addr == wp->vaddr && len_mask == wp->len_mask
1468 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1469 cpu_watchpoint_remove_by_ref(env, wp);
1470 return 0;
1473 return -ENOENT;
1476 /* Remove a specific watchpoint by reference. */
1477 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1479 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1481 tlb_flush_page(env, watchpoint->vaddr);
1483 qemu_free(watchpoint);
1486 /* Remove all matching watchpoints. */
1487 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1489 CPUWatchpoint *wp, *next;
1491 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1492 if (wp->flags & mask)
1493 cpu_watchpoint_remove_by_ref(env, wp);
1496 #endif
1498 /* Add a breakpoint. */
1499 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1500 CPUBreakpoint **breakpoint)
1502 #if defined(TARGET_HAS_ICE)
1503 CPUBreakpoint *bp;
1505 bp = qemu_malloc(sizeof(*bp));
1507 bp->pc = pc;
1508 bp->flags = flags;
1510 /* keep all GDB-injected breakpoints in front */
1511 if (flags & BP_GDB)
1512 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1513 else
1514 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1516 breakpoint_invalidate(env, pc);
1518 if (breakpoint)
1519 *breakpoint = bp;
1520 return 0;
1521 #else
1522 return -ENOSYS;
1523 #endif
1526 /* Remove a specific breakpoint. */
1527 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1529 #if defined(TARGET_HAS_ICE)
1530 CPUBreakpoint *bp;
1532 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1533 if (bp->pc == pc && bp->flags == flags) {
1534 cpu_breakpoint_remove_by_ref(env, bp);
1535 return 0;
1538 return -ENOENT;
1539 #else
1540 return -ENOSYS;
1541 #endif
1544 /* Remove a specific breakpoint by reference. */
1545 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1547 #if defined(TARGET_HAS_ICE)
1548 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1550 breakpoint_invalidate(env, breakpoint->pc);
1552 qemu_free(breakpoint);
1553 #endif
1556 /* Remove all matching breakpoints. */
1557 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1559 #if defined(TARGET_HAS_ICE)
1560 CPUBreakpoint *bp, *next;
1562 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1563 if (bp->flags & mask)
1564 cpu_breakpoint_remove_by_ref(env, bp);
1566 #endif
1569 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1570 CPU loop after each instruction */
1571 void cpu_single_step(CPUState *env, int enabled)
1573 #if defined(TARGET_HAS_ICE)
1574 if (env->singlestep_enabled != enabled) {
1575 env->singlestep_enabled = enabled;
1576 if (kvm_enabled())
1577 kvm_update_guest_debug(env, 0);
1578 else {
1579 /* must flush all the translated code to avoid inconsistencies */
1580 /* XXX: only flush what is necessary */
1581 tb_flush(env);
1584 #endif
1587 /* enable or disable low levels log */
1588 void cpu_set_log(int log_flags)
1590 loglevel = log_flags;
1591 if (loglevel && !logfile) {
1592 logfile = fopen(logfilename, log_append ? "a" : "w");
1593 if (!logfile) {
1594 perror(logfilename);
1595 _exit(1);
1597 #if !defined(CONFIG_SOFTMMU)
1598 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1600 static char logfile_buf[4096];
1601 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1603 #elif !defined(_WIN32)
1604 /* Win32 doesn't support line-buffering and requires size >= 2 */
1605 setvbuf(logfile, NULL, _IOLBF, 0);
1606 #endif
1607 log_append = 1;
1609 if (!loglevel && logfile) {
1610 fclose(logfile);
1611 logfile = NULL;
1615 void cpu_set_log_filename(const char *filename)
1617 logfilename = strdup(filename);
1618 if (logfile) {
1619 fclose(logfile);
1620 logfile = NULL;
1622 cpu_set_log(loglevel);
1625 static void cpu_unlink_tb(CPUState *env)
1627 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1628 problem and hope the cpu will stop of its own accord. For userspace
1629 emulation this often isn't actually as bad as it sounds. Often
1630 signals are used primarily to interrupt blocking syscalls. */
1631 TranslationBlock *tb;
1632 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1634 spin_lock(&interrupt_lock);
1635 tb = env->current_tb;
1636 /* if the cpu is currently executing code, we must unlink it and
1637 all the potentially executing TB */
1638 if (tb) {
1639 env->current_tb = NULL;
1640 tb_reset_jump_recursive(tb);
1642 spin_unlock(&interrupt_lock);
1645 #ifndef CONFIG_USER_ONLY
1646 /* mask must never be zero, except for A20 change call */
1647 static void tcg_handle_interrupt(CPUState *env, int mask)
1649 int old_mask;
1651 old_mask = env->interrupt_request;
1652 env->interrupt_request |= mask;
1655 * If called from iothread context, wake the target cpu in
1656 * case its halted.
1658 if (!qemu_cpu_is_self(env)) {
1659 qemu_cpu_kick(env);
1660 return;
1663 if (use_icount) {
1664 env->icount_decr.u16.high = 0xffff;
1665 if (!can_do_io(env)
1666 && (mask & ~old_mask) != 0) {
1667 cpu_abort(env, "Raised interrupt while not in I/O function");
1669 } else {
1670 cpu_unlink_tb(env);
1674 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1676 #else /* CONFIG_USER_ONLY */
1678 void cpu_interrupt(CPUState *env, int mask)
1680 env->interrupt_request |= mask;
1681 cpu_unlink_tb(env);
1683 #endif /* CONFIG_USER_ONLY */
1685 void cpu_reset_interrupt(CPUState *env, int mask)
1687 env->interrupt_request &= ~mask;
1690 void cpu_exit(CPUState *env)
1692 env->exit_request = 1;
1693 cpu_unlink_tb(env);
1696 const CPULogItem cpu_log_items[] = {
1697 { CPU_LOG_TB_OUT_ASM, "out_asm",
1698 "show generated host assembly code for each compiled TB" },
1699 { CPU_LOG_TB_IN_ASM, "in_asm",
1700 "show target assembly code for each compiled TB" },
1701 { CPU_LOG_TB_OP, "op",
1702 "show micro ops for each compiled TB" },
1703 { CPU_LOG_TB_OP_OPT, "op_opt",
1704 "show micro ops "
1705 #ifdef TARGET_I386
1706 "before eflags optimization and "
1707 #endif
1708 "after liveness analysis" },
1709 { CPU_LOG_INT, "int",
1710 "show interrupts/exceptions in short format" },
1711 { CPU_LOG_EXEC, "exec",
1712 "show trace before each executed TB (lots of logs)" },
1713 { CPU_LOG_TB_CPU, "cpu",
1714 "show CPU state before block translation" },
1715 #ifdef TARGET_I386
1716 { CPU_LOG_PCALL, "pcall",
1717 "show protected mode far calls/returns/exceptions" },
1718 { CPU_LOG_RESET, "cpu_reset",
1719 "show CPU state before CPU resets" },
1720 #endif
1721 #ifdef DEBUG_IOPORT
1722 { CPU_LOG_IOPORT, "ioport",
1723 "show all i/o ports accesses" },
1724 #endif
1725 { 0, NULL, NULL },
1728 #ifndef CONFIG_USER_ONLY
1729 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1730 = QLIST_HEAD_INITIALIZER(memory_client_list);
1732 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1733 ram_addr_t size,
1734 ram_addr_t phys_offset,
1735 bool log_dirty)
1737 CPUPhysMemoryClient *client;
1738 QLIST_FOREACH(client, &memory_client_list, list) {
1739 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1743 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1744 target_phys_addr_t end)
1746 CPUPhysMemoryClient *client;
1747 QLIST_FOREACH(client, &memory_client_list, list) {
1748 int r = client->sync_dirty_bitmap(client, start, end);
1749 if (r < 0)
1750 return r;
1752 return 0;
1755 static int cpu_notify_migration_log(int enable)
1757 CPUPhysMemoryClient *client;
1758 QLIST_FOREACH(client, &memory_client_list, list) {
1759 int r = client->migration_log(client, enable);
1760 if (r < 0)
1761 return r;
1763 return 0;
1766 struct last_map {
1767 target_phys_addr_t start_addr;
1768 ram_addr_t size;
1769 ram_addr_t phys_offset;
1772 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1773 * address. Each intermediate table provides the next L2_BITs of guest
1774 * physical address space. The number of levels vary based on host and
1775 * guest configuration, making it efficient to build the final guest
1776 * physical address by seeding the L1 offset and shifting and adding in
1777 * each L2 offset as we recurse through them. */
1778 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1779 void **lp, target_phys_addr_t addr,
1780 struct last_map *map)
1782 int i;
1784 if (*lp == NULL) {
1785 return;
1787 if (level == 0) {
1788 PhysPageDesc *pd = *lp;
1789 addr <<= L2_BITS + TARGET_PAGE_BITS;
1790 for (i = 0; i < L2_SIZE; ++i) {
1791 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1792 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1794 if (map->size &&
1795 start_addr == map->start_addr + map->size &&
1796 pd[i].phys_offset == map->phys_offset + map->size) {
1798 map->size += TARGET_PAGE_SIZE;
1799 continue;
1800 } else if (map->size) {
1801 client->set_memory(client, map->start_addr,
1802 map->size, map->phys_offset, false);
1805 map->start_addr = start_addr;
1806 map->size = TARGET_PAGE_SIZE;
1807 map->phys_offset = pd[i].phys_offset;
1810 } else {
1811 void **pp = *lp;
1812 for (i = 0; i < L2_SIZE; ++i) {
1813 phys_page_for_each_1(client, level - 1, pp + i,
1814 (addr << L2_BITS) | i, map);
1819 static void phys_page_for_each(CPUPhysMemoryClient *client)
1821 int i;
1822 struct last_map map = { };
1824 for (i = 0; i < P_L1_SIZE; ++i) {
1825 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1826 l1_phys_map + i, i, &map);
1828 if (map.size) {
1829 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1830 false);
1834 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1836 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1837 phys_page_for_each(client);
1840 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1842 QLIST_REMOVE(client, list);
1844 #endif
/* Return 1 when the first 'n' bytes of s1 are exactly the string s2
   (i.e. s2 has length n and the bytes match), 0 otherwise. */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1853 /* takes a comma separated list of log masks. Return 0 if error. */
1854 int cpu_str_to_log_mask(const char *str)
1856 const CPULogItem *item;
1857 int mask;
1858 const char *p, *p1;
1860 p = str;
1861 mask = 0;
1862 for(;;) {
1863 p1 = strchr(p, ',');
1864 if (!p1)
1865 p1 = p + strlen(p);
1866 if(cmp1(p,p1-p,"all")) {
1867 for(item = cpu_log_items; item->mask != 0; item++) {
1868 mask |= item->mask;
1870 } else {
1871 for(item = cpu_log_items; item->mask != 0; item++) {
1872 if (cmp1(p, p1 - p, item->name))
1873 goto found;
1875 return 0;
1877 found:
1878 mask |= item->mask;
1879 if (*p1 != ',')
1880 break;
1881 p = p1 + 1;
1883 return mask;
1886 void cpu_abort(CPUState *env, const char *fmt, ...)
1888 va_list ap;
1889 va_list ap2;
1891 va_start(ap, fmt);
1892 va_copy(ap2, ap);
1893 fprintf(stderr, "qemu: fatal: ");
1894 vfprintf(stderr, fmt, ap);
1895 fprintf(stderr, "\n");
1896 #ifdef TARGET_I386
1897 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1898 #else
1899 cpu_dump_state(env, stderr, fprintf, 0);
1900 #endif
1901 if (qemu_log_enabled()) {
1902 qemu_log("qemu: fatal: ");
1903 qemu_log_vprintf(fmt, ap2);
1904 qemu_log("\n");
1905 #ifdef TARGET_I386
1906 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1907 #else
1908 log_cpu_state(env, 0);
1909 #endif
1910 qemu_log_flush();
1911 qemu_log_close();
1913 va_end(ap2);
1914 va_end(ap);
1915 #if defined(CONFIG_USER_ONLY)
1917 struct sigaction act;
1918 sigfillset(&act.sa_mask);
1919 act.sa_handler = SIG_DFL;
1920 sigaction(SIGABRT, &act, NULL);
1922 #endif
1923 abort();
1926 CPUState *cpu_copy(CPUState *env)
1928 CPUState *new_env = cpu_init(env->cpu_model_str);
1929 CPUState *next_cpu = new_env->next_cpu;
1930 int cpu_index = new_env->cpu_index;
1931 #if defined(TARGET_HAS_ICE)
1932 CPUBreakpoint *bp;
1933 CPUWatchpoint *wp;
1934 #endif
1936 memcpy(new_env, env, sizeof(CPUState));
1938 /* Preserve chaining and index. */
1939 new_env->next_cpu = next_cpu;
1940 new_env->cpu_index = cpu_index;
1942 /* Clone all break/watchpoints.
1943 Note: Once we support ptrace with hw-debug register access, make sure
1944 BP_CPU break/watchpoints are handled correctly on clone. */
1945 QTAILQ_INIT(&env->breakpoints);
1946 QTAILQ_INIT(&env->watchpoints);
1947 #if defined(TARGET_HAS_ICE)
1948 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1949 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1951 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1952 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1953 wp->flags, NULL);
1955 #endif
1957 return new_env;
1960 #if !defined(CONFIG_USER_ONLY)
1962 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1964 unsigned int i;
1966 /* Discard jump cache entries for any tb which might potentially
1967 overlap the flushed page. */
1968 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1969 memset (&env->tb_jmp_cache[i], 0,
1970 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1972 i = tb_jmp_cache_hash_page(addr);
1973 memset (&env->tb_jmp_cache[i], 0,
1974 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1977 static CPUTLBEntry s_cputlb_empty_entry = {
1978 .addr_read = -1,
1979 .addr_write = -1,
1980 .addr_code = -1,
1981 .addend = -1,
1984 /* NOTE: if flush_global is true, also flush global entries (not
1985 implemented yet) */
1986 void tlb_flush(CPUState *env, int flush_global)
1988 int i;
1990 #if defined(DEBUG_TLB)
1991 printf("tlb_flush:\n");
1992 #endif
1993 /* must reset current TB so that interrupts cannot modify the
1994 links while we are modifying them */
1995 env->current_tb = NULL;
1997 for(i = 0; i < CPU_TLB_SIZE; i++) {
1998 int mmu_idx;
1999 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2000 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2004 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2006 env->tlb_flush_addr = -1;
2007 env->tlb_flush_mask = 0;
2008 tlb_flush_count++;
2011 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2013 if (addr == (tlb_entry->addr_read &
2014 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2015 addr == (tlb_entry->addr_write &
2016 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2017 addr == (tlb_entry->addr_code &
2018 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2019 *tlb_entry = s_cputlb_empty_entry;
2023 void tlb_flush_page(CPUState *env, target_ulong addr)
2025 int i;
2026 int mmu_idx;
2028 #if defined(DEBUG_TLB)
2029 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2030 #endif
2031 /* Check if we need to flush due to large pages. */
2032 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2033 #if defined(DEBUG_TLB)
2034 printf("tlb_flush_page: forced full flush ("
2035 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2036 env->tlb_flush_addr, env->tlb_flush_mask);
2037 #endif
2038 tlb_flush(env, 1);
2039 return;
2041 /* must reset current TB so that interrupts cannot modify the
2042 links while we are modifying them */
2043 env->current_tb = NULL;
2045 addr &= TARGET_PAGE_MASK;
2046 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2047 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2048 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2050 tlb_flush_jmp_cache(env, addr);
2053 /* update the TLBs so that writes to code in the virtual page 'addr'
2054 can be detected */
2055 static void tlb_protect_code(ram_addr_t ram_addr)
2057 cpu_physical_memory_reset_dirty(ram_addr,
2058 ram_addr + TARGET_PAGE_SIZE,
2059 CODE_DIRTY_FLAG);
2062 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2063 tested for self modifying code */
2064 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2065 target_ulong vaddr)
2067 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2070 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2071 unsigned long start, unsigned long length)
2073 unsigned long addr;
2074 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2075 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2076 if ((addr - start) < length) {
2077 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2082 /* Note: start and end must be within the same ram block. */
2083 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2084 int dirty_flags)
2086 CPUState *env;
2087 unsigned long length, start1;
2088 int i;
2090 start &= TARGET_PAGE_MASK;
2091 end = TARGET_PAGE_ALIGN(end);
2093 length = end - start;
2094 if (length == 0)
2095 return;
2096 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2098 /* we modify the TLB cache so that the dirty bit will be set again
2099 when accessing the range */
2100 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2101 /* Check that we don't span multiple blocks - this breaks the
2102 address comparisons below. */
2103 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2104 != (end - 1) - start) {
2105 abort();
2108 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2109 int mmu_idx;
2110 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2111 for(i = 0; i < CPU_TLB_SIZE; i++)
2112 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2113 start1, length);
2118 int cpu_physical_memory_set_dirty_tracking(int enable)
2120 int ret = 0;
2121 in_migration = enable;
2122 ret = cpu_notify_migration_log(!!enable);
2123 return ret;
2126 int cpu_physical_memory_get_dirty_tracking(void)
2128 return in_migration;
2131 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2132 target_phys_addr_t end_addr)
2134 int ret;
2136 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2137 return ret;
2140 int cpu_physical_log_start(target_phys_addr_t start_addr,
2141 ram_addr_t size)
2143 CPUPhysMemoryClient *client;
2144 QLIST_FOREACH(client, &memory_client_list, list) {
2145 if (client->log_start) {
2146 int r = client->log_start(client, start_addr, size);
2147 if (r < 0) {
2148 return r;
2152 return 0;
2155 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2156 ram_addr_t size)
2158 CPUPhysMemoryClient *client;
2159 QLIST_FOREACH(client, &memory_client_list, list) {
2160 if (client->log_stop) {
2161 int r = client->log_stop(client, start_addr, size);
2162 if (r < 0) {
2163 return r;
2167 return 0;
2170 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2172 ram_addr_t ram_addr;
2173 void *p;
2175 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2176 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2177 + tlb_entry->addend);
2178 ram_addr = qemu_ram_addr_from_host_nofail(p);
2179 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2180 tlb_entry->addr_write |= TLB_NOTDIRTY;
2185 /* update the TLB according to the current state of the dirty bits */
2186 void cpu_tlb_update_dirty(CPUState *env)
2188 int i;
2189 int mmu_idx;
2190 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2191 for(i = 0; i < CPU_TLB_SIZE; i++)
2192 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2196 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2198 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2199 tlb_entry->addr_write = vaddr;
2202 /* update the TLB corresponding to virtual page vaddr
2203 so that it is no longer dirty */
2204 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2206 int i;
2207 int mmu_idx;
2209 vaddr &= TARGET_PAGE_MASK;
2210 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2211 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2212 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2215 /* Our TLB does not support large pages, so remember the area covered by
2216 large pages and trigger a full TLB flush if these are invalidated. */
2217 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2218 target_ulong size)
2220 target_ulong mask = ~(size - 1);
2222 if (env->tlb_flush_addr == (target_ulong)-1) {
2223 env->tlb_flush_addr = vaddr & mask;
2224 env->tlb_flush_mask = mask;
2225 return;
2227 /* Extend the existing region to include the new page.
2228 This is a compromise between unnecessary flushes and the cost
2229 of maintaining a full variable size TLB. */
2230 mask &= env->tlb_flush_mask;
2231 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2232 mask <<= 1;
2234 env->tlb_flush_addr &= mask;
2235 env->tlb_flush_mask = mask;
2238 /* Add a new TLB entry. At most one entry for a given virtual address
2239 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2240 supplied size is only used by tlb_flush_page. */
2241 void tlb_set_page(CPUState *env, target_ulong vaddr,
2242 target_phys_addr_t paddr, int prot,
2243 int mmu_idx, target_ulong size)
2245 PhysPageDesc *p;
2246 unsigned long pd;
2247 unsigned int index;
2248 target_ulong address;
2249 target_ulong code_address;
2250 unsigned long addend;
2251 CPUTLBEntry *te;
2252 CPUWatchpoint *wp;
2253 target_phys_addr_t iotlb;
2255 assert(size >= TARGET_PAGE_SIZE);
2256 if (size != TARGET_PAGE_SIZE) {
2257 tlb_add_large_page(env, vaddr, size);
2259 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2260 if (!p) {
2261 pd = IO_MEM_UNASSIGNED;
2262 } else {
2263 pd = p->phys_offset;
2265 #if defined(DEBUG_TLB)
2266 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2267 " prot=%x idx=%d pd=0x%08lx\n",
2268 vaddr, paddr, prot, mmu_idx, pd);
2269 #endif
2271 address = vaddr;
2272 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2273 /* IO memory case (romd handled later) */
2274 address |= TLB_MMIO;
2276 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2277 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2278 /* Normal RAM. */
2279 iotlb = pd & TARGET_PAGE_MASK;
2280 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2281 iotlb |= IO_MEM_NOTDIRTY;
2282 else
2283 iotlb |= IO_MEM_ROM;
2284 } else {
2285 /* IO handlers are currently passed a physical address.
2286 It would be nice to pass an offset from the base address
2287 of that region. This would avoid having to special case RAM,
2288 and avoid full address decoding in every device.
2289 We can't use the high bits of pd for this because
2290 IO_MEM_ROMD uses these as a ram address. */
2291 iotlb = (pd & ~TARGET_PAGE_MASK);
2292 if (p) {
2293 iotlb += p->region_offset;
2294 } else {
2295 iotlb += paddr;
2299 code_address = address;
2300 /* Make accesses to pages with watchpoints go via the
2301 watchpoint trap routines. */
2302 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2303 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2304 /* Avoid trapping reads of pages with a write breakpoint. */
2305 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2306 iotlb = io_mem_watch + paddr;
2307 address |= TLB_MMIO;
2308 break;
2313 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2314 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2315 te = &env->tlb_table[mmu_idx][index];
2316 te->addend = addend - vaddr;
2317 if (prot & PAGE_READ) {
2318 te->addr_read = address;
2319 } else {
2320 te->addr_read = -1;
2323 if (prot & PAGE_EXEC) {
2324 te->addr_code = code_address;
2325 } else {
2326 te->addr_code = -1;
2328 if (prot & PAGE_WRITE) {
2329 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2330 (pd & IO_MEM_ROMD)) {
2331 /* Write access calls the I/O callback. */
2332 te->addr_write = address | TLB_MMIO;
2333 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2334 !cpu_physical_memory_is_dirty(pd)) {
2335 te->addr_write = address | TLB_NOTDIRTY;
2336 } else {
2337 te->addr_write = address;
2339 } else {
2340 te->addr_write = -1;
2344 #else
/* tlb_flush variant for the CONFIG_USER_ONLY arm of the surrounding
   conditional.  No body lines are visible in this extract -- presumably an
   empty stub; TODO confirm. */
2346 void tlb_flush(CPUState *env, int flush_global)
/* tlb_flush_page variant for the CONFIG_USER_ONLY arm of the surrounding
   conditional.  No body lines are visible in this extract -- presumably an
   empty stub; TODO confirm. */
2350 void tlb_flush_page(CPUState *env, target_ulong addr)
/* State carried through a walk of the guest page tables: the callback and
   its opaque argument, plus the start address and protection flags of the
   region currently being accumulated.  A start of -1ul means that no region
   is currently open. */
2355 * Walks guest process memory "regions" one by one
2356 * and calls callback function 'fn' for each region.
2359 struct walk_memory_regions_data
/* callback invoked once per completed region */
2361 walk_memory_regions_fn fn;
/* opaque pointer handed back to 'fn' */
2362 void *priv;
/* first address of the open region, or -1ul when none is open */
2363 unsigned long start;
/* protection flags shared by every page of the open region */
2364 int prot;
2367 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2368 abi_ulong end, int new_prot)
2370 if (data->start != -1ul) {
2371 int rc = data->fn(data->priv, data->start, end, data->prot);
2372 if (rc != 0) {
2373 return rc;
2377 data->start = (new_prot ? end : -1ul);
2378 data->prot = new_prot;
2380 return 0;
2383 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2384 abi_ulong base, int level, void **lp)
2386 abi_ulong pa;
2387 int i, rc;
2389 if (*lp == NULL) {
2390 return walk_memory_regions_end(data, base, 0);
2393 if (level == 0) {
2394 PageDesc *pd = *lp;
2395 for (i = 0; i < L2_SIZE; ++i) {
2396 int prot = pd[i].flags;
2398 pa = base | (i << TARGET_PAGE_BITS);
2399 if (prot != data->prot) {
2400 rc = walk_memory_regions_end(data, pa, prot);
2401 if (rc != 0) {
2402 return rc;
2406 } else {
2407 void **pp = *lp;
2408 for (i = 0; i < L2_SIZE; ++i) {
2409 pa = base | ((abi_ulong)i <<
2410 (TARGET_PAGE_BITS + L2_BITS * level));
2411 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2412 if (rc != 0) {
2413 return rc;
2418 return 0;
2421 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2423 struct walk_memory_regions_data data;
2424 unsigned long i;
2426 data.fn = fn;
2427 data.priv = priv;
2428 data.start = -1ul;
2429 data.prot = 0;
2431 for (i = 0; i < V_L1_SIZE; i++) {
2432 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2433 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2434 if (rc != 0) {
2435 return rc;
2439 return walk_memory_regions_end(&data, 0, 0);
2442 static int dump_region(void *priv, abi_ulong start,
2443 abi_ulong end, unsigned long prot)
2445 FILE *f = (FILE *)priv;
2447 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2448 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2449 start, end, end - start,
2450 ((prot & PAGE_READ) ? 'r' : '-'),
2451 ((prot & PAGE_WRITE) ? 'w' : '-'),
2452 ((prot & PAGE_EXEC) ? 'x' : '-'));
2454 return (0);
2457 /* dump memory mappings */
2458 void page_dump(FILE *f)
2460 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2461 "start", "end", "size", "prot");
2462 walk_memory_regions(f, dump_region);
2465 int page_get_flags(target_ulong address)
2467 PageDesc *p;
2469 p = page_find(address >> TARGET_PAGE_BITS);
2470 if (!p)
2471 return 0;
2472 return p->flags;
2475 /* Modify the flags of a page and invalidate the code if necessary.
2476 The flag PAGE_WRITE_ORG is positioned automatically depending
2477 on PAGE_WRITE. The mmap_lock should already be held. */
2478 void page_set_flags(target_ulong start, target_ulong end, int flags)
2480 target_ulong addr, len;
2482 /* This function should never be called with addresses outside the
2483 guest address space. If this assert fires, it probably indicates
2484 a missing call to h2g_valid. */
2485 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2486 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2487 #endif
2488 assert(start < end);
2490 start = start & TARGET_PAGE_MASK;
2491 end = TARGET_PAGE_ALIGN(end);
2493 if (flags & PAGE_WRITE) {
2494 flags |= PAGE_WRITE_ORG;
2497 for (addr = start, len = end - start;
2498 len != 0;
2499 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2500 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2502 /* If the write protection bit is set, then we invalidate
2503 the code inside. */
2504 if (!(p->flags & PAGE_WRITE) &&
2505 (flags & PAGE_WRITE) &&
2506 p->first_tb) {
2507 tb_invalidate_phys_page(addr, 0, NULL);
2509 p->flags = flags;
2513 int page_check_range(target_ulong start, target_ulong len, int flags)
2515 PageDesc *p;
2516 target_ulong end;
2517 target_ulong addr;
2519 /* This function should never be called with addresses outside the
2520 guest address space. If this assert fires, it probably indicates
2521 a missing call to h2g_valid. */
2522 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2523 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2524 #endif
2526 if (len == 0) {
2527 return 0;
2529 if (start + len - 1 < start) {
2530 /* We've wrapped around. */
2531 return -1;
2534 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2535 start = start & TARGET_PAGE_MASK;
2537 for (addr = start, len = end - start;
2538 len != 0;
2539 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2540 p = page_find(addr >> TARGET_PAGE_BITS);
2541 if( !p )
2542 return -1;
2543 if( !(p->flags & PAGE_VALID) )
2544 return -1;
2546 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2547 return -1;
2548 if (flags & PAGE_WRITE) {
2549 if (!(p->flags & PAGE_WRITE_ORG))
2550 return -1;
2551 /* unprotect the page if it was put read-only because it
2552 contains translated code */
2553 if (!(p->flags & PAGE_WRITE)) {
2554 if (!page_unprotect(addr, 0, NULL))
2555 return -1;
2557 return 0;
2560 return 0;
2563 /* called from signal handler: invalidate the code and unprotect the
2564 page. Return TRUE if the fault was successfully handled. */
2565 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2567 unsigned int prot;
2568 PageDesc *p;
2569 target_ulong host_start, host_end, addr;
2571 /* Technically this isn't safe inside a signal handler. However we
2572 know this only ever happens in a synchronous SEGV handler, so in
2573 practice it seems to be ok. */
2574 mmap_lock();
2576 p = page_find(address >> TARGET_PAGE_BITS);
2577 if (!p) {
2578 mmap_unlock();
2579 return 0;
2582 /* if the page was really writable, then we change its
2583 protection back to writable */
2584 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2585 host_start = address & qemu_host_page_mask;
2586 host_end = host_start + qemu_host_page_size;
2588 prot = 0;
2589 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2590 p = page_find(addr >> TARGET_PAGE_BITS);
2591 p->flags |= PAGE_WRITE;
2592 prot |= p->flags;
2594 /* and since the content will be modified, we must invalidate
2595 the corresponding translated code. */
2596 tb_invalidate_phys_page(addr, pc, puc);
2597 #ifdef DEBUG_TB_CHECK
2598 tb_invalidate_check(addr);
2599 #endif
2601 mprotect((void *)g2h(host_start), qemu_host_page_size,
2602 prot & PAGE_BITS);
2604 mmap_unlock();
2605 return 1;
2607 mmap_unlock();
2608 return 0;
/* tlb_set_dirty variant that closes the CONFIG_USER_ONLY section.  No body
   lines are visible in this extract -- presumably an empty stub; TODO
   confirm. */
2611 static inline void tlb_set_dirty(CPUState *env,
2612 unsigned long addr, target_ulong vaddr)
2615 #endif /* defined(CONFIG_USER_ONLY) */
2617 #if !defined(CONFIG_USER_ONLY)
/* SUBPAGE_IDX yields the offset of 'addr' within its target page. */
2619 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
/* Per-page dispatch state for a page shared by several I/O regions.  Both
   arrays are indexed by the byte offset inside the page (SUBPAGE_IDX). */
2620 typedef struct subpage_t {
2621 target_phys_addr_t base;
/* index into io_mem_read/io_mem_write/io_mem_opaque for each byte offset */
2622 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
/* value added to the access address before it is passed to the handler */
2623 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2624 } subpage_t;
2626 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2627 ram_addr_t memory, ram_addr_t region_offset);
2628 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2629 ram_addr_t orig_memory,
2630 ram_addr_t region_offset);
/* For the page at 'addr' inside a registration that begins at 'start_addr',
   compute the first (start_addr2) and last (end_addr2) byte offsets of the
   page that the registration covers, and set need_subpage to 1 when the
   registration covers only part of the page.
   NOTE: the expansion reads a variable named 'orig_size' from the calling
   scope; the 'end_addr' parameter is not referenced in the visible text. */
2631 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2632 need_subpage) \
2633 do { \
2634 if (addr > start_addr) \
2635 start_addr2 = 0; \
2636 else { \
2637 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2638 if (start_addr2 > 0) \
2639 need_subpage = 1; \
2642 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2643 end_addr2 = TARGET_PAGE_SIZE - 1; \
2644 else { \
2645 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2646 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2647 need_subpage = 1; \
2649 } while (0)
2651 /* register physical memory.
2652 For RAM, 'size' must be a multiple of the target page size.
2653 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2654 io memory page. The address used when calling the IO function is
2655 the offset from the start of the region, plus region_offset. Both
2656 start_addr and region_offset are rounded down to a page boundary
2657 before calculating this offset. This should not be a problem unless
2658 the low bits of start_addr and region_offset differ. */
/* Parameters:
     start_addr    - first guest physical address of the range
     size          - length in bytes; must be non-zero (asserted)
     phys_offset   - RAM offset or I/O index to install for the range
     region_offset - offset handed to I/O handlers; replaced by start_addr
                     when the range is being marked unassigned
     log_dirty     - forwarded unchanged to cpu_notify_set_memory()
   Every CPU's TLB is flushed once the page descriptors are updated. */
2659 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2660 ram_addr_t size,
2661 ram_addr_t phys_offset,
2662 ram_addr_t region_offset,
2663 bool log_dirty)
2665 target_phys_addr_t addr, end_addr;
2666 PhysPageDesc *p;
2667 CPUState *env;
/* the unrounded size is what CHECK_SUBPAGE reads */
2668 ram_addr_t orig_size = size;
2669 subpage_t *subpage;
2671 assert(size);
2672 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2674 if (phys_offset == IO_MEM_UNASSIGNED) {
2675 region_offset = start_addr;
2677 region_offset &= TARGET_PAGE_MASK;
/* round the length up to a whole number of target pages */
2678 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2679 end_addr = start_addr + (target_phys_addr_t)size;
2681 addr = start_addr;
2682 do {
2683 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* the page already carries a mapping: either split it into a subpage or
   overwrite it completely */
2684 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2685 ram_addr_t orig_memory = p->phys_offset;
2686 target_phys_addr_t start_addr2, end_addr2;
2687 int need_subpage = 0;
2689 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2690 need_subpage);
2691 if (need_subpage) {
/* reuse the existing subpage container if the page already has one */
2692 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2693 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2694 &p->phys_offset, orig_memory,
2695 p->region_offset);
2696 } else {
2697 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2698 >> IO_MEM_SHIFT];
2700 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2701 region_offset);
2702 p->region_offset = 0;
2703 } else {
2704 p->phys_offset = phys_offset;
/* RAM, ROM and ROMD offsets advance by one page per mapped page */
2705 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2706 (phys_offset & IO_MEM_ROMD))
2707 phys_offset += TARGET_PAGE_SIZE;
2709 } else {
/* no previous mapping: allocate the descriptor and fill it in */
2710 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2711 p->phys_offset = phys_offset;
2712 p->region_offset = region_offset;
2713 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2714 (phys_offset & IO_MEM_ROMD)) {
2715 phys_offset += TARGET_PAGE_SIZE;
2716 } else {
2717 target_phys_addr_t start_addr2, end_addr2;
2718 int need_subpage = 0;
2720 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2721 end_addr2, need_subpage);
2723 if (need_subpage) {
/* the part of the page outside the new region stays unassigned */
2724 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2725 &p->phys_offset, IO_MEM_UNASSIGNED,
2726 addr & TARGET_PAGE_MASK);
2727 subpage_register(subpage, start_addr2, end_addr2,
2728 phys_offset, region_offset);
2729 p->region_offset = 0;
2733 region_offset += TARGET_PAGE_SIZE;
2734 addr += TARGET_PAGE_SIZE;
2735 } while (addr != end_addr);
2737 /* since each CPU stores ram addresses in its TLB cache, we must
2738 reset the modified entries */
2739 /* XXX: slow ! */
2740 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2741 tlb_flush(env, 1);
2745 /* XXX: temporary until new memory mapping API */
2746 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2748 PhysPageDesc *p;
2750 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2751 if (!p)
2752 return IO_MEM_UNASSIGNED;
2753 return p->phys_offset;
2756 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2758 if (kvm_enabled())
2759 kvm_coalesce_mmio_region(addr, size);
2762 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2764 if (kvm_enabled())
2765 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced-MMIO buffer; does nothing when KVM is not
   enabled. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2774 #if defined(__linux__) && !defined(TARGET_S390X)
2776 #include <sys/vfs.h>
#define HUGETLBFS_MAGIC       0x958458f6

/* Return the block size reported by statfs() for the filesystem holding
   'path'.  A warning is printed when that filesystem is not hugetlbfs, but
   the size is still returned.  On statfs() failure the error is reported
   with perror() and 0 is returned. */
static long gethugepagesize(const char *path)
{
    struct statfs info;
    int rc;

    /* Repeat the query for as long as it is interrupted by a signal. */
    for (;;) {
        rc = statfs(path, &info);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        perror(path);
        return 0;
    }

    if (info.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return info.f_bsize;
}
/* Back 'memory' bytes of guest RAM with a temporary file created under
   'path'.  The size is rounded up to a multiple of the page size reported by
   gethugepagesize().  On success the mapping is returned and the open file
   descriptor is stored in block->fd; on any failure NULL is returned. */
2800 static void *file_ram_alloc(RAMBlock *block,
2801 ram_addr_t memory,
2802 const char *path)
2804 char *filename;
2805 void *area;
2806 int fd;
2807 #ifdef MAP_POPULATE
2808 int flags;
2809 #endif
2810 unsigned long hpagesize;
2812 hpagesize = gethugepagesize(path);
2813 if (!hpagesize) {
2814 return NULL;
/* requests smaller than one page of the backing filesystem are refused */
2817 if (memory < hpagesize) {
2818 return NULL;
2821 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2822 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2823 return NULL;
2826 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2827 return NULL;
2830 fd = mkstemp(filename);
2831 if (fd < 0) {
2832 perror("unable to create backing store for hugepages");
2833 free(filename);
2834 return NULL;
/* the name is removed right away; only the descriptor keeps the file alive */
2836 unlink(filename);
2837 free(filename);
/* round up to a multiple of hpagesize */
2839 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2842 * ftruncate is not supported by hugetlbfs in older
2843 * hosts, so don't bother bailing out on errors.
2844 * If anything goes wrong with it under other filesystems,
2845 * mmap will fail.
2847 if (ftruncate(fd, memory))
2848 perror("ftruncate");
2850 #ifdef MAP_POPULATE
2851 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2852 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2853 * to sidestep this quirk.
2855 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2856 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2857 #else
2858 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2859 #endif
2860 if (area == MAP_FAILED) {
2861 perror("file_ram_alloc: can't mmap RAM pages");
2862 close(fd);
2863 return (NULL);
2865 block->fd = fd;
2866 return area;
2868 #endif
2870 static ram_addr_t find_ram_offset(ram_addr_t size)
2872 RAMBlock *block, *next_block;
2873 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2875 if (QLIST_EMPTY(&ram_list.blocks))
2876 return 0;
2878 QLIST_FOREACH(block, &ram_list.blocks, next) {
2879 ram_addr_t end, next = RAM_ADDR_MAX;
2881 end = block->offset + block->length;
2883 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2884 if (next_block->offset >= end) {
2885 next = MIN(next, next_block->offset);
2888 if (next - end >= size && next - end < mingap) {
2889 offset = end;
2890 mingap = next - end;
2893 return offset;
2896 static ram_addr_t last_ram_offset(void)
2898 RAMBlock *block;
2899 ram_addr_t last = 0;
2901 QLIST_FOREACH(block, &ram_list.blocks, next)
2902 last = MAX(last, block->offset + block->length);
2904 return last;
/* Register a new RAM block of 'size' bytes (rounded up with
   TARGET_PAGE_ALIGN) and return its ram_addr_t offset.
     dev  - optional device; when its parent bus provides get_dev_path, that
            path followed by "/" prefixes the block id
     name - block name appended to the id; a duplicate id aborts
     host - caller-provided backing memory, or NULL to allocate it here
   The dirty bitmap is grown to cover the block and the block's pages are
   set to 0xff. */
2907 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2908 ram_addr_t size, void *host)
2910 RAMBlock *new_block, *block;
2912 size = TARGET_PAGE_ALIGN(size);
2913 new_block = qemu_mallocz(sizeof(*new_block));
2915 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2916 char *id = dev->parent_bus->info->get_dev_path(dev);
2917 if (id) {
2918 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2919 qemu_free(id);
2922 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* block ids must be unique across the whole list */
2924 QLIST_FOREACH(block, &ram_list.blocks, next) {
2925 if (!strcmp(block->idstr, new_block->idstr)) {
2926 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2927 new_block->idstr);
2928 abort();
2932 new_block->offset = find_ram_offset(size);
2933 if (host) {
/* caller owns the memory: remember that so it is not freed or remapped */
2934 new_block->host = host;
2935 new_block->flags |= RAM_PREALLOC_MASK;
2936 } else {
2937 if (mem_path) {
2938 #if defined (__linux__) && !defined(TARGET_S390X)
2939 new_block->host = file_ram_alloc(new_block, size, mem_path);
/* fall back to an ordinary allocation when file backing fails */
2940 if (!new_block->host) {
2941 new_block->host = qemu_vmalloc(size);
2942 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2944 #else
2945 fprintf(stderr, "-mem-path option unsupported\n");
2946 exit(1);
2947 #endif
2948 } else {
2949 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2950 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2951 an system defined value, which is at least 256GB. Larger systems
2952 have larger values. We put the guest between the end of data
2953 segment (system break) and this value. We use 32GB as a base to
2954 have enough room for the system break to grow. */
2955 new_block->host = mmap((void*)0x800000000, size,
2956 PROT_EXEC|PROT_READ|PROT_WRITE,
2957 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2958 if (new_block->host == MAP_FAILED) {
2959 fprintf(stderr, "Allocating RAM failed\n");
2960 abort();
2962 #else
/* NOTE(review): the Xen branch does not assign new_block->host here */
2963 if (xen_enabled()) {
2964 xen_ram_alloc(new_block->offset, size);
2965 } else {
2966 new_block->host = qemu_vmalloc(size);
2968 #endif
2969 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2972 new_block->length = size;
2974 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* grow the dirty bitmap to the new end of RAM and set the new block's
   entries to 0xff */
2976 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2977 last_ram_offset() >> TARGET_PAGE_BITS);
2978 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2979 0xff, size >> TARGET_PAGE_BITS);
2981 if (kvm_enabled())
2982 kvm_setup_guest_memory(new_block->host, size);
2984 return new_block->offset;
2987 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2989 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2992 void qemu_ram_free_from_ptr(ram_addr_t addr)
2994 RAMBlock *block;
2996 QLIST_FOREACH(block, &ram_list.blocks, next) {
2997 if (addr == block->offset) {
2998 QLIST_REMOVE(block, next);
2999 qemu_free(block);
3000 return;
/* Remove the block whose offset equals 'addr' from the RAM list, release its
   backing memory in the way that matches how it was obtained, and free the
   descriptor.  Blocks flagged RAM_PREALLOC_MASK keep their backing memory.
   Nothing happens when no block starts at 'addr'. */
3005 void qemu_ram_free(ram_addr_t addr)
3007 RAMBlock *block;
3009 QLIST_FOREACH(block, &ram_list.blocks, next) {
3010 if (addr == block->offset) {
3011 QLIST_REMOVE(block, next);
3012 if (block->flags & RAM_PREALLOC_MASK) {
3014 } else if (mem_path) {
3015 #if defined (__linux__) && !defined(TARGET_S390X)
/* a non-zero fd marks a file-backed mapping; otherwise the block came from
   the qemu_vmalloc fallback */
3016 if (block->fd) {
3017 munmap(block->host, block->length);
3018 close(block->fd);
3019 } else {
3020 qemu_vfree(block->host);
3022 #else
3023 abort();
3024 #endif
3025 } else {
3026 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3027 munmap(block->host, block->length);
3028 #else
3029 if (xen_enabled()) {
3030 xen_invalidate_map_cache_entry(block->host);
3031 } else {
3032 qemu_vfree(block->host);
3034 #endif
3036 qemu_free(block);
3037 return;
3043 #ifndef _WIN32
/* Replace 'length' bytes of host memory backing guest RAM at offset 'addr'
   with a fresh mapping at the same host address.  The block containing
   'addr' is looked up, the old range is unmapped and a new one is mapped
   with MAP_FIXED using the same kind of backing the block was created with.
   Blocks flagged RAM_PREALLOC_MASK are left untouched.  If the new mapping
   does not land at the expected address the process exits. */
3044 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3046 RAMBlock *block;
3047 ram_addr_t offset;
3048 int flags;
3049 void *area, *vaddr;
3051 QLIST_FOREACH(block, &ram_list.blocks, next) {
/* unsigned subtraction: the test below also rejects addr < block->offset */
3052 offset = addr - block->offset;
3053 if (offset < block->length) {
3054 vaddr = block->host + offset;
3055 if (block->flags & RAM_PREALLOC_MASK) {
3057 } else {
3058 flags = MAP_FIXED;
3059 munmap(vaddr, length);
3060 if (mem_path) {
3061 #if defined(__linux__) && !defined(TARGET_S390X)
3062 if (block->fd) {
3063 #ifdef MAP_POPULATE
3064 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3065 MAP_PRIVATE;
3066 #else
3067 flags |= MAP_PRIVATE;
3068 #endif
/* file-backed block: map the same file range again */
3069 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3070 flags, block->fd, offset);
3071 } else {
3072 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3073 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3074 flags, -1, 0);
3076 #else
3077 abort();
3078 #endif
3079 } else {
3080 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3081 flags |= MAP_SHARED | MAP_ANONYMOUS;
3082 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3083 flags, -1, 0);
3084 #else
3085 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3086 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3087 flags, -1, 0);
3088 #endif
/* this also catches MAP_FAILED, which can never equal vaddr */
3090 if (area != vaddr) {
3091 fprintf(stderr, "Could not remap addr: "
3092 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3093 length, addr);
3094 exit(1);
3096 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3098 return;
3102 #endif /* !_WIN32 */
3104 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3105 With the exception of the softmmu code in this file, this should
3106 only be used for local memory (e.g. video ram) that the device owns,
3107 and knows it isn't going to access beyond the end of the block.
3109 It should not be used for general purpose DMA.
3110 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3112 void *qemu_get_ram_ptr(ram_addr_t addr)
3114 RAMBlock *block;
3116 QLIST_FOREACH(block, &ram_list.blocks, next) {
3117 if (addr - block->offset < block->length) {
3118 /* Move this entry to to start of the list. */
3119 if (block != QLIST_FIRST(&ram_list.blocks)) {
3120 QLIST_REMOVE(block, next);
3121 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3123 if (xen_enabled()) {
3124 /* We need to check if the requested address is in the RAM
3125 * because we don't want to map the entire memory in QEMU.
3126 * In that case just map until the end of the page.
3128 if (block->offset == 0) {
3129 return xen_map_cache(addr, 0, 0);
3130 } else if (block->host == NULL) {
3131 block->host =
3132 xen_map_cache(block->offset, block->length, 1);
3135 return block->host + (addr - block->offset);
3139 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3140 abort();
3142 return NULL;
3145 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3146 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3148 void *qemu_safe_ram_ptr(ram_addr_t addr)
3150 RAMBlock *block;
3152 QLIST_FOREACH(block, &ram_list.blocks, next) {
3153 if (addr - block->offset < block->length) {
3154 if (xen_enabled()) {
3155 /* We need to check if the requested address is in the RAM
3156 * because we don't want to map the entire memory in QEMU.
3157 * In that case just map until the end of the page.
3159 if (block->offset == 0) {
3160 return xen_map_cache(addr, 0, 0);
3161 } else if (block->host == NULL) {
3162 block->host =
3163 xen_map_cache(block->offset, block->length, 1);
3166 return block->host + (addr - block->offset);
3170 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3171 abort();
3173 return NULL;
3176 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3177 * but takes a size argument */
3178 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3180 if (*size == 0) {
3181 return NULL;
3183 if (xen_enabled()) {
3184 return xen_map_cache(addr, *size, 1);
3185 } else {
3186 RAMBlock *block;
3188 QLIST_FOREACH(block, &ram_list.blocks, next) {
3189 if (addr - block->offset < block->length) {
3190 if (addr - block->offset + *size > block->length)
3191 *size = block->length - addr + block->offset;
3192 return block->host + (addr - block->offset);
3196 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3197 abort();
/* Counterpart of the RAM pointer getters; the only visible effect is the
   trace event emitted for 'addr'. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3206 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3208 RAMBlock *block;
3209 uint8_t *host = ptr;
3211 if (xen_enabled()) {
3212 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3213 return 0;
3216 QLIST_FOREACH(block, &ram_list.blocks, next) {
3217 /* This case append when the block is not mapped. */
3218 if (block->host == NULL) {
3219 continue;
3221 if (host - block->host < block->length) {
3222 *ram_addr = block->offset + (host - block->host);
3223 return 0;
3227 return -1;
3230 /* Some of the softmmu routines need to translate from a host pointer
3231 (typically a TLB entry) back to a ram offset. */
3232 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3234 ram_addr_t ram_addr;
3236 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3237 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3238 abort();
3240 return ram_addr;
3243 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3245 #ifdef DEBUG_UNASSIGNED
3246 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3247 #endif
3248 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3249 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3250 #endif
3251 return 0;
3254 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3256 #ifdef DEBUG_UNASSIGNED
3257 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3258 #endif
3259 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3260 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3261 #endif
3262 return 0;
3265 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3267 #ifdef DEBUG_UNASSIGNED
3268 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3269 #endif
3270 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3271 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3272 #endif
3273 return 0;
3276 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3278 #ifdef DEBUG_UNASSIGNED
3279 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3280 #endif
3281 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3282 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3283 #endif
3286 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3288 #ifdef DEBUG_UNASSIGNED
3289 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3290 #endif
3291 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3292 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3293 #endif
3296 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3298 #ifdef DEBUG_UNASSIGNED
3299 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3300 #endif
3301 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3302 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3303 #endif
/* Read handlers for unassigned memory, ordered by access size: 1, 2 and 4
   bytes. */
3306 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3307 unassigned_mem_readb,
3308 unassigned_mem_readw,
3309 unassigned_mem_readl,
/* Write handlers for unassigned memory, ordered by access size: 1, 2 and 4
   bytes. */
3312 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3313 unassigned_mem_writeb,
3314 unassigned_mem_writew,
3315 unassigned_mem_writel,
3318 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3319 uint32_t val)
3321 int dirty_flags;
3322 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3323 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3324 #if !defined(CONFIG_USER_ONLY)
3325 tb_invalidate_phys_page_fast(ram_addr, 1);
3326 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3327 #endif
3329 stb_p(qemu_get_ram_ptr(ram_addr), val);
3330 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3331 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3332 /* we remove the notdirty callback only if the code has been
3333 flushed */
3334 if (dirty_flags == 0xff)
3335 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3338 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3339 uint32_t val)
3341 int dirty_flags;
3342 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3343 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3344 #if !defined(CONFIG_USER_ONLY)
3345 tb_invalidate_phys_page_fast(ram_addr, 2);
3346 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3347 #endif
3349 stw_p(qemu_get_ram_ptr(ram_addr), val);
3350 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3351 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3352 /* we remove the notdirty callback only if the code has been
3353 flushed */
3354 if (dirty_flags == 0xff)
3355 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3358 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3359 uint32_t val)
3361 int dirty_flags;
3362 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3363 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3364 #if !defined(CONFIG_USER_ONLY)
3365 tb_invalidate_phys_page_fast(ram_addr, 4);
3366 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3367 #endif
3369 stl_p(qemu_get_ram_ptr(ram_addr), val);
3370 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3371 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3372 /* we remove the notdirty callback only if the code has been
3373 flushed */
3374 if (dirty_flags == 0xff)
3375 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
/* Read-handler table whose three slots are all NULL; per the entries'
   own comments these handlers are never used. */
3378 static CPUReadMemoryFunc * const error_mem_read[3] = {
3379 NULL, /* never used */
3380 NULL, /* never used */
3381 NULL, /* never used */
/* Write handlers for not-yet-dirty RAM pages, ordered by access size: 1, 2
   and 4 bytes. */
3384 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3385 notdirty_mem_writeb,
3386 notdirty_mem_writew,
3387 notdirty_mem_writel,
3390 /* Generate a debug exception if a watchpoint has been hit. */
/* offset   - offset of the access within its page; combined with the page
              part of env->mem_io_vaddr to rebuild the virtual address
   len_mask - alignment mask of the access (callers pass ~0x0, ~0x1, ~0x3)
   flags    - BP_MEM_READ or BP_MEM_WRITE; only watchpoints with a matching
              flag are considered
   Operates on cpu_single_env.  Watchpoints that do not match have
   BP_WATCHPOINT_HIT cleared. */
3391 static void check_watchpoint(int offset, int len_mask, int flags)
3393 CPUState *env = cpu_single_env;
3394 target_ulong pc, cs_base;
3395 TranslationBlock *tb;
3396 target_ulong vaddr;
3397 CPUWatchpoint *wp;
3398 int cpu_flags;
3400 if (env->watchpoint_hit) {
3401 /* We re-entered the check after replacing the TB. Now raise
3402 * the debug interrupt so that is will trigger after the
3403 * current instruction. */
3404 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3405 return;
3407 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3408 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* match when the access address equals the watchpoint address aligned to
   the access size, or when the access falls inside the watched range */
3409 if ((vaddr == (wp->vaddr & len_mask) ||
3410 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3411 wp->flags |= BP_WATCHPOINT_HIT;
3412 if (!env->watchpoint_hit) {
3413 env->watchpoint_hit = wp;
3414 tb = tb_find_pc(env->mem_io_pc);
3415 if (!tb) {
3416 cpu_abort(env, "check_watchpoint: could not find TB for "
3417 "pc=%p", (void *)env->mem_io_pc);
/* restore CPU state to the faulting instruction and drop its TB */
3419 cpu_restore_state(tb, env, env->mem_io_pc);
3420 tb_phys_invalidate(tb, -1);
3421 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3422 env->exception_index = EXCP_DEBUG;
3423 } else {
3424 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3425 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3427 cpu_resume_from_signal(env, NULL);
3429 } else {
3430 wp->flags &= ~BP_WATCHPOINT_HIT;
3435 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3436 so these check for a hit then pass through to the normal out-of-line
3437 phys routines. */
3438 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3440 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3441 return ldub_phys(addr);
3444 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3446 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3447 return lduw_phys(addr);
3450 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3452 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3453 return ldl_phys(addr);
3456 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3457 uint32_t val)
3459 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3460 stb_phys(addr, val);
3463 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3464 uint32_t val)
3466 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3467 stw_phys(addr, val);
3470 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3471 uint32_t val)
3473 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3474 stl_phys(addr, val);
/* Watchpoint-checking read handlers, ordered by access size: 1, 2 and 4
   bytes (alignment masks ~0x0, ~0x1 and ~0x3 respectively). */
3477 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3478 watch_mem_readb,
3479 watch_mem_readw,
3480 watch_mem_readl,
/* Watchpoint-checking write handlers, ordered by access size: 1, 2 and 4
   bytes (alignment masks ~0x0, ~0x1 and ~0x3 respectively). */
3483 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3484 watch_mem_writeb,
3485 watch_mem_writew,
3486 watch_mem_writel,
3489 static inline uint32_t subpage_readlen (subpage_t *mmio,
3490 target_phys_addr_t addr,
3491 unsigned int len)
3493 unsigned int idx = SUBPAGE_IDX(addr);
3494 #if defined(DEBUG_SUBPAGE)
3495 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3496 mmio, len, addr, idx);
3497 #endif
3499 addr += mmio->region_offset[idx];
3500 idx = mmio->sub_io_index[idx];
3501 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3504 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3505 uint32_t value, unsigned int len)
3507 unsigned int idx = SUBPAGE_IDX(addr);
3508 #if defined(DEBUG_SUBPAGE)
3509 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3510 __func__, mmio, len, addr, idx, value);
3511 #endif
3513 addr += mmio->region_offset[idx];
3514 idx = mmio->sub_io_index[idx];
3515 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3518 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3520 return subpage_readlen(opaque, addr, 0);
3523 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3524 uint32_t value)
3526 subpage_writelen(opaque, addr, value, 0);
3529 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3531 return subpage_readlen(opaque, addr, 1);
3534 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3535 uint32_t value)
3537 subpage_writelen(opaque, addr, value, 1);
3540 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3542 return subpage_readlen(opaque, addr, 2);
3545 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3546 uint32_t value)
3548 subpage_writelen(opaque, addr, value, 2);
3551 static CPUReadMemoryFunc * const subpage_read[] = {
3552 &subpage_readb,
3553 &subpage_readw,
3554 &subpage_readl,
3557 static CPUWriteMemoryFunc * const subpage_write[] = {
3558 &subpage_writeb,
3559 &subpage_writew,
3560 &subpage_writel,
3563 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3564 ram_addr_t memory, ram_addr_t region_offset)
3566 int idx, eidx;
3568 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3569 return -1;
3570 idx = SUBPAGE_IDX(start);
3571 eidx = SUBPAGE_IDX(end);
3572 #if defined(DEBUG_SUBPAGE)
3573 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3574 mmio, start, end, idx, eidx, memory);
3575 #endif
3576 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3577 memory = IO_MEM_UNASSIGNED;
3578 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3579 for (; idx <= eidx; idx++) {
3580 mmio->sub_io_index[idx] = memory;
3581 mmio->region_offset[idx] = region_offset;
3584 return 0;
3587 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3588 ram_addr_t orig_memory,
3589 ram_addr_t region_offset)
3591 subpage_t *mmio;
3592 int subpage_memory;
3594 mmio = qemu_mallocz(sizeof(subpage_t));
3596 mmio->base = base;
3597 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3598 DEVICE_NATIVE_ENDIAN);
3599 #if defined(DEBUG_SUBPAGE)
3600 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3601 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3602 #endif
3603 *phys = subpage_memory | IO_MEM_SUBPAGE;
3604 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3606 return mmio;
3609 static int get_free_io_mem_idx(void)
3611 int i;
3613 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3614 if (!io_mem_used[i]) {
3615 io_mem_used[i] = 1;
3616 return i;
3618 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3619 return -1;
3623 * Usually, devices operate in little endian mode. There are devices out
3624 * there that operate in big endian too. Each device gets byte swapped
3625 * mmio if plugged onto a CPU that does the other endianness.
3627 * CPU Device swap?
3629 * little little no
3630 * little big yes
3631 * big little yes
3632 * big big no
3635 typedef struct SwapEndianContainer {
3636 CPUReadMemoryFunc *read[3];
3637 CPUWriteMemoryFunc *write[3];
3638 void *opaque;
3639 } SwapEndianContainer;
3641 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3643 uint32_t val;
3644 SwapEndianContainer *c = opaque;
3645 val = c->read[0](c->opaque, addr);
3646 return val;
3649 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3651 uint32_t val;
3652 SwapEndianContainer *c = opaque;
3653 val = bswap16(c->read[1](c->opaque, addr));
3654 return val;
3657 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3659 uint32_t val;
3660 SwapEndianContainer *c = opaque;
3661 val = bswap32(c->read[2](c->opaque, addr));
3662 return val;
3665 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3666 swapendian_mem_readb,
3667 swapendian_mem_readw,
3668 swapendian_mem_readl
3671 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3672 uint32_t val)
3674 SwapEndianContainer *c = opaque;
3675 c->write[0](c->opaque, addr, val);
3678 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3679 uint32_t val)
3681 SwapEndianContainer *c = opaque;
3682 c->write[1](c->opaque, addr, bswap16(val));
3685 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3686 uint32_t val)
3688 SwapEndianContainer *c = opaque;
3689 c->write[2](c->opaque, addr, bswap32(val));
3692 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3693 swapendian_mem_writeb,
3694 swapendian_mem_writew,
3695 swapendian_mem_writel
3698 static void swapendian_init(int io_index)
3700 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3701 int i;
3703 /* Swap mmio for big endian targets */
3704 c->opaque = io_mem_opaque[io_index];
3705 for (i = 0; i < 3; i++) {
3706 c->read[i] = io_mem_read[io_index][i];
3707 c->write[i] = io_mem_write[io_index][i];
3709 io_mem_read[io_index][i] = swapendian_readfn[i];
3710 io_mem_write[io_index][i] = swapendian_writefn[i];
3712 io_mem_opaque[io_index] = c;
3715 static void swapendian_del(int io_index)
3717 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3718 qemu_free(io_mem_opaque[io_index]);
3722 /* mem_read and mem_write are arrays of functions containing the
3723 function to access byte (index 0), word (index 1) and dword (index
3724 2). Functions can be omitted with a NULL function pointer.
3725 If io_index is non zero, the corresponding io zone is
3726 modified. If it is zero, a new io zone is allocated. The return
3727 value can be used with cpu_register_physical_memory(). (-1) is
3728 returned if error. */
3729 static int cpu_register_io_memory_fixed(int io_index,
3730 CPUReadMemoryFunc * const *mem_read,
3731 CPUWriteMemoryFunc * const *mem_write,
3732 void *opaque, enum device_endian endian)
3734 int i;
3736 if (io_index <= 0) {
3737 io_index = get_free_io_mem_idx();
3738 if (io_index == -1)
3739 return io_index;
3740 } else {
3741 io_index >>= IO_MEM_SHIFT;
3742 if (io_index >= IO_MEM_NB_ENTRIES)
3743 return -1;
3746 for (i = 0; i < 3; ++i) {
3747 io_mem_read[io_index][i]
3748 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3750 for (i = 0; i < 3; ++i) {
3751 io_mem_write[io_index][i]
3752 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3754 io_mem_opaque[io_index] = opaque;
3756 switch (endian) {
3757 case DEVICE_BIG_ENDIAN:
3758 #ifndef TARGET_WORDS_BIGENDIAN
3759 swapendian_init(io_index);
3760 #endif
3761 break;
3762 case DEVICE_LITTLE_ENDIAN:
3763 #ifdef TARGET_WORDS_BIGENDIAN
3764 swapendian_init(io_index);
3765 #endif
3766 break;
3767 case DEVICE_NATIVE_ENDIAN:
3768 default:
3769 break;
3772 return (io_index << IO_MEM_SHIFT);
3775 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3776 CPUWriteMemoryFunc * const *mem_write,
3777 void *opaque, enum device_endian endian)
3779 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3782 void cpu_unregister_io_memory(int io_table_address)
3784 int i;
3785 int io_index = io_table_address >> IO_MEM_SHIFT;
3787 swapendian_del(io_index);
3789 for (i=0;i < 3; i++) {
3790 io_mem_read[io_index][i] = unassigned_mem_read[i];
3791 io_mem_write[io_index][i] = unassigned_mem_write[i];
3793 io_mem_opaque[io_index] = NULL;
3794 io_mem_used[io_index] = 0;
3797 static void io_mem_init(void)
3799 int i;
3801 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3802 unassigned_mem_write, NULL,
3803 DEVICE_NATIVE_ENDIAN);
3804 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3805 unassigned_mem_write, NULL,
3806 DEVICE_NATIVE_ENDIAN);
3807 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3808 notdirty_mem_write, NULL,
3809 DEVICE_NATIVE_ENDIAN);
3810 for (i=0; i<5; i++)
3811 io_mem_used[i] = 1;
3813 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3814 watch_mem_write, NULL,
3815 DEVICE_NATIVE_ENDIAN);
3818 static void memory_map_init(void)
3820 system_memory = qemu_malloc(sizeof(*system_memory));
3821 memory_region_init(system_memory, "system", UINT64_MAX);
3822 set_system_memory_map(system_memory);
3825 MemoryRegion *get_system_memory(void)
3827 return system_memory;
3830 #endif /* !defined(CONFIG_USER_ONLY) */
3832 /* physical memory access (slow version, mainly for debug) */
3833 #if defined(CONFIG_USER_ONLY)
3834 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3835 uint8_t *buf, int len, int is_write)
3837 int l, flags;
3838 target_ulong page;
3839 void * p;
3841 while (len > 0) {
3842 page = addr & TARGET_PAGE_MASK;
3843 l = (page + TARGET_PAGE_SIZE) - addr;
3844 if (l > len)
3845 l = len;
3846 flags = page_get_flags(page);
3847 if (!(flags & PAGE_VALID))
3848 return -1;
3849 if (is_write) {
3850 if (!(flags & PAGE_WRITE))
3851 return -1;
3852 /* XXX: this code should not depend on lock_user */
3853 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3854 return -1;
3855 memcpy(p, buf, l);
3856 unlock_user(p, addr, l);
3857 } else {
3858 if (!(flags & PAGE_READ))
3859 return -1;
3860 /* XXX: this code should not depend on lock_user */
3861 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3862 return -1;
3863 memcpy(buf, p, l);
3864 unlock_user(p, addr, 0);
3866 len -= l;
3867 buf += l;
3868 addr += l;
3870 return 0;
3873 #else
3874 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3875 int len, int is_write)
3877 int l, io_index;
3878 uint8_t *ptr;
3879 uint32_t val;
3880 target_phys_addr_t page;
3881 ram_addr_t pd;
3882 PhysPageDesc *p;
3884 while (len > 0) {
3885 page = addr & TARGET_PAGE_MASK;
3886 l = (page + TARGET_PAGE_SIZE) - addr;
3887 if (l > len)
3888 l = len;
3889 p = phys_page_find(page >> TARGET_PAGE_BITS);
3890 if (!p) {
3891 pd = IO_MEM_UNASSIGNED;
3892 } else {
3893 pd = p->phys_offset;
3896 if (is_write) {
3897 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3898 target_phys_addr_t addr1 = addr;
3899 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3900 if (p)
3901 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3902 /* XXX: could force cpu_single_env to NULL to avoid
3903 potential bugs */
3904 if (l >= 4 && ((addr1 & 3) == 0)) {
3905 /* 32 bit write access */
3906 val = ldl_p(buf);
3907 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3908 l = 4;
3909 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3910 /* 16 bit write access */
3911 val = lduw_p(buf);
3912 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3913 l = 2;
3914 } else {
3915 /* 8 bit write access */
3916 val = ldub_p(buf);
3917 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3918 l = 1;
3920 } else {
3921 ram_addr_t addr1;
3922 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3923 /* RAM case */
3924 ptr = qemu_get_ram_ptr(addr1);
3925 memcpy(ptr, buf, l);
3926 if (!cpu_physical_memory_is_dirty(addr1)) {
3927 /* invalidate code */
3928 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3929 /* set dirty bit */
3930 cpu_physical_memory_set_dirty_flags(
3931 addr1, (0xff & ~CODE_DIRTY_FLAG));
3933 qemu_put_ram_ptr(ptr);
3935 } else {
3936 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3937 !(pd & IO_MEM_ROMD)) {
3938 target_phys_addr_t addr1 = addr;
3939 /* I/O case */
3940 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3941 if (p)
3942 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3943 if (l >= 4 && ((addr1 & 3) == 0)) {
3944 /* 32 bit read access */
3945 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3946 stl_p(buf, val);
3947 l = 4;
3948 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3949 /* 16 bit read access */
3950 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3951 stw_p(buf, val);
3952 l = 2;
3953 } else {
3954 /* 8 bit read access */
3955 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3956 stb_p(buf, val);
3957 l = 1;
3959 } else {
3960 /* RAM case */
3961 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3962 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3963 qemu_put_ram_ptr(ptr);
3966 len -= l;
3967 buf += l;
3968 addr += l;
3972 /* used for ROM loading : can write in RAM and ROM */
3973 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3974 const uint8_t *buf, int len)
3976 int l;
3977 uint8_t *ptr;
3978 target_phys_addr_t page;
3979 unsigned long pd;
3980 PhysPageDesc *p;
3982 while (len > 0) {
3983 page = addr & TARGET_PAGE_MASK;
3984 l = (page + TARGET_PAGE_SIZE) - addr;
3985 if (l > len)
3986 l = len;
3987 p = phys_page_find(page >> TARGET_PAGE_BITS);
3988 if (!p) {
3989 pd = IO_MEM_UNASSIGNED;
3990 } else {
3991 pd = p->phys_offset;
3994 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3995 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3996 !(pd & IO_MEM_ROMD)) {
3997 /* do nothing */
3998 } else {
3999 unsigned long addr1;
4000 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4001 /* ROM/RAM case */
4002 ptr = qemu_get_ram_ptr(addr1);
4003 memcpy(ptr, buf, l);
4004 qemu_put_ram_ptr(ptr);
4006 len -= l;
4007 buf += l;
4008 addr += l;
4012 typedef struct {
4013 void *buffer;
4014 target_phys_addr_t addr;
4015 target_phys_addr_t len;
4016 } BounceBuffer;
4018 static BounceBuffer bounce;
4020 typedef struct MapClient {
4021 void *opaque;
4022 void (*callback)(void *opaque);
4023 QLIST_ENTRY(MapClient) link;
4024 } MapClient;
4026 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4027 = QLIST_HEAD_INITIALIZER(map_client_list);
4029 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4031 MapClient *client = qemu_malloc(sizeof(*client));
4033 client->opaque = opaque;
4034 client->callback = callback;
4035 QLIST_INSERT_HEAD(&map_client_list, client, link);
4036 return client;
4039 void cpu_unregister_map_client(void *_client)
4041 MapClient *client = (MapClient *)_client;
4043 QLIST_REMOVE(client, link);
4044 qemu_free(client);
4047 static void cpu_notify_map_clients(void)
4049 MapClient *client;
4051 while (!QLIST_EMPTY(&map_client_list)) {
4052 client = QLIST_FIRST(&map_client_list);
4053 client->callback(client->opaque);
4054 cpu_unregister_map_client(client);
4058 /* Map a physical memory region into a host virtual address.
4059 * May map a subset of the requested range, given by and returned in *plen.
4060 * May return NULL if resources needed to perform the mapping are exhausted.
4061 * Use only for reads OR writes - not for read-modify-write operations.
4062 * Use cpu_register_map_client() to know when retrying the map operation is
4063 * likely to succeed.
4065 void *cpu_physical_memory_map(target_phys_addr_t addr,
4066 target_phys_addr_t *plen,
4067 int is_write)
4069 target_phys_addr_t len = *plen;
4070 target_phys_addr_t todo = 0;
4071 int l;
4072 target_phys_addr_t page;
4073 unsigned long pd;
4074 PhysPageDesc *p;
4075 ram_addr_t raddr = RAM_ADDR_MAX;
4076 ram_addr_t rlen;
4077 void *ret;
4079 while (len > 0) {
4080 page = addr & TARGET_PAGE_MASK;
4081 l = (page + TARGET_PAGE_SIZE) - addr;
4082 if (l > len)
4083 l = len;
4084 p = phys_page_find(page >> TARGET_PAGE_BITS);
4085 if (!p) {
4086 pd = IO_MEM_UNASSIGNED;
4087 } else {
4088 pd = p->phys_offset;
4091 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4092 if (todo || bounce.buffer) {
4093 break;
4095 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4096 bounce.addr = addr;
4097 bounce.len = l;
4098 if (!is_write) {
4099 cpu_physical_memory_read(addr, bounce.buffer, l);
4102 *plen = l;
4103 return bounce.buffer;
4105 if (!todo) {
4106 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4109 len -= l;
4110 addr += l;
4111 todo += l;
4113 rlen = todo;
4114 ret = qemu_ram_ptr_length(raddr, &rlen);
4115 *plen = rlen;
4116 return ret;
4119 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4120 * Will also mark the memory as dirty if is_write == 1. access_len gives
4121 * the amount of memory that was actually read or written by the caller.
4123 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4124 int is_write, target_phys_addr_t access_len)
4126 if (buffer != bounce.buffer) {
4127 if (is_write) {
4128 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4129 while (access_len) {
4130 unsigned l;
4131 l = TARGET_PAGE_SIZE;
4132 if (l > access_len)
4133 l = access_len;
4134 if (!cpu_physical_memory_is_dirty(addr1)) {
4135 /* invalidate code */
4136 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4137 /* set dirty bit */
4138 cpu_physical_memory_set_dirty_flags(
4139 addr1, (0xff & ~CODE_DIRTY_FLAG));
4141 addr1 += l;
4142 access_len -= l;
4145 if (xen_enabled()) {
4146 xen_invalidate_map_cache_entry(buffer);
4148 return;
4150 if (is_write) {
4151 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4153 qemu_vfree(bounce.buffer);
4154 bounce.buffer = NULL;
4155 cpu_notify_map_clients();
4158 /* warning: addr must be aligned */
4159 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4160 enum device_endian endian)
4162 int io_index;
4163 uint8_t *ptr;
4164 uint32_t val;
4165 unsigned long pd;
4166 PhysPageDesc *p;
4168 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4169 if (!p) {
4170 pd = IO_MEM_UNASSIGNED;
4171 } else {
4172 pd = p->phys_offset;
4175 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4176 !(pd & IO_MEM_ROMD)) {
4177 /* I/O case */
4178 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4179 if (p)
4180 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4181 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4182 #if defined(TARGET_WORDS_BIGENDIAN)
4183 if (endian == DEVICE_LITTLE_ENDIAN) {
4184 val = bswap32(val);
4186 #else
4187 if (endian == DEVICE_BIG_ENDIAN) {
4188 val = bswap32(val);
4190 #endif
4191 } else {
4192 /* RAM case */
4193 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4194 (addr & ~TARGET_PAGE_MASK);
4195 switch (endian) {
4196 case DEVICE_LITTLE_ENDIAN:
4197 val = ldl_le_p(ptr);
4198 break;
4199 case DEVICE_BIG_ENDIAN:
4200 val = ldl_be_p(ptr);
4201 break;
4202 default:
4203 val = ldl_p(ptr);
4204 break;
4207 return val;
4210 uint32_t ldl_phys(target_phys_addr_t addr)
4212 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4215 uint32_t ldl_le_phys(target_phys_addr_t addr)
4217 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4220 uint32_t ldl_be_phys(target_phys_addr_t addr)
4222 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4225 /* warning: addr must be aligned */
4226 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4227 enum device_endian endian)
4229 int io_index;
4230 uint8_t *ptr;
4231 uint64_t val;
4232 unsigned long pd;
4233 PhysPageDesc *p;
4235 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4236 if (!p) {
4237 pd = IO_MEM_UNASSIGNED;
4238 } else {
4239 pd = p->phys_offset;
4242 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4243 !(pd & IO_MEM_ROMD)) {
4244 /* I/O case */
4245 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4246 if (p)
4247 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4249 /* XXX This is broken when device endian != cpu endian.
4250 Fix and add "endian" variable check */
4251 #ifdef TARGET_WORDS_BIGENDIAN
4252 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4253 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4254 #else
4255 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4256 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4257 #endif
4258 } else {
4259 /* RAM case */
4260 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4261 (addr & ~TARGET_PAGE_MASK);
4262 switch (endian) {
4263 case DEVICE_LITTLE_ENDIAN:
4264 val = ldq_le_p(ptr);
4265 break;
4266 case DEVICE_BIG_ENDIAN:
4267 val = ldq_be_p(ptr);
4268 break;
4269 default:
4270 val = ldq_p(ptr);
4271 break;
4274 return val;
4277 uint64_t ldq_phys(target_phys_addr_t addr)
4279 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4282 uint64_t ldq_le_phys(target_phys_addr_t addr)
4284 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4287 uint64_t ldq_be_phys(target_phys_addr_t addr)
4289 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4292 /* XXX: optimize */
4293 uint32_t ldub_phys(target_phys_addr_t addr)
4295 uint8_t val;
4296 cpu_physical_memory_read(addr, &val, 1);
4297 return val;
4300 /* warning: addr must be aligned */
4301 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4302 enum device_endian endian)
4304 int io_index;
4305 uint8_t *ptr;
4306 uint64_t val;
4307 unsigned long pd;
4308 PhysPageDesc *p;
4310 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4311 if (!p) {
4312 pd = IO_MEM_UNASSIGNED;
4313 } else {
4314 pd = p->phys_offset;
4317 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4318 !(pd & IO_MEM_ROMD)) {
4319 /* I/O case */
4320 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4321 if (p)
4322 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4323 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4324 #if defined(TARGET_WORDS_BIGENDIAN)
4325 if (endian == DEVICE_LITTLE_ENDIAN) {
4326 val = bswap16(val);
4328 #else
4329 if (endian == DEVICE_BIG_ENDIAN) {
4330 val = bswap16(val);
4332 #endif
4333 } else {
4334 /* RAM case */
4335 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4336 (addr & ~TARGET_PAGE_MASK);
4337 switch (endian) {
4338 case DEVICE_LITTLE_ENDIAN:
4339 val = lduw_le_p(ptr);
4340 break;
4341 case DEVICE_BIG_ENDIAN:
4342 val = lduw_be_p(ptr);
4343 break;
4344 default:
4345 val = lduw_p(ptr);
4346 break;
4349 return val;
4352 uint32_t lduw_phys(target_phys_addr_t addr)
4354 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4357 uint32_t lduw_le_phys(target_phys_addr_t addr)
4359 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4362 uint32_t lduw_be_phys(target_phys_addr_t addr)
4364 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4367 /* warning: addr must be aligned. The ram page is not masked as dirty
4368 and the code inside is not invalidated. It is useful if the dirty
4369 bits are used to track modified PTEs */
4370 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4372 int io_index;
4373 uint8_t *ptr;
4374 unsigned long pd;
4375 PhysPageDesc *p;
4377 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4378 if (!p) {
4379 pd = IO_MEM_UNASSIGNED;
4380 } else {
4381 pd = p->phys_offset;
4384 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4385 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4386 if (p)
4387 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4388 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4389 } else {
4390 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4391 ptr = qemu_get_ram_ptr(addr1);
4392 stl_p(ptr, val);
4394 if (unlikely(in_migration)) {
4395 if (!cpu_physical_memory_is_dirty(addr1)) {
4396 /* invalidate code */
4397 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4398 /* set dirty bit */
4399 cpu_physical_memory_set_dirty_flags(
4400 addr1, (0xff & ~CODE_DIRTY_FLAG));
4406 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4408 int io_index;
4409 uint8_t *ptr;
4410 unsigned long pd;
4411 PhysPageDesc *p;
4413 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4414 if (!p) {
4415 pd = IO_MEM_UNASSIGNED;
4416 } else {
4417 pd = p->phys_offset;
4420 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4421 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4422 if (p)
4423 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4424 #ifdef TARGET_WORDS_BIGENDIAN
4425 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4426 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4427 #else
4428 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4429 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4430 #endif
4431 } else {
4432 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4433 (addr & ~TARGET_PAGE_MASK);
4434 stq_p(ptr, val);
4438 /* warning: addr must be aligned */
4439 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4440 enum device_endian endian)
4442 int io_index;
4443 uint8_t *ptr;
4444 unsigned long pd;
4445 PhysPageDesc *p;
4447 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4448 if (!p) {
4449 pd = IO_MEM_UNASSIGNED;
4450 } else {
4451 pd = p->phys_offset;
4454 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4455 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4456 if (p)
4457 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4458 #if defined(TARGET_WORDS_BIGENDIAN)
4459 if (endian == DEVICE_LITTLE_ENDIAN) {
4460 val = bswap32(val);
4462 #else
4463 if (endian == DEVICE_BIG_ENDIAN) {
4464 val = bswap32(val);
4466 #endif
4467 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4468 } else {
4469 unsigned long addr1;
4470 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4471 /* RAM case */
4472 ptr = qemu_get_ram_ptr(addr1);
4473 switch (endian) {
4474 case DEVICE_LITTLE_ENDIAN:
4475 stl_le_p(ptr, val);
4476 break;
4477 case DEVICE_BIG_ENDIAN:
4478 stl_be_p(ptr, val);
4479 break;
4480 default:
4481 stl_p(ptr, val);
4482 break;
4484 if (!cpu_physical_memory_is_dirty(addr1)) {
4485 /* invalidate code */
4486 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4487 /* set dirty bit */
4488 cpu_physical_memory_set_dirty_flags(addr1,
4489 (0xff & ~CODE_DIRTY_FLAG));
4494 void stl_phys(target_phys_addr_t addr, uint32_t val)
4496 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4499 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4501 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4504 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4506 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4509 /* XXX: optimize */
4510 void stb_phys(target_phys_addr_t addr, uint32_t val)
4512 uint8_t v = val;
4513 cpu_physical_memory_write(addr, &v, 1);
4516 /* warning: addr must be aligned */
4517 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4518 enum device_endian endian)
4520 int io_index;
4521 uint8_t *ptr;
4522 unsigned long pd;
4523 PhysPageDesc *p;
4525 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4526 if (!p) {
4527 pd = IO_MEM_UNASSIGNED;
4528 } else {
4529 pd = p->phys_offset;
4532 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4533 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4534 if (p)
4535 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4536 #if defined(TARGET_WORDS_BIGENDIAN)
4537 if (endian == DEVICE_LITTLE_ENDIAN) {
4538 val = bswap16(val);
4540 #else
4541 if (endian == DEVICE_BIG_ENDIAN) {
4542 val = bswap16(val);
4544 #endif
4545 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4546 } else {
4547 unsigned long addr1;
4548 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4549 /* RAM case */
4550 ptr = qemu_get_ram_ptr(addr1);
4551 switch (endian) {
4552 case DEVICE_LITTLE_ENDIAN:
4553 stw_le_p(ptr, val);
4554 break;
4555 case DEVICE_BIG_ENDIAN:
4556 stw_be_p(ptr, val);
4557 break;
4558 default:
4559 stw_p(ptr, val);
4560 break;
4562 if (!cpu_physical_memory_is_dirty(addr1)) {
4563 /* invalidate code */
4564 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4565 /* set dirty bit */
4566 cpu_physical_memory_set_dirty_flags(addr1,
4567 (0xff & ~CODE_DIRTY_FLAG));
4572 void stw_phys(target_phys_addr_t addr, uint32_t val)
4574 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4577 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4579 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4582 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4584 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4587 /* XXX: optimize */
4588 void stq_phys(target_phys_addr_t addr, uint64_t val)
4590 val = tswap64(val);
4591 cpu_physical_memory_write(addr, &val, 8);
4594 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4596 val = cpu_to_le64(val);
4597 cpu_physical_memory_write(addr, &val, 8);
4600 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4602 val = cpu_to_be64(val);
4603 cpu_physical_memory_write(addr, &val, 8);
4606 /* virtual memory access for debug (includes writing to ROM) */
4607 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4608 uint8_t *buf, int len, int is_write)
4610 int l;
4611 target_phys_addr_t phys_addr;
4612 target_ulong page;
4614 while (len > 0) {
4615 page = addr & TARGET_PAGE_MASK;
4616 phys_addr = cpu_get_phys_page_debug(env, page);
4617 /* if no physical page mapped, return an error */
4618 if (phys_addr == -1)
4619 return -1;
4620 l = (page + TARGET_PAGE_SIZE) - addr;
4621 if (l > len)
4622 l = len;
4623 phys_addr += (addr & ~TARGET_PAGE_MASK);
4624 if (is_write)
4625 cpu_physical_memory_write_rom(phys_addr, buf, l);
4626 else
4627 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4628 len -= l;
4629 buf += l;
4630 addr += l;
4632 return 0;
4634 #endif
4636 /* in deterministic execution mode, instructions doing device I/Os
4637 must be at the end of the TB */
4638 void cpu_io_recompile(CPUState *env, void *retaddr)
4640 TranslationBlock *tb;
4641 uint32_t n, cflags;
4642 target_ulong pc, cs_base;
4643 uint64_t flags;
4645 tb = tb_find_pc((unsigned long)retaddr);
4646 if (!tb) {
4647 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4648 retaddr);
4650 n = env->icount_decr.u16.low + tb->icount;
4651 cpu_restore_state(tb, env, (unsigned long)retaddr);
4652 /* Calculate how many instructions had been executed before the fault
4653 occurred. */
4654 n = n - env->icount_decr.u16.low;
4655 /* Generate a new TB ending on the I/O insn. */
4656 n++;
4657 /* On MIPS and SH, delay slot instructions can only be restarted if
4658 they were already the first instruction in the TB. If this is not
4659 the first instruction in a TB then re-execute the preceding
4660 branch. */
4661 #if defined(TARGET_MIPS)
4662 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4663 env->active_tc.PC -= 4;
4664 env->icount_decr.u16.low++;
4665 env->hflags &= ~MIPS_HFLAG_BMASK;
4667 #elif defined(TARGET_SH4)
4668 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4669 && n > 1) {
4670 env->pc -= 2;
4671 env->icount_decr.u16.low++;
4672 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4674 #endif
4675 /* This should never happen. */
4676 if (n > CF_COUNT_MASK)
4677 cpu_abort(env, "TB too big during recompile");
4679 cflags = n | CF_LAST_IO;
4680 pc = tb->pc;
4681 cs_base = tb->cs_base;
4682 flags = tb->flags;
4683 tb_phys_invalidate(tb, -1);
4684 /* FIXME: In theory this could raise an exception. In practice
4685 we have already translated the block once so it's probably ok. */
4686 tb_gen_code(env, pc, cs_base, flags, cflags);
4687 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4688 the first in the TB) then we end up generating a whole new TB and
4689 repeating the fault, which is horribly inefficient.
4690 Better would be to execute just this insn uncached, or generate a
4691 second new TB. */
4692 cpu_resume_from_signal(env, NULL);
4695 #if !defined(CONFIG_USER_ONLY)
4697 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4699 int i, target_code_size, max_target_code_size;
4700 int direct_jmp_count, direct_jmp2_count, cross_page;
4701 TranslationBlock *tb;
4703 target_code_size = 0;
4704 max_target_code_size = 0;
4705 cross_page = 0;
4706 direct_jmp_count = 0;
4707 direct_jmp2_count = 0;
4708 for(i = 0; i < nb_tbs; i++) {
4709 tb = &tbs[i];
4710 target_code_size += tb->size;
4711 if (tb->size > max_target_code_size)
4712 max_target_code_size = tb->size;
4713 if (tb->page_addr[1] != -1)
4714 cross_page++;
4715 if (tb->tb_next_offset[0] != 0xffff) {
4716 direct_jmp_count++;
4717 if (tb->tb_next_offset[1] != 0xffff) {
4718 direct_jmp2_count++;
4722 /* XXX: avoid using doubles ? */
4723 cpu_fprintf(f, "Translation buffer state:\n");
4724 cpu_fprintf(f, "gen code size %td/%ld\n",
4725 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4726 cpu_fprintf(f, "TB count %d/%d\n",
4727 nb_tbs, code_gen_max_blocks);
4728 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4729 nb_tbs ? target_code_size / nb_tbs : 0,
4730 max_target_code_size);
4731 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4732 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4733 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4734 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4735 cross_page,
4736 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4737 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4738 direct_jmp_count,
4739 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4740 direct_jmp2_count,
4741 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4742 cpu_fprintf(f, "\nStatistics:\n");
4743 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4744 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4745 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4746 tcg_dump_info(f, cpu_fprintf);
4749 #define MMUSUFFIX _cmmu
4750 #define GETPC() NULL
4751 #define env cpu_single_env
4752 #define SOFTMMU_CODE_ACCESS
4754 #define SHIFT 0
4755 #include "softmmu_template.h"
4757 #define SHIFT 1
4758 #include "softmmu_template.h"
4760 #define SHIFT 2
4761 #include "softmmu_template.h"
4763 #define SHIFT 3
4764 #include "softmmu_template.h"
4766 #undef env
4768 #endif