Use mmap to allocate execute memory
[qemu.git] / exec.c
blob719fff9a915f07d0ff2f7c313bceacb6a16332ab
/*
 *  virtual page mapping and translated block handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 //#define DEBUG_TB_INVALIDATE
61 //#define DEBUG_FLUSH
62 //#define DEBUG_TLB
63 //#define DEBUG_UNASSIGNED
65 /* make various TB consistency checks */
66 //#define DEBUG_TB_CHECK
67 //#define DEBUG_TLB_CHECK
69 //#define DEBUG_IOPORT
70 //#define DEBUG_SUBPAGE
72 #if !defined(CONFIG_USER_ONLY)
73 /* TB consistency checks only implemented for usermode emulation. */
74 #undef DEBUG_TB_CHECK
75 #endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
79 static TranslationBlock *tbs;
80 static int code_gen_max_blocks;
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
86 #if defined(__arm__) || defined(__sparc_v9__)
87 /* The prologue must be reachable with a direct jump. ARM and Sparc64
88 have limited branch ranges (possibly also PPC) so place it in a
89 section close to code segment. */
90 #define code_gen_section \
91 __attribute__((__section__(".gen_code"))) \
92 __attribute__((aligned (32)))
93 #elif defined(_WIN32)
94 /* Maximum alignment for Win32 is 16. */
95 #define code_gen_section \
96 __attribute__((aligned (16)))
97 #else
98 #define code_gen_section \
99 __attribute__((aligned (32)))
100 #endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list) };
115 static MemoryRegion *system_memory;
117 #endif
119 CPUState *first_cpu;
120 /* current CPU in the current thread. It is only valid inside
121 cpu_exec() */
122 CPUState *cpu_single_env;
123 /* 0 = Do not count executed instructions.
124 1 = Precise instruction counting.
125 2 = Adaptive rate instruction counting. */
126 int use_icount = 0;
127 /* Current instruction counter. While executing translated code this may
128 include some instructions that have not yet been executed. */
129 int64_t qemu_icount;
131 typedef struct PageDesc {
132 /* list of TBs intersecting this ram page */
133 TranslationBlock *first_tb;
134 /* in order to optimize self modifying code, we count the number
135 of lookups we do to a given page to use a bitmap */
136 unsigned int code_write_count;
137 uint8_t *code_bitmap;
138 #if defined(CONFIG_USER_ONLY)
139 unsigned long flags;
140 #endif
141 } PageDesc;
143 /* In system mode we want L1_MAP to be based on ram offsets,
144 while in user mode we want it to be based on virtual addresses. */
145 #if !defined(CONFIG_USER_ONLY)
146 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
147 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
148 #else
149 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
150 #endif
151 #else
152 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
153 #endif
155 /* Size of the L2 (and L3, etc) page tables. */
156 #define L2_BITS 10
157 #define L2_SIZE (1 << L2_BITS)
159 /* The bits remaining after N lower levels of page tables. */
160 #define P_L1_BITS_REM \
161 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
162 #define V_L1_BITS_REM \
163 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
165 /* Size of the L1 page table. Avoid silly small sizes. */
166 #if P_L1_BITS_REM < 4
167 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
168 #else
169 #define P_L1_BITS P_L1_BITS_REM
170 #endif
172 #if V_L1_BITS_REM < 4
173 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
174 #else
175 #define V_L1_BITS V_L1_BITS_REM
176 #endif
178 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
179 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
181 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
182 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
184 unsigned long qemu_real_host_page_size;
185 unsigned long qemu_host_page_bits;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
/*
 * Make the host memory range [addr, addr + size) readable, writable and
 * executable.  The result of the underlying protection call is ignored.
 */
#ifdef _WIN32
static void map_exec(void *addr, long size)
{
    DWORD previous_protection;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous_protection);
}
#else
static void map_exec(void *addr, long size)
{
    /* mprotect() is applied to whole pages: round the start down and the
       end up to the host page size. */
    unsigned long page_mask = ~((unsigned long)getpagesize() - 1);
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + ~page_mask) & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
258 static void page_init(void)
260 /* NOTE: we can always suppose that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_bits = 0;
277 while ((1 << qemu_host_page_bits) < qemu_host_page_size)
278 qemu_host_page_bits++;
279 qemu_host_page_mask = ~(qemu_host_page_size - 1);
281 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
283 #ifdef HAVE_KINFO_GETVMMAP
284 struct kinfo_vmentry *freep;
285 int i, cnt;
287 freep = kinfo_getvmmap(getpid(), &cnt);
288 if (freep) {
289 mmap_lock();
290 for (i = 0; i < cnt; i++) {
291 unsigned long startaddr, endaddr;
293 startaddr = freep[i].kve_start;
294 endaddr = freep[i].kve_end;
295 if (h2g_valid(startaddr)) {
296 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
298 if (h2g_valid(endaddr)) {
299 endaddr = h2g(endaddr);
300 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
301 } else {
302 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
303 endaddr = ~0ul;
304 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
305 #endif
309 free(freep);
310 mmap_unlock();
312 #else
313 FILE *f;
315 last_brk = (unsigned long)sbrk(0);
317 f = fopen("/compat/linux/proc/self/maps", "r");
318 if (f) {
319 mmap_lock();
321 do {
322 unsigned long startaddr, endaddr;
323 int n;
325 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
327 if (n == 2 && h2g_valid(startaddr)) {
328 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
330 if (h2g_valid(endaddr)) {
331 endaddr = h2g(endaddr);
332 } else {
333 endaddr = ~0ul;
335 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
337 } while (!feof(f));
339 fclose(f);
340 mmap_unlock();
342 #endif
344 #endif
347 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
349 PageDesc *pd;
350 void **lp;
351 int i;
353 #if defined(CONFIG_USER_ONLY)
354 /* We can't use qemu_malloc because it may recurse into a locked mutex. */
355 # define ALLOC(P, SIZE) \
356 do { \
357 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
358 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
359 } while (0)
360 #else
361 # define ALLOC(P, SIZE) \
362 do { P = qemu_mallocz(SIZE); } while (0)
363 #endif
365 /* Level 1. Always allocated. */
366 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
368 /* Level 2..N-1. */
369 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
370 void **p = *lp;
372 if (p == NULL) {
373 if (!alloc) {
374 return NULL;
376 ALLOC(p, sizeof(void *) * L2_SIZE);
377 *lp = p;
380 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
383 pd = *lp;
384 if (pd == NULL) {
385 if (!alloc) {
386 return NULL;
388 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
389 *lp = pd;
392 #undef ALLOC
394 return pd + (index & (L2_SIZE - 1));
397 static inline PageDesc *page_find(tb_page_addr_t index)
399 return page_find_alloc(index, 0);
402 #if !defined(CONFIG_USER_ONLY)
403 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
405 PhysPageDesc *pd;
406 void **lp;
407 int i;
409 /* Level 1. Always allocated. */
410 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
412 /* Level 2..N-1. */
413 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
414 void **p = *lp;
415 if (p == NULL) {
416 if (!alloc) {
417 return NULL;
419 *lp = p = qemu_mallocz(sizeof(void *) * L2_SIZE);
421 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
424 pd = *lp;
425 if (pd == NULL) {
426 int i;
428 if (!alloc) {
429 return NULL;
432 *lp = pd = qemu_malloc(sizeof(PhysPageDesc) * L2_SIZE);
434 for (i = 0; i < L2_SIZE; i++) {
435 pd[i].phys_offset = IO_MEM_UNASSIGNED;
436 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
440 return pd + (index & (L2_SIZE - 1));
443 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
445 return phys_page_find_alloc(index, 0);
448 static void tlb_protect_code(ram_addr_t ram_addr);
449 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
450 target_ulong vaddr);
451 #define mmap_lock() do { } while(0)
452 #define mmap_unlock() do { } while(0)
453 #endif
455 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
457 #if defined(CONFIG_USER_ONLY)
458 /* Currently it is not recommended to allocate big chunks of data in
459 user mode. It will change when a dedicated libc will be used */
460 #define USE_STATIC_CODE_GEN_BUFFER
461 #endif
463 #ifdef USE_STATIC_CODE_GEN_BUFFER
464 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
465 __attribute__((aligned (CODE_GEN_ALIGN)));
466 #endif
468 static void code_gen_alloc(unsigned long tb_size)
470 #ifdef USE_STATIC_CODE_GEN_BUFFER
471 code_gen_buffer = static_code_gen_buffer;
472 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
473 map_exec(code_gen_buffer, code_gen_buffer_size);
474 #else
475 code_gen_buffer_size = tb_size;
476 if (code_gen_buffer_size == 0) {
477 #if defined(CONFIG_USER_ONLY)
478 /* in user mode, phys_ram_size is not meaningful */
479 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
480 #else
481 /* XXX: needs adjustments */
482 code_gen_buffer_size = (unsigned long)(ram_size / 4);
483 #endif
485 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
486 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
487 /* The code gen buffer location may have constraints depending on
488 the host cpu and OS */
489 #if defined(__linux__)
491 int flags;
492 void *start = NULL;
494 flags = MAP_PRIVATE | MAP_ANONYMOUS;
495 #if defined(__x86_64__)
496 flags |= MAP_32BIT;
497 /* Cannot map more than that */
498 if (code_gen_buffer_size > (800 * 1024 * 1024))
499 code_gen_buffer_size = (800 * 1024 * 1024);
500 #elif defined(__sparc_v9__)
501 // Map the buffer below 2G, so we can use direct calls and branches
502 flags |= MAP_FIXED;
503 start = (void *) 0x60000000UL;
504 if (code_gen_buffer_size > (512 * 1024 * 1024))
505 code_gen_buffer_size = (512 * 1024 * 1024);
506 #elif defined(__arm__)
507 /* Map the buffer below 32M, so we can use direct calls and branches */
508 flags |= MAP_FIXED;
509 start = (void *) 0x01000000UL;
510 if (code_gen_buffer_size > 16 * 1024 * 1024)
511 code_gen_buffer_size = 16 * 1024 * 1024;
512 #elif defined(__s390x__)
513 /* Map the buffer so that we can use direct calls and branches. */
514 /* We have a +- 4GB range on the branches; leave some slop. */
515 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
516 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
518 start = (void *)0x90000000UL;
519 #endif
520 code_gen_buffer = mmap(start, code_gen_buffer_size,
521 PROT_WRITE | PROT_READ | PROT_EXEC,
522 flags, -1, 0);
523 if (code_gen_buffer == MAP_FAILED) {
524 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
525 exit(1);
528 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
529 || defined(__DragonFly__) || defined(__OpenBSD__) \
530 || defined(__NetBSD__)
532 int flags;
533 void *addr = NULL;
534 flags = MAP_PRIVATE | MAP_ANONYMOUS;
535 #if defined(__x86_64__)
536 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
537 * 0x40000000 is free */
538 flags |= MAP_FIXED;
539 addr = (void *)0x40000000;
540 /* Cannot map more than that */
541 if (code_gen_buffer_size > (800 * 1024 * 1024))
542 code_gen_buffer_size = (800 * 1024 * 1024);
543 #elif defined(__sparc_v9__)
544 // Map the buffer below 2G, so we can use direct calls and branches
545 flags |= MAP_FIXED;
546 addr = (void *) 0x60000000UL;
547 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
548 code_gen_buffer_size = (512 * 1024 * 1024);
550 #endif
551 code_gen_buffer = mmap(addr, code_gen_buffer_size,
552 PROT_WRITE | PROT_READ | PROT_EXEC,
553 flags, -1, 0);
554 if (code_gen_buffer == MAP_FAILED) {
555 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
556 exit(1);
559 #else
560 code_gen_buffer = qemu_malloc(code_gen_buffer_size);
561 map_exec(code_gen_buffer, code_gen_buffer_size);
562 #endif
563 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
564 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
565 code_gen_buffer_max_size = code_gen_buffer_size -
566 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
567 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
568 tbs = qemu_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
571 /* Must be called before using the QEMU cpus. 'tb_size' is the size
572 (in bytes) allocated to the translation buffer. Zero means default
573 size. */
574 void tcg_exec_init(unsigned long tb_size)
576 cpu_gen_init();
577 code_gen_alloc(tb_size);
578 code_gen_ptr = code_gen_buffer;
579 page_init();
580 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
581 /* There's no guest base to take into account, so go ahead and
582 initialize the prologue now. */
583 tcg_prologue_init(&tcg_ctx);
584 #endif
587 bool tcg_enabled(void)
589 return code_gen_buffer != NULL;
/* One-time global initialisation.  In system-mode builds this sets up the
   memory map and the io memory tables; user-only builds have nothing to do. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
600 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
602 static int cpu_common_post_load(void *opaque, int version_id)
604 CPUState *env = opaque;
606 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
607 version_id is increased. */
608 env->interrupt_request &= ~0x01;
609 tlb_flush(env, 1);
611 return 0;
614 static const VMStateDescription vmstate_cpu_common = {
615 .name = "cpu_common",
616 .version_id = 1,
617 .minimum_version_id = 1,
618 .minimum_version_id_old = 1,
619 .post_load = cpu_common_post_load,
620 .fields = (VMStateField []) {
621 VMSTATE_UINT32(halted, CPUState),
622 VMSTATE_UINT32(interrupt_request, CPUState),
623 VMSTATE_END_OF_LIST()
626 #endif
628 CPUState *qemu_get_cpu(int cpu)
630 CPUState *env = first_cpu;
632 while (env) {
633 if (env->cpu_index == cpu)
634 break;
635 env = env->next_cpu;
638 return env;
641 void cpu_exec_init(CPUState *env)
643 CPUState **penv;
644 int cpu_index;
646 #if defined(CONFIG_USER_ONLY)
647 cpu_list_lock();
648 #endif
649 env->next_cpu = NULL;
650 penv = &first_cpu;
651 cpu_index = 0;
652 while (*penv != NULL) {
653 penv = &(*penv)->next_cpu;
654 cpu_index++;
656 env->cpu_index = cpu_index;
657 env->numa_node = 0;
658 QTAILQ_INIT(&env->breakpoints);
659 QTAILQ_INIT(&env->watchpoints);
660 #ifndef CONFIG_USER_ONLY
661 env->thread_id = qemu_get_thread_id();
662 #endif
663 *penv = env;
664 #if defined(CONFIG_USER_ONLY)
665 cpu_list_unlock();
666 #endif
667 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
668 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
669 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
670 cpu_save, cpu_load, env);
671 #endif
674 /* Allocate a new translation block. Flush the translation buffer if
675 too many translation blocks or too much generated code. */
676 static TranslationBlock *tb_alloc(target_ulong pc)
678 TranslationBlock *tb;
680 if (nb_tbs >= code_gen_max_blocks ||
681 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
682 return NULL;
683 tb = &tbs[nb_tbs++];
684 tb->pc = pc;
685 tb->cflags = 0;
686 return tb;
689 void tb_free(TranslationBlock *tb)
691 /* In practice this is mostly used for single use temporary TB
692 Ignore the hard cases and just back up if this TB happens to
693 be the last one generated. */
694 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
695 code_gen_ptr = tb->tc_ptr;
696 nb_tbs--;
700 static inline void invalidate_page_bitmap(PageDesc *p)
702 if (p->code_bitmap) {
703 qemu_free(p->code_bitmap);
704 p->code_bitmap = NULL;
706 p->code_write_count = 0;
709 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
711 static void page_flush_tb_1 (int level, void **lp)
713 int i;
715 if (*lp == NULL) {
716 return;
718 if (level == 0) {
719 PageDesc *pd = *lp;
720 for (i = 0; i < L2_SIZE; ++i) {
721 pd[i].first_tb = NULL;
722 invalidate_page_bitmap(pd + i);
724 } else {
725 void **pp = *lp;
726 for (i = 0; i < L2_SIZE; ++i) {
727 page_flush_tb_1 (level - 1, pp + i);
732 static void page_flush_tb(void)
734 int i;
735 for (i = 0; i < V_L1_SIZE; i++) {
736 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
740 /* flush all the translation blocks */
741 /* XXX: tb_flush is currently not thread safe */
742 void tb_flush(CPUState *env1)
744 CPUState *env;
745 #if defined(DEBUG_FLUSH)
746 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
747 (unsigned long)(code_gen_ptr - code_gen_buffer),
748 nb_tbs, nb_tbs > 0 ?
749 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
750 #endif
751 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
752 cpu_abort(env1, "Internal error: code buffer overflow\n");
754 nb_tbs = 0;
756 for(env = first_cpu; env != NULL; env = env->next_cpu) {
757 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
760 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
761 page_flush_tb();
763 code_gen_ptr = code_gen_buffer;
764 /* XXX: flush processor icache at this point if cache flush is
765 expensive */
766 tb_flush_count++;
769 #ifdef DEBUG_TB_CHECK
771 static void tb_invalidate_check(target_ulong address)
773 TranslationBlock *tb;
774 int i;
775 address &= TARGET_PAGE_MASK;
776 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
777 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
778 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
779 address >= tb->pc + tb->size)) {
780 printf("ERROR invalidate: address=" TARGET_FMT_lx
781 " PC=%08lx size=%04x\n",
782 address, (long)tb->pc, tb->size);
788 /* verify that all the pages have correct rights for code */
789 static void tb_page_check(void)
791 TranslationBlock *tb;
792 int i, flags1, flags2;
794 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
795 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
796 flags1 = page_get_flags(tb->pc);
797 flags2 = page_get_flags(tb->pc + tb->size - 1);
798 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
799 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
800 (long)tb->pc, tb->size, flags1, flags2);
806 #endif
808 /* invalidate one TB */
809 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
810 int next_offset)
812 TranslationBlock *tb1;
813 for(;;) {
814 tb1 = *ptb;
815 if (tb1 == tb) {
816 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
817 break;
819 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
823 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
825 TranslationBlock *tb1;
826 unsigned int n1;
828 for(;;) {
829 tb1 = *ptb;
830 n1 = (long)tb1 & 3;
831 tb1 = (TranslationBlock *)((long)tb1 & ~3);
832 if (tb1 == tb) {
833 *ptb = tb1->page_next[n1];
834 break;
836 ptb = &tb1->page_next[n1];
840 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
842 TranslationBlock *tb1, **ptb;
843 unsigned int n1;
845 ptb = &tb->jmp_next[n];
846 tb1 = *ptb;
847 if (tb1) {
848 /* find tb(n) in circular list */
849 for(;;) {
850 tb1 = *ptb;
851 n1 = (long)tb1 & 3;
852 tb1 = (TranslationBlock *)((long)tb1 & ~3);
853 if (n1 == n && tb1 == tb)
854 break;
855 if (n1 == 2) {
856 ptb = &tb1->jmp_first;
857 } else {
858 ptb = &tb1->jmp_next[n1];
861 /* now we can suppress tb(n) from the list */
862 *ptb = tb->jmp_next[n];
864 tb->jmp_next[n] = NULL;
868 /* reset the jump entry 'n' of a TB so that it is not chained to
869 another TB */
870 static inline void tb_reset_jump(TranslationBlock *tb, int n)
872 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
875 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
877 CPUState *env;
878 PageDesc *p;
879 unsigned int h, n1;
880 tb_page_addr_t phys_pc;
881 TranslationBlock *tb1, *tb2;
883 /* remove the TB from the hash list */
884 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
885 h = tb_phys_hash_func(phys_pc);
886 tb_remove(&tb_phys_hash[h], tb,
887 offsetof(TranslationBlock, phys_hash_next));
889 /* remove the TB from the page list */
890 if (tb->page_addr[0] != page_addr) {
891 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
892 tb_page_remove(&p->first_tb, tb);
893 invalidate_page_bitmap(p);
895 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
896 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
897 tb_page_remove(&p->first_tb, tb);
898 invalidate_page_bitmap(p);
901 tb_invalidated_flag = 1;
903 /* remove the TB from the hash list */
904 h = tb_jmp_cache_hash_func(tb->pc);
905 for(env = first_cpu; env != NULL; env = env->next_cpu) {
906 if (env->tb_jmp_cache[h] == tb)
907 env->tb_jmp_cache[h] = NULL;
910 /* suppress this TB from the two jump lists */
911 tb_jmp_remove(tb, 0);
912 tb_jmp_remove(tb, 1);
914 /* suppress any remaining jumps to this TB */
915 tb1 = tb->jmp_first;
916 for(;;) {
917 n1 = (long)tb1 & 3;
918 if (n1 == 2)
919 break;
920 tb1 = (TranslationBlock *)((long)tb1 & ~3);
921 tb2 = tb1->jmp_next[n1];
922 tb_reset_jump(tb1, n1);
923 tb1->jmp_next[n1] = NULL;
924 tb1 = tb2;
926 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
928 tb_phys_invalidate_count++;
/*
 * Set 'len' consecutive bits of the bitmap 'tab', beginning at bit index
 * 'start'.  Bit i lives in byte i / 8 at position i % 8 (least significant
 * bit first).
 */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    uint8_t *byte = tab + (start >> 3);
    int head_mask = 0xff << (start & 7);
    int pos, last_full;

    if ((start & ~7) == (end & ~7)) {
        /* The range begins and ends inside the same byte. */
        if (start < end) {
            *byte |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* Partial leading byte. */
    *byte++ |= head_mask;

    /* Whole bytes in the middle. */
    last_full = end & ~7;
    for (pos = (start + 8) & ~7; pos < last_full; pos += 8) {
        *byte++ = 0xff;
    }

    /* Partial trailing byte. */
    if (pos < end) {
        *byte |= ~(0xff << (end & 7));
    }
}
958 static void build_page_bitmap(PageDesc *p)
960 int n, tb_start, tb_end;
961 TranslationBlock *tb;
963 p->code_bitmap = qemu_mallocz(TARGET_PAGE_SIZE / 8);
965 tb = p->first_tb;
966 while (tb != NULL) {
967 n = (long)tb & 3;
968 tb = (TranslationBlock *)((long)tb & ~3);
969 /* NOTE: this is subtle as a TB may span two physical pages */
970 if (n == 0) {
971 /* NOTE: tb_end may be after the end of the page, but
972 it is not a problem */
973 tb_start = tb->pc & ~TARGET_PAGE_MASK;
974 tb_end = tb_start + tb->size;
975 if (tb_end > TARGET_PAGE_SIZE)
976 tb_end = TARGET_PAGE_SIZE;
977 } else {
978 tb_start = 0;
979 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
981 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
982 tb = tb->page_next[n];
986 TranslationBlock *tb_gen_code(CPUState *env,
987 target_ulong pc, target_ulong cs_base,
988 int flags, int cflags)
990 TranslationBlock *tb;
991 uint8_t *tc_ptr;
992 tb_page_addr_t phys_pc, phys_page2;
993 target_ulong virt_page2;
994 int code_gen_size;
996 phys_pc = get_page_addr_code(env, pc);
997 tb = tb_alloc(pc);
998 if (!tb) {
999 /* flush must be done */
1000 tb_flush(env);
1001 /* cannot fail at this point */
1002 tb = tb_alloc(pc);
1003 /* Don't forget to invalidate previous TB info. */
1004 tb_invalidated_flag = 1;
1006 tc_ptr = code_gen_ptr;
1007 tb->tc_ptr = tc_ptr;
1008 tb->cs_base = cs_base;
1009 tb->flags = flags;
1010 tb->cflags = cflags;
1011 cpu_gen_code(env, tb, &code_gen_size);
1012 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1014 /* check next page if needed */
1015 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1016 phys_page2 = -1;
1017 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1018 phys_page2 = get_page_addr_code(env, virt_page2);
1020 tb_link_page(tb, phys_pc, phys_page2);
1021 return tb;
1024 /* invalidate all TBs which intersect with the target physical page
1025 starting in range [start;end[. NOTE: start and end must refer to
1026 the same physical page. 'is_cpu_write_access' should be true if called
1027 from a real cpu write access: the virtual CPU will exit the current
1028 TB if code is modified inside this TB. */
1029 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1030 int is_cpu_write_access)
1032 TranslationBlock *tb, *tb_next, *saved_tb;
1033 CPUState *env = cpu_single_env;
1034 tb_page_addr_t tb_start, tb_end;
1035 PageDesc *p;
1036 int n;
1037 #ifdef TARGET_HAS_PRECISE_SMC
1038 int current_tb_not_found = is_cpu_write_access;
1039 TranslationBlock *current_tb = NULL;
1040 int current_tb_modified = 0;
1041 target_ulong current_pc = 0;
1042 target_ulong current_cs_base = 0;
1043 int current_flags = 0;
1044 #endif /* TARGET_HAS_PRECISE_SMC */
1046 p = page_find(start >> TARGET_PAGE_BITS);
1047 if (!p)
1048 return;
1049 if (!p->code_bitmap &&
1050 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1051 is_cpu_write_access) {
1052 /* build code bitmap */
1053 build_page_bitmap(p);
1056 /* we remove all the TBs in the range [start, end[ */
1057 /* XXX: see if in some cases it could be faster to invalidate all the code */
1058 tb = p->first_tb;
1059 while (tb != NULL) {
1060 n = (long)tb & 3;
1061 tb = (TranslationBlock *)((long)tb & ~3);
1062 tb_next = tb->page_next[n];
1063 /* NOTE: this is subtle as a TB may span two physical pages */
1064 if (n == 0) {
1065 /* NOTE: tb_end may be after the end of the page, but
1066 it is not a problem */
1067 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1068 tb_end = tb_start + tb->size;
1069 } else {
1070 tb_start = tb->page_addr[1];
1071 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1073 if (!(tb_end <= start || tb_start >= end)) {
1074 #ifdef TARGET_HAS_PRECISE_SMC
1075 if (current_tb_not_found) {
1076 current_tb_not_found = 0;
1077 current_tb = NULL;
1078 if (env->mem_io_pc) {
1079 /* now we have a real cpu fault */
1080 current_tb = tb_find_pc(env->mem_io_pc);
1083 if (current_tb == tb &&
1084 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1085 /* If we are modifying the current TB, we must stop
1086 its execution. We could be more precise by checking
1087 that the modification is after the current PC, but it
1088 would require a specialized function to partially
1089 restore the CPU state */
1091 current_tb_modified = 1;
1092 cpu_restore_state(current_tb, env, env->mem_io_pc);
1093 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1094 &current_flags);
1096 #endif /* TARGET_HAS_PRECISE_SMC */
1097 /* we need to do that to handle the case where a signal
1098 occurs while doing tb_phys_invalidate() */
1099 saved_tb = NULL;
1100 if (env) {
1101 saved_tb = env->current_tb;
1102 env->current_tb = NULL;
1104 tb_phys_invalidate(tb, -1);
1105 if (env) {
1106 env->current_tb = saved_tb;
1107 if (env->interrupt_request && env->current_tb)
1108 cpu_interrupt(env, env->interrupt_request);
1111 tb = tb_next;
1113 #if !defined(CONFIG_USER_ONLY)
1114 /* if no code remaining, no need to continue to use slow writes */
1115 if (!p->first_tb) {
1116 invalidate_page_bitmap(p);
1117 if (is_cpu_write_access) {
1118 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1121 #endif
1122 #ifdef TARGET_HAS_PRECISE_SMC
1123 if (current_tb_modified) {
1124 /* we generate a block containing just the instruction
1125 modifying the memory. It will ensure that it cannot modify
1126 itself */
1127 env->current_tb = NULL;
1128 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1129 cpu_resume_from_signal(env, NULL);
1131 #endif
1134 /* len must be <= 8 and start must be a multiple of len */
1135 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1137 PageDesc *p;
1138 int offset, b;
1139 #if 0
1140 if (1) {
1141 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1142 cpu_single_env->mem_io_vaddr, len,
1143 cpu_single_env->eip,
1144 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1146 #endif
1147 p = page_find(start >> TARGET_PAGE_BITS);
1148 if (!p)
1149 return;
1150 if (p->code_bitmap) {
1151 offset = start & ~TARGET_PAGE_MASK;
1152 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1153 if (b & ((1 << len) - 1))
1154 goto do_invalidate;
1155 } else {
1156 do_invalidate:
1157 tb_invalidate_phys_page_range(start, start + len, 1);
1161 #if !defined(CONFIG_SOFTMMU)
1162 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1163 unsigned long pc, void *puc)
1165 TranslationBlock *tb;
1166 PageDesc *p;
1167 int n;
1168 #ifdef TARGET_HAS_PRECISE_SMC
1169 TranslationBlock *current_tb = NULL;
1170 CPUState *env = cpu_single_env;
1171 int current_tb_modified = 0;
1172 target_ulong current_pc = 0;
1173 target_ulong current_cs_base = 0;
1174 int current_flags = 0;
1175 #endif
1177 addr &= TARGET_PAGE_MASK;
1178 p = page_find(addr >> TARGET_PAGE_BITS);
1179 if (!p)
1180 return;
1181 tb = p->first_tb;
1182 #ifdef TARGET_HAS_PRECISE_SMC
1183 if (tb && pc != 0) {
1184 current_tb = tb_find_pc(pc);
1186 #endif
1187 while (tb != NULL) {
1188 n = (long)tb & 3;
1189 tb = (TranslationBlock *)((long)tb & ~3);
1190 #ifdef TARGET_HAS_PRECISE_SMC
1191 if (current_tb == tb &&
1192 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1193 /* If we are modifying the current TB, we must stop
1194 its execution. We could be more precise by checking
1195 that the modification is after the current PC, but it
1196 would require a specialized function to partially
1197 restore the CPU state */
1199 current_tb_modified = 1;
1200 cpu_restore_state(current_tb, env, pc);
1201 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1202 &current_flags);
1204 #endif /* TARGET_HAS_PRECISE_SMC */
1205 tb_phys_invalidate(tb, addr);
1206 tb = tb->page_next[n];
1208 p->first_tb = NULL;
1209 #ifdef TARGET_HAS_PRECISE_SMC
1210 if (current_tb_modified) {
1211 /* we generate a block containing just the instruction
1212 modifying the memory. It will ensure that it cannot modify
1213 itself */
1214 env->current_tb = NULL;
1215 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1216 cpu_resume_from_signal(env, puc);
1218 #endif
1220 #endif
1222 /* add the tb in the target page and protect it if necessary */
1223 static inline void tb_alloc_page(TranslationBlock *tb,
1224 unsigned int n, tb_page_addr_t page_addr)
1226 PageDesc *p;
1227 #ifndef CONFIG_USER_ONLY
1228 bool page_already_protected;
1229 #endif
1231 tb->page_addr[n] = page_addr;
1232 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1233 tb->page_next[n] = p->first_tb;
1234 #ifndef CONFIG_USER_ONLY
1235 page_already_protected = p->first_tb != NULL;
1236 #endif
1237 p->first_tb = (TranslationBlock *)((long)tb | n);
1238 invalidate_page_bitmap(p);
1240 #if defined(TARGET_HAS_SMC) || 1
1242 #if defined(CONFIG_USER_ONLY)
1243 if (p->flags & PAGE_WRITE) {
1244 target_ulong addr;
1245 PageDesc *p2;
1246 int prot;
1248 /* force the host page as non writable (writes will have a
1249 page fault + mprotect overhead) */
1250 page_addr &= qemu_host_page_mask;
1251 prot = 0;
1252 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1253 addr += TARGET_PAGE_SIZE) {
1255 p2 = page_find (addr >> TARGET_PAGE_BITS);
1256 if (!p2)
1257 continue;
1258 prot |= p2->flags;
1259 p2->flags &= ~PAGE_WRITE;
1261 mprotect(g2h(page_addr), qemu_host_page_size,
1262 (prot & PAGE_BITS) & ~PAGE_WRITE);
1263 #ifdef DEBUG_TB_INVALIDATE
1264 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1265 page_addr);
1266 #endif
1268 #else
1269 /* if some code is already present, then the pages are already
1270 protected. So we handle the case where only the first TB is
1271 allocated in a physical page */
1272 if (!page_already_protected) {
1273 tlb_protect_code(page_addr);
1275 #endif
1277 #endif /* TARGET_HAS_SMC */
1280 /* add a new TB and link it to the physical page tables. phys_page2 is
1281 (-1) to indicate that only one page contains the TB. */
1282 void tb_link_page(TranslationBlock *tb,
1283 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1285 unsigned int h;
1286 TranslationBlock **ptb;
1288 /* Grab the mmap lock to stop another thread invalidating this TB
1289 before we are done. */
1290 mmap_lock();
1291 /* add in the physical hash table */
1292 h = tb_phys_hash_func(phys_pc);
1293 ptb = &tb_phys_hash[h];
1294 tb->phys_hash_next = *ptb;
1295 *ptb = tb;
1297 /* add in the page list */
1298 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1299 if (phys_page2 != -1)
1300 tb_alloc_page(tb, 1, phys_page2);
1301 else
1302 tb->page_addr[1] = -1;
1304 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1305 tb->jmp_next[0] = NULL;
1306 tb->jmp_next[1] = NULL;
1308 /* init original jump addresses */
1309 if (tb->tb_next_offset[0] != 0xffff)
1310 tb_reset_jump(tb, 0);
1311 if (tb->tb_next_offset[1] != 0xffff)
1312 tb_reset_jump(tb, 1);
1314 #ifdef DEBUG_TB_CHECK
1315 tb_page_check();
1316 #endif
1317 mmap_unlock();
1320 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1321 tb[1].tc_ptr. Return NULL if not found */
1322 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1324 int m_min, m_max, m;
1325 unsigned long v;
1326 TranslationBlock *tb;
1328 if (nb_tbs <= 0)
1329 return NULL;
1330 if (tc_ptr < (unsigned long)code_gen_buffer ||
1331 tc_ptr >= (unsigned long)code_gen_ptr)
1332 return NULL;
1333 /* binary search (cf Knuth) */
1334 m_min = 0;
1335 m_max = nb_tbs - 1;
1336 while (m_min <= m_max) {
1337 m = (m_min + m_max) >> 1;
1338 tb = &tbs[m];
1339 v = (unsigned long)tb->tc_ptr;
1340 if (v == tc_ptr)
1341 return tb;
1342 else if (tc_ptr < v) {
1343 m_max = m - 1;
1344 } else {
1345 m_min = m + 1;
1348 return &tbs[m_max];
1351 static void tb_reset_jump_recursive(TranslationBlock *tb);
1353 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1355 TranslationBlock *tb1, *tb_next, **ptb;
1356 unsigned int n1;
1358 tb1 = tb->jmp_next[n];
1359 if (tb1 != NULL) {
1360 /* find head of list */
1361 for(;;) {
1362 n1 = (long)tb1 & 3;
1363 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1364 if (n1 == 2)
1365 break;
1366 tb1 = tb1->jmp_next[n1];
1368 /* we are now sure now that tb jumps to tb1 */
1369 tb_next = tb1;
1371 /* remove tb from the jmp_first list */
1372 ptb = &tb_next->jmp_first;
1373 for(;;) {
1374 tb1 = *ptb;
1375 n1 = (long)tb1 & 3;
1376 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1377 if (n1 == n && tb1 == tb)
1378 break;
1379 ptb = &tb1->jmp_next[n1];
1381 *ptb = tb->jmp_next[n];
1382 tb->jmp_next[n] = NULL;
1384 /* suppress the jump to next tb in generated code */
1385 tb_reset_jump(tb, n);
1387 /* suppress jumps in the tb on which we could have jumped */
1388 tb_reset_jump_recursive(tb_next);
1392 static void tb_reset_jump_recursive(TranslationBlock *tb)
1394 tb_reset_jump_recursive2(tb, 0);
1395 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Invalidate the translated code covering the single byte at 'pc'.
 * 'pc' is translated with cpu_get_phys_page_debug() and the page
 * descriptor's phys_offset (IO_MEM_UNASSIGNED when there is no
 * descriptor) supplies the page part of the address to invalidate.
 */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys_addr = cpu_get_phys_page_debug(env, pc);
    PhysPageDesc *desc = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
    target_ulong pd;
    ram_addr_t ram_addr;

    if (desc) {
        pd = desc->phys_offset;
    } else {
        pd = IO_MEM_UNASSIGNED;
    }
    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1425 #if defined(CONFIG_USER_ONLY)
1426 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1431 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1432 int flags, CPUWatchpoint **watchpoint)
1434 return -ENOSYS;
1436 #else
1437 /* Add a watchpoint. */
1438 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1439 int flags, CPUWatchpoint **watchpoint)
1441 target_ulong len_mask = ~(len - 1);
1442 CPUWatchpoint *wp;
1444 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1445 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1446 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1447 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1448 return -EINVAL;
1450 wp = qemu_malloc(sizeof(*wp));
1452 wp->vaddr = addr;
1453 wp->len_mask = len_mask;
1454 wp->flags = flags;
1456 /* keep all GDB-injected watchpoints in front */
1457 if (flags & BP_GDB)
1458 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1459 else
1460 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1462 tlb_flush_page(env, addr);
1464 if (watchpoint)
1465 *watchpoint = wp;
1466 return 0;
1469 /* Remove a specific watchpoint. */
1470 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1471 int flags)
1473 target_ulong len_mask = ~(len - 1);
1474 CPUWatchpoint *wp;
1476 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1477 if (addr == wp->vaddr && len_mask == wp->len_mask
1478 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1479 cpu_watchpoint_remove_by_ref(env, wp);
1480 return 0;
1483 return -ENOENT;
1486 /* Remove a specific watchpoint by reference. */
1487 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1489 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1491 tlb_flush_page(env, watchpoint->vaddr);
1493 qemu_free(watchpoint);
1496 /* Remove all matching watchpoints. */
1497 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1499 CPUWatchpoint *wp, *next;
1501 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1502 if (wp->flags & mask)
1503 cpu_watchpoint_remove_by_ref(env, wp);
1506 #endif
1508 /* Add a breakpoint. */
1509 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1510 CPUBreakpoint **breakpoint)
1512 #if defined(TARGET_HAS_ICE)
1513 CPUBreakpoint *bp;
1515 bp = qemu_malloc(sizeof(*bp));
1517 bp->pc = pc;
1518 bp->flags = flags;
1520 /* keep all GDB-injected breakpoints in front */
1521 if (flags & BP_GDB)
1522 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1523 else
1524 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1526 breakpoint_invalidate(env, pc);
1528 if (breakpoint)
1529 *breakpoint = bp;
1530 return 0;
1531 #else
1532 return -ENOSYS;
1533 #endif
1536 /* Remove a specific breakpoint. */
1537 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1539 #if defined(TARGET_HAS_ICE)
1540 CPUBreakpoint *bp;
1542 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1543 if (bp->pc == pc && bp->flags == flags) {
1544 cpu_breakpoint_remove_by_ref(env, bp);
1545 return 0;
1548 return -ENOENT;
1549 #else
1550 return -ENOSYS;
1551 #endif
1554 /* Remove a specific breakpoint by reference. */
1555 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1557 #if defined(TARGET_HAS_ICE)
1558 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1560 breakpoint_invalidate(env, breakpoint->pc);
1562 qemu_free(breakpoint);
1563 #endif
1566 /* Remove all matching breakpoints. */
1567 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1569 #if defined(TARGET_HAS_ICE)
1570 CPUBreakpoint *bp, *next;
1572 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1573 if (bp->flags & mask)
1574 cpu_breakpoint_remove_by_ref(env, bp);
1576 #endif
1579 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1580 CPU loop after each instruction */
1581 void cpu_single_step(CPUState *env, int enabled)
1583 #if defined(TARGET_HAS_ICE)
1584 if (env->singlestep_enabled != enabled) {
1585 env->singlestep_enabled = enabled;
1586 if (kvm_enabled())
1587 kvm_update_guest_debug(env, 0);
1588 else {
1589 /* must flush all the translated code to avoid inconsistencies */
1590 /* XXX: only flush what is necessary */
1591 tb_flush(env);
1594 #endif
1597 /* enable or disable low levels log */
1598 void cpu_set_log(int log_flags)
1600 loglevel = log_flags;
1601 if (loglevel && !logfile) {
1602 logfile = fopen(logfilename, log_append ? "a" : "w");
1603 if (!logfile) {
1604 perror(logfilename);
1605 _exit(1);
1607 #if !defined(CONFIG_SOFTMMU)
1608 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1610 static char logfile_buf[4096];
1611 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1613 #elif !defined(_WIN32)
1614 /* Win32 doesn't support line-buffering and requires size >= 2 */
1615 setvbuf(logfile, NULL, _IOLBF, 0);
1616 #endif
1617 log_append = 1;
1619 if (!loglevel && logfile) {
1620 fclose(logfile);
1621 logfile = NULL;
1625 void cpu_set_log_filename(const char *filename)
1627 logfilename = strdup(filename);
1628 if (logfile) {
1629 fclose(logfile);
1630 logfile = NULL;
1632 cpu_set_log(loglevel);
1635 static void cpu_unlink_tb(CPUState *env)
1637 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1638 problem and hope the cpu will stop of its own accord. For userspace
1639 emulation this often isn't actually as bad as it sounds. Often
1640 signals are used primarily to interrupt blocking syscalls. */
1641 TranslationBlock *tb;
1642 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1644 spin_lock(&interrupt_lock);
1645 tb = env->current_tb;
1646 /* if the cpu is currently executing code, we must unlink it and
1647 all the potentially executing TB */
1648 if (tb) {
1649 env->current_tb = NULL;
1650 tb_reset_jump_recursive(tb);
1652 spin_unlock(&interrupt_lock);
1655 #ifndef CONFIG_USER_ONLY
1656 /* mask must never be zero, except for A20 change call */
1657 static void tcg_handle_interrupt(CPUState *env, int mask)
1659 int old_mask;
1661 old_mask = env->interrupt_request;
1662 env->interrupt_request |= mask;
1665 * If called from iothread context, wake the target cpu in
1666 * case its halted.
1668 if (!qemu_cpu_is_self(env)) {
1669 qemu_cpu_kick(env);
1670 return;
1673 if (use_icount) {
1674 env->icount_decr.u16.high = 0xffff;
1675 if (!can_do_io(env)
1676 && (mask & ~old_mask) != 0) {
1677 cpu_abort(env, "Raised interrupt while not in I/O function");
1679 } else {
1680 cpu_unlink_tb(env);
1684 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1686 #else /* CONFIG_USER_ONLY */
1688 void cpu_interrupt(CPUState *env, int mask)
1690 env->interrupt_request |= mask;
1691 cpu_unlink_tb(env);
1693 #endif /* CONFIG_USER_ONLY */
1695 void cpu_reset_interrupt(CPUState *env, int mask)
1697 env->interrupt_request &= ~mask;
1700 void cpu_exit(CPUState *env)
1702 env->exit_request = 1;
1703 cpu_unlink_tb(env);
1706 const CPULogItem cpu_log_items[] = {
1707 { CPU_LOG_TB_OUT_ASM, "out_asm",
1708 "show generated host assembly code for each compiled TB" },
1709 { CPU_LOG_TB_IN_ASM, "in_asm",
1710 "show target assembly code for each compiled TB" },
1711 { CPU_LOG_TB_OP, "op",
1712 "show micro ops for each compiled TB" },
1713 { CPU_LOG_TB_OP_OPT, "op_opt",
1714 "show micro ops "
1715 #ifdef TARGET_I386
1716 "before eflags optimization and "
1717 #endif
1718 "after liveness analysis" },
1719 { CPU_LOG_INT, "int",
1720 "show interrupts/exceptions in short format" },
1721 { CPU_LOG_EXEC, "exec",
1722 "show trace before each executed TB (lots of logs)" },
1723 { CPU_LOG_TB_CPU, "cpu",
1724 "show CPU state before block translation" },
1725 #ifdef TARGET_I386
1726 { CPU_LOG_PCALL, "pcall",
1727 "show protected mode far calls/returns/exceptions" },
1728 { CPU_LOG_RESET, "cpu_reset",
1729 "show CPU state before CPU resets" },
1730 #endif
1731 #ifdef DEBUG_IOPORT
1732 { CPU_LOG_IOPORT, "ioport",
1733 "show all i/o ports accesses" },
1734 #endif
1735 { 0, NULL, NULL },
1738 #ifndef CONFIG_USER_ONLY
1739 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1740 = QLIST_HEAD_INITIALIZER(memory_client_list);
1742 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1743 ram_addr_t size,
1744 ram_addr_t phys_offset,
1745 bool log_dirty)
1747 CPUPhysMemoryClient *client;
1748 QLIST_FOREACH(client, &memory_client_list, list) {
1749 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1753 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1754 target_phys_addr_t end)
1756 CPUPhysMemoryClient *client;
1757 QLIST_FOREACH(client, &memory_client_list, list) {
1758 int r = client->sync_dirty_bitmap(client, start, end);
1759 if (r < 0)
1760 return r;
1762 return 0;
1765 static int cpu_notify_migration_log(int enable)
1767 CPUPhysMemoryClient *client;
1768 QLIST_FOREACH(client, &memory_client_list, list) {
1769 int r = client->migration_log(client, enable);
1770 if (r < 0)
1771 return r;
1773 return 0;
1776 struct last_map {
1777 target_phys_addr_t start_addr;
1778 ram_addr_t size;
1779 ram_addr_t phys_offset;
1782 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1783 * address. Each intermediate table provides the next L2_BITs of guest
1784 * physical address space. The number of levels vary based on host and
1785 * guest configuration, making it efficient to build the final guest
1786 * physical address by seeding the L1 offset and shifting and adding in
1787 * each L2 offset as we recurse through them. */
1788 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1789 void **lp, target_phys_addr_t addr,
1790 struct last_map *map)
1792 int i;
1794 if (*lp == NULL) {
1795 return;
1797 if (level == 0) {
1798 PhysPageDesc *pd = *lp;
1799 addr <<= L2_BITS + TARGET_PAGE_BITS;
1800 for (i = 0; i < L2_SIZE; ++i) {
1801 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1802 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1804 if (map->size &&
1805 start_addr == map->start_addr + map->size &&
1806 pd[i].phys_offset == map->phys_offset + map->size) {
1808 map->size += TARGET_PAGE_SIZE;
1809 continue;
1810 } else if (map->size) {
1811 client->set_memory(client, map->start_addr,
1812 map->size, map->phys_offset, false);
1815 map->start_addr = start_addr;
1816 map->size = TARGET_PAGE_SIZE;
1817 map->phys_offset = pd[i].phys_offset;
1820 } else {
1821 void **pp = *lp;
1822 for (i = 0; i < L2_SIZE; ++i) {
1823 phys_page_for_each_1(client, level - 1, pp + i,
1824 (addr << L2_BITS) | i, map);
1829 static void phys_page_for_each(CPUPhysMemoryClient *client)
1831 int i;
1832 struct last_map map = { };
1834 for (i = 0; i < P_L1_SIZE; ++i) {
1835 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1836 l1_phys_map + i, i, &map);
1838 if (map.size) {
1839 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1840 false);
1844 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1846 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1847 phys_page_for_each(client);
1850 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1852 QLIST_REMOVE(client, list);
1854 #endif
/*
 * Return 1 if the first 'n' bytes of s1 are exactly the string s2
 * (i.e. s2 has length n and the bytes compare equal), 0 otherwise.
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    return strlen(s2) == n && memcmp(s1, s2, n) == 0;
}
1863 /* takes a comma separated list of log masks. Return 0 if error. */
1864 int cpu_str_to_log_mask(const char *str)
1866 const CPULogItem *item;
1867 int mask;
1868 const char *p, *p1;
1870 p = str;
1871 mask = 0;
1872 for(;;) {
1873 p1 = strchr(p, ',');
1874 if (!p1)
1875 p1 = p + strlen(p);
1876 if(cmp1(p,p1-p,"all")) {
1877 for(item = cpu_log_items; item->mask != 0; item++) {
1878 mask |= item->mask;
1880 } else {
1881 for(item = cpu_log_items; item->mask != 0; item++) {
1882 if (cmp1(p, p1 - p, item->name))
1883 goto found;
1885 return 0;
1887 found:
1888 mask |= item->mask;
1889 if (*p1 != ',')
1890 break;
1891 p = p1 + 1;
1893 return mask;
1896 void cpu_abort(CPUState *env, const char *fmt, ...)
1898 va_list ap;
1899 va_list ap2;
1901 va_start(ap, fmt);
1902 va_copy(ap2, ap);
1903 fprintf(stderr, "qemu: fatal: ");
1904 vfprintf(stderr, fmt, ap);
1905 fprintf(stderr, "\n");
1906 #ifdef TARGET_I386
1907 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1908 #else
1909 cpu_dump_state(env, stderr, fprintf, 0);
1910 #endif
1911 if (qemu_log_enabled()) {
1912 qemu_log("qemu: fatal: ");
1913 qemu_log_vprintf(fmt, ap2);
1914 qemu_log("\n");
1915 #ifdef TARGET_I386
1916 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1917 #else
1918 log_cpu_state(env, 0);
1919 #endif
1920 qemu_log_flush();
1921 qemu_log_close();
1923 va_end(ap2);
1924 va_end(ap);
1925 #if defined(CONFIG_USER_ONLY)
1927 struct sigaction act;
1928 sigfillset(&act.sa_mask);
1929 act.sa_handler = SIG_DFL;
1930 sigaction(SIGABRT, &act, NULL);
1932 #endif
1933 abort();
1936 CPUState *cpu_copy(CPUState *env)
1938 CPUState *new_env = cpu_init(env->cpu_model_str);
1939 CPUState *next_cpu = new_env->next_cpu;
1940 int cpu_index = new_env->cpu_index;
1941 #if defined(TARGET_HAS_ICE)
1942 CPUBreakpoint *bp;
1943 CPUWatchpoint *wp;
1944 #endif
1946 memcpy(new_env, env, sizeof(CPUState));
1948 /* Preserve chaining and index. */
1949 new_env->next_cpu = next_cpu;
1950 new_env->cpu_index = cpu_index;
1952 /* Clone all break/watchpoints.
1953 Note: Once we support ptrace with hw-debug register access, make sure
1954 BP_CPU break/watchpoints are handled correctly on clone. */
1955 QTAILQ_INIT(&env->breakpoints);
1956 QTAILQ_INIT(&env->watchpoints);
1957 #if defined(TARGET_HAS_ICE)
1958 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1959 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1961 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1962 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1963 wp->flags, NULL);
1965 #endif
1967 return new_env;
1970 #if !defined(CONFIG_USER_ONLY)
1972 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1974 unsigned int i;
1976 /* Discard jump cache entries for any tb which might potentially
1977 overlap the flushed page. */
1978 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1979 memset (&env->tb_jmp_cache[i], 0,
1980 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1982 i = tb_jmp_cache_hash_page(addr);
1983 memset (&env->tb_jmp_cache[i], 0,
1984 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1987 static CPUTLBEntry s_cputlb_empty_entry = {
1988 .addr_read = -1,
1989 .addr_write = -1,
1990 .addr_code = -1,
1991 .addend = -1,
1994 /* NOTE: if flush_global is true, also flush global entries (not
1995 implemented yet) */
1996 void tlb_flush(CPUState *env, int flush_global)
1998 int i;
2000 #if defined(DEBUG_TLB)
2001 printf("tlb_flush:\n");
2002 #endif
2003 /* must reset current TB so that interrupts cannot modify the
2004 links while we are modifying them */
2005 env->current_tb = NULL;
2007 for(i = 0; i < CPU_TLB_SIZE; i++) {
2008 int mmu_idx;
2009 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2010 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2014 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2016 env->tlb_flush_addr = -1;
2017 env->tlb_flush_mask = 0;
2018 tlb_flush_count++;
2021 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2023 if (addr == (tlb_entry->addr_read &
2024 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2025 addr == (tlb_entry->addr_write &
2026 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2027 addr == (tlb_entry->addr_code &
2028 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2029 *tlb_entry = s_cputlb_empty_entry;
2033 void tlb_flush_page(CPUState *env, target_ulong addr)
2035 int i;
2036 int mmu_idx;
2038 #if defined(DEBUG_TLB)
2039 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2040 #endif
2041 /* Check if we need to flush due to large pages. */
2042 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2043 #if defined(DEBUG_TLB)
2044 printf("tlb_flush_page: forced full flush ("
2045 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2046 env->tlb_flush_addr, env->tlb_flush_mask);
2047 #endif
2048 tlb_flush(env, 1);
2049 return;
2051 /* must reset current TB so that interrupts cannot modify the
2052 links while we are modifying them */
2053 env->current_tb = NULL;
2055 addr &= TARGET_PAGE_MASK;
2056 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2057 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2058 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2060 tlb_flush_jmp_cache(env, addr);
2063 /* update the TLBs so that writes to code in the virtual page 'addr'
2064 can be detected */
2065 static void tlb_protect_code(ram_addr_t ram_addr)
2067 cpu_physical_memory_reset_dirty(ram_addr,
2068 ram_addr + TARGET_PAGE_SIZE,
2069 CODE_DIRTY_FLAG);
2072 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2073 tested for self modifying code */
2074 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2075 target_ulong vaddr)
2077 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2080 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2081 unsigned long start, unsigned long length)
2083 unsigned long addr;
2084 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2085 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2086 if ((addr - start) < length) {
2087 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2092 /* Note: start and end must be within the same ram block. */
2093 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2094 int dirty_flags)
2096 CPUState *env;
2097 unsigned long length, start1;
2098 int i;
2100 start &= TARGET_PAGE_MASK;
2101 end = TARGET_PAGE_ALIGN(end);
2103 length = end - start;
2104 if (length == 0)
2105 return;
2106 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2108 /* we modify the TLB cache so that the dirty bit will be set again
2109 when accessing the range */
2110 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2111 /* Check that we don't span multiple blocks - this breaks the
2112 address comparisons below. */
2113 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2114 != (end - 1) - start) {
2115 abort();
2118 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2119 int mmu_idx;
2120 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2121 for(i = 0; i < CPU_TLB_SIZE; i++)
2122 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2123 start1, length);
2128 int cpu_physical_memory_set_dirty_tracking(int enable)
2130 int ret = 0;
2131 in_migration = enable;
2132 ret = cpu_notify_migration_log(!!enable);
2133 return ret;
2136 int cpu_physical_memory_get_dirty_tracking(void)
2138 return in_migration;
2141 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2142 target_phys_addr_t end_addr)
2144 int ret;
2146 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2147 return ret;
2150 int cpu_physical_log_start(target_phys_addr_t start_addr,
2151 ram_addr_t size)
2153 CPUPhysMemoryClient *client;
2154 QLIST_FOREACH(client, &memory_client_list, list) {
2155 if (client->log_start) {
2156 int r = client->log_start(client, start_addr, size);
2157 if (r < 0) {
2158 return r;
2162 return 0;
2165 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2166 ram_addr_t size)
2168 CPUPhysMemoryClient *client;
2169 QLIST_FOREACH(client, &memory_client_list, list) {
2170 if (client->log_stop) {
2171 int r = client->log_stop(client, start_addr, size);
2172 if (r < 0) {
2173 return r;
2177 return 0;
2180 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2182 ram_addr_t ram_addr;
2183 void *p;
2185 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2186 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2187 + tlb_entry->addend);
2188 ram_addr = qemu_ram_addr_from_host_nofail(p);
2189 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2190 tlb_entry->addr_write |= TLB_NOTDIRTY;
2195 /* update the TLB according to the current state of the dirty bits */
2196 void cpu_tlb_update_dirty(CPUState *env)
2198 int i;
2199 int mmu_idx;
2200 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2201 for(i = 0; i < CPU_TLB_SIZE; i++)
2202 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2206 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2208 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2209 tlb_entry->addr_write = vaddr;
2212 /* update the TLB corresponding to virtual page vaddr
2213 so that it is no longer dirty */
2214 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2216 int i;
2217 int mmu_idx;
2219 vaddr &= TARGET_PAGE_MASK;
2220 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2221 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2222 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2225 /* Our TLB does not support large pages, so remember the area covered by
2226 large pages and trigger a full TLB flush if these are invalidated. */
2227 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2228 target_ulong size)
2230 target_ulong mask = ~(size - 1);
2232 if (env->tlb_flush_addr == (target_ulong)-1) {
2233 env->tlb_flush_addr = vaddr & mask;
2234 env->tlb_flush_mask = mask;
2235 return;
2237 /* Extend the existing region to include the new page.
2238 This is a compromise between unnecessary flushes and the cost
2239 of maintaining a full variable size TLB. */
2240 mask &= env->tlb_flush_mask;
2241 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2242 mask <<= 1;
2244 env->tlb_flush_addr &= mask;
2245 env->tlb_flush_mask = mask;
2248 /* Add a new TLB entry. At most one entry for a given virtual address
2249 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2250 supplied size is only used by tlb_flush_page. */
2251 void tlb_set_page(CPUState *env, target_ulong vaddr,
2252 target_phys_addr_t paddr, int prot,
2253 int mmu_idx, target_ulong size)
2255 PhysPageDesc *p;
2256 unsigned long pd;
2257 unsigned int index;
2258 target_ulong address;
2259 target_ulong code_address;
2260 unsigned long addend;
2261 CPUTLBEntry *te;
2262 CPUWatchpoint *wp;
2263 target_phys_addr_t iotlb;
2265 assert(size >= TARGET_PAGE_SIZE);
2266 if (size != TARGET_PAGE_SIZE) {
2267 tlb_add_large_page(env, vaddr, size);
2269 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2270 if (!p) {
2271 pd = IO_MEM_UNASSIGNED;
2272 } else {
2273 pd = p->phys_offset;
2275 #if defined(DEBUG_TLB)
2276 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2277 " prot=%x idx=%d pd=0x%08lx\n",
2278 vaddr, paddr, prot, mmu_idx, pd);
2279 #endif
2281 address = vaddr;
2282 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2283 /* IO memory case (romd handled later) */
2284 address |= TLB_MMIO;
2286 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2287 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2288 /* Normal RAM. */
2289 iotlb = pd & TARGET_PAGE_MASK;
2290 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2291 iotlb |= IO_MEM_NOTDIRTY;
2292 else
2293 iotlb |= IO_MEM_ROM;
2294 } else {
2295 /* IO handlers are currently passed a physical address.
2296 It would be nice to pass an offset from the base address
2297 of that region. This would avoid having to special case RAM,
2298 and avoid full address decoding in every device.
2299 We can't use the high bits of pd for this because
2300 IO_MEM_ROMD uses these as a ram address. */
2301 iotlb = (pd & ~TARGET_PAGE_MASK);
2302 if (p) {
2303 iotlb += p->region_offset;
2304 } else {
2305 iotlb += paddr;
2309 code_address = address;
2310 /* Make accesses to pages with watchpoints go via the
2311 watchpoint trap routines. */
2312 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2313 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2314 /* Avoid trapping reads of pages with a write breakpoint. */
2315 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2316 iotlb = io_mem_watch + paddr;
2317 address |= TLB_MMIO;
2318 break;
2323 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2324 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2325 te = &env->tlb_table[mmu_idx][index];
2326 te->addend = addend - vaddr;
2327 if (prot & PAGE_READ) {
2328 te->addr_read = address;
2329 } else {
2330 te->addr_read = -1;
2333 if (prot & PAGE_EXEC) {
2334 te->addr_code = code_address;
2335 } else {
2336 te->addr_code = -1;
2338 if (prot & PAGE_WRITE) {
2339 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2340 (pd & IO_MEM_ROMD)) {
2341 /* Write access calls the I/O callback. */
2342 te->addr_write = address | TLB_MMIO;
2343 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2344 !cpu_physical_memory_is_dirty(pd)) {
2345 te->addr_write = address | TLB_NOTDIRTY;
2346 } else {
2347 te->addr_write = address;
2349 } else {
2350 te->addr_write = -1;
2354 #else
2356 void tlb_flush(CPUState *env, int flush_global)
2360 void tlb_flush_page(CPUState *env, target_ulong addr)
2365 * Walks guest process memory "regions" one by one
2366 * and calls callback function 'fn' for each region.
2369 struct walk_memory_regions_data
2371 walk_memory_regions_fn fn;
2372 void *priv;
2373 unsigned long start;
2374 int prot;
2377 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2378 abi_ulong end, int new_prot)
2380 if (data->start != -1ul) {
2381 int rc = data->fn(data->priv, data->start, end, data->prot);
2382 if (rc != 0) {
2383 return rc;
2387 data->start = (new_prot ? end : -1ul);
2388 data->prot = new_prot;
2390 return 0;
2393 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2394 abi_ulong base, int level, void **lp)
2396 abi_ulong pa;
2397 int i, rc;
2399 if (*lp == NULL) {
2400 return walk_memory_regions_end(data, base, 0);
2403 if (level == 0) {
2404 PageDesc *pd = *lp;
2405 for (i = 0; i < L2_SIZE; ++i) {
2406 int prot = pd[i].flags;
2408 pa = base | (i << TARGET_PAGE_BITS);
2409 if (prot != data->prot) {
2410 rc = walk_memory_regions_end(data, pa, prot);
2411 if (rc != 0) {
2412 return rc;
2416 } else {
2417 void **pp = *lp;
2418 for (i = 0; i < L2_SIZE; ++i) {
2419 pa = base | ((abi_ulong)i <<
2420 (TARGET_PAGE_BITS + L2_BITS * level));
2421 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2422 if (rc != 0) {
2423 return rc;
2428 return 0;
2431 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2433 struct walk_memory_regions_data data;
2434 unsigned long i;
2436 data.fn = fn;
2437 data.priv = priv;
2438 data.start = -1ul;
2439 data.prot = 0;
2441 for (i = 0; i < V_L1_SIZE; i++) {
2442 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2443 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2444 if (rc != 0) {
2445 return rc;
2449 return walk_memory_regions_end(&data, 0, 0);
2452 static int dump_region(void *priv, abi_ulong start,
2453 abi_ulong end, unsigned long prot)
2455 FILE *f = (FILE *)priv;
2457 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2458 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2459 start, end, end - start,
2460 ((prot & PAGE_READ) ? 'r' : '-'),
2461 ((prot & PAGE_WRITE) ? 'w' : '-'),
2462 ((prot & PAGE_EXEC) ? 'x' : '-'));
2464 return (0);
2467 /* dump memory mappings */
2468 void page_dump(FILE *f)
2470 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2471 "start", "end", "size", "prot");
2472 walk_memory_regions(f, dump_region);
2475 int page_get_flags(target_ulong address)
2477 PageDesc *p;
2479 p = page_find(address >> TARGET_PAGE_BITS);
2480 if (!p)
2481 return 0;
2482 return p->flags;
2485 /* Modify the flags of a page and invalidate the code if necessary.
2486 The flag PAGE_WRITE_ORG is positioned automatically depending
2487 on PAGE_WRITE. The mmap_lock should already be held. */
2488 void page_set_flags(target_ulong start, target_ulong end, int flags)
2490 target_ulong addr, len;
2492 /* This function should never be called with addresses outside the
2493 guest address space. If this assert fires, it probably indicates
2494 a missing call to h2g_valid. */
2495 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2496 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2497 #endif
2498 assert(start < end);
2500 start = start & TARGET_PAGE_MASK;
2501 end = TARGET_PAGE_ALIGN(end);
2503 if (flags & PAGE_WRITE) {
2504 flags |= PAGE_WRITE_ORG;
2507 for (addr = start, len = end - start;
2508 len != 0;
2509 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2510 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2512 /* If the write protection bit is set, then we invalidate
2513 the code inside. */
2514 if (!(p->flags & PAGE_WRITE) &&
2515 (flags & PAGE_WRITE) &&
2516 p->first_tb) {
2517 tb_invalidate_phys_page(addr, 0, NULL);
2519 p->flags = flags;
2523 int page_check_range(target_ulong start, target_ulong len, int flags)
2525 PageDesc *p;
2526 target_ulong end;
2527 target_ulong addr;
2529 /* This function should never be called with addresses outside the
2530 guest address space. If this assert fires, it probably indicates
2531 a missing call to h2g_valid. */
2532 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2533 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2534 #endif
2536 if (len == 0) {
2537 return 0;
2539 if (start + len - 1 < start) {
2540 /* We've wrapped around. */
2541 return -1;
2544 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2545 start = start & TARGET_PAGE_MASK;
2547 for (addr = start, len = end - start;
2548 len != 0;
2549 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2550 p = page_find(addr >> TARGET_PAGE_BITS);
2551 if( !p )
2552 return -1;
2553 if( !(p->flags & PAGE_VALID) )
2554 return -1;
2556 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2557 return -1;
2558 if (flags & PAGE_WRITE) {
2559 if (!(p->flags & PAGE_WRITE_ORG))
2560 return -1;
2561 /* unprotect the page if it was put read-only because it
2562 contains translated code */
2563 if (!(p->flags & PAGE_WRITE)) {
2564 if (!page_unprotect(addr, 0, NULL))
2565 return -1;
2567 return 0;
2570 return 0;
2573 /* called from signal handler: invalidate the code and unprotect the
2574 page. Return TRUE if the fault was successfully handled. */
2575 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2577 unsigned int prot;
2578 PageDesc *p;
2579 target_ulong host_start, host_end, addr;
2581 /* Technically this isn't safe inside a signal handler. However we
2582 know this only ever happens in a synchronous SEGV handler, so in
2583 practice it seems to be ok. */
2584 mmap_lock();
2586 p = page_find(address >> TARGET_PAGE_BITS);
2587 if (!p) {
2588 mmap_unlock();
2589 return 0;
2592 /* if the page was really writable, then we change its
2593 protection back to writable */
2594 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2595 host_start = address & qemu_host_page_mask;
2596 host_end = host_start + qemu_host_page_size;
2598 prot = 0;
2599 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2600 p = page_find(addr >> TARGET_PAGE_BITS);
2601 p->flags |= PAGE_WRITE;
2602 prot |= p->flags;
2604 /* and since the content will be modified, we must invalidate
2605 the corresponding translated code. */
2606 tb_invalidate_phys_page(addr, pc, puc);
2607 #ifdef DEBUG_TB_CHECK
2608 tb_invalidate_check(addr);
2609 #endif
2611 mprotect((void *)g2h(host_start), qemu_host_page_size,
2612 prot & PAGE_BITS);
2614 mmap_unlock();
2615 return 1;
2617 mmap_unlock();
2618 return 0;
2621 static inline void tlb_set_dirty(CPUState *env,
2622 unsigned long addr, target_ulong vaddr)
2625 #endif /* defined(CONFIG_USER_ONLY) */
2627 #if !defined(CONFIG_USER_ONLY)
2629 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2630 typedef struct subpage_t {
2631 target_phys_addr_t base;
2632 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2633 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2634 } subpage_t;
2636 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2637 ram_addr_t memory, ram_addr_t region_offset);
2638 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2639 ram_addr_t orig_memory,
2640 ram_addr_t region_offset);
/*
 * Work out which byte range of the target page at 'addr' is covered by a
 * registration that starts at 'start_addr'.
 *
 *   start_addr2  (out) first covered offset within the page
 *   end_addr2    (out) last covered offset within the page
 *   need_subpage (out) set to 1 when the page is only partly covered;
 *                      never cleared here, so initialise it to 0 first
 *
 * NOTE: the length of the registration is read from a variable named
 * 'orig_size' in the caller's scope; the 'end_addr' argument is accepted
 * but not used.  Arguments are parenthesised so expressions can be passed
 * safely; they may still be evaluated more than once.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if ((addr) > (start_addr))                                      \
            (start_addr2) = 0;                                          \
        else {                                                          \
            (start_addr2) = (start_addr) & ~TARGET_PAGE_MASK;           \
            if ((start_addr2) > 0)                                      \
                (need_subpage) = 1;                                     \
        }                                                               \
                                                                        \
        if (((start_addr) + orig_size) - (addr) >= TARGET_PAGE_SIZE)    \
            (end_addr2) = TARGET_PAGE_SIZE - 1;                         \
        else {                                                          \
            (end_addr2) = ((start_addr) + orig_size - 1) &              \
                          ~TARGET_PAGE_MASK;                            \
            if ((end_addr2) < TARGET_PAGE_SIZE - 1)                     \
                (need_subpage) = 1;                                     \
        }                                                               \
    } while (0)
2661 /* register physical memory.
2662 For RAM, 'size' must be a multiple of the target page size.
2663 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2664 io memory page. The address used when calling the IO function is
2665 the offset from the start of the region, plus region_offset. Both
2666 start_addr and region_offset are rounded down to a page boundary
2667 before calculating this offset. This should not be a problem unless
2668 the low bits of start_addr and region_offset differ. */
2669 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2670 ram_addr_t size,
2671 ram_addr_t phys_offset,
2672 ram_addr_t region_offset,
2673 bool log_dirty)
2675 target_phys_addr_t addr, end_addr;
2676 PhysPageDesc *p;
2677 CPUState *env;
/* Unrounded size; the CHECK_SUBPAGE macro reads this variable by name. */
2678 ram_addr_t orig_size = size;
2679 subpage_t *subpage;
2681 assert(size);
2682 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
/* Unassigned ranges use the physical address itself as region offset. */
2684 if (phys_offset == IO_MEM_UNASSIGNED) {
2685 region_offset = start_addr;
2687 region_offset &= TARGET_PAGE_MASK;
/* Round the size up to a whole number of target pages. */
2688 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2689 end_addr = start_addr + (target_phys_addr_t)size;
/* Walk the range one target page at a time. */
2691 addr = start_addr;
2692 do {
2693 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* The page already has a descriptor that is not unassigned. */
2694 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2695 ram_addr_t orig_memory = p->phys_offset;
2696 target_phys_addr_t start_addr2, end_addr2;
2697 int need_subpage = 0;
2699 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2700 need_subpage);
2701 if (need_subpage) {
/* Partly covered page: create a subpage or reuse the one already there. */
2702 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2703 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2704 &p->phys_offset, orig_memory,
2705 p->region_offset);
2706 } else {
2707 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2708 >> IO_MEM_SHIFT];
2710 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2711 region_offset);
2712 p->region_offset = 0;
2713 } else {
/* Fully covered page: simply replace the mapping. */
2714 p->phys_offset = phys_offset;
/* For RAM, ROM and ROMD the offset advances with every page. */
2715 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2716 (phys_offset & IO_MEM_ROMD))
2717 phys_offset += TARGET_PAGE_SIZE;
2719 } else {
/* No usable descriptor yet: allocate one and fill it in. */
2720 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2721 p->phys_offset = phys_offset;
2722 p->region_offset = region_offset;
2723 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2724 (phys_offset & IO_MEM_ROMD)) {
2725 phys_offset += TARGET_PAGE_SIZE;
2726 } else {
2727 target_phys_addr_t start_addr2, end_addr2;
2728 int need_subpage = 0;
2730 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2731 end_addr2, need_subpage);
2733 if (need_subpage) {
/* The rest of the page is set up as IO_MEM_UNASSIGNED. */
2734 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2735 &p->phys_offset, IO_MEM_UNASSIGNED,
2736 addr & TARGET_PAGE_MASK);
2737 subpage_register(subpage, start_addr2, end_addr2,
2738 phys_offset, region_offset);
2739 p->region_offset = 0;
2743 region_offset += TARGET_PAGE_SIZE;
2744 addr += TARGET_PAGE_SIZE;
2745 } while (addr != end_addr);
2747 /* since each CPU stores ram addresses in its TLB cache, we must
2748 reset the modified entries */
2749 /* XXX: slow ! */
2750 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2751 tlb_flush(env, 1);
2755 /* XXX: temporary until new memory mapping API */
2756 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2758 PhysPageDesc *p;
2760 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2761 if (!p)
2762 return IO_MEM_UNASSIGNED;
2763 return p->phys_offset;
2766 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2768 if (kvm_enabled())
2769 kvm_coalesce_mmio_region(addr, size);
2772 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2774 if (kvm_enabled())
2775 kvm_uncoalesce_mmio_region(addr, size);
/* Forward the request to KVM when KVM is enabled; otherwise do nothing. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2784 #if defined(__linux__) && !defined(TARGET_S390X)
2786 #include <sys/vfs.h>
2788 #define HUGETLBFS_MAGIC 0x958458f6
/*
 * Return the block size reported by statfs() for 'path'.
 * A warning is printed when the filesystem is not hugetlbfs, but the size
 * is returned regardless.  Returns 0 (after perror) if statfs() fails.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;

    /* Retry for as long as the call is interrupted by a signal. */
    for (;;) {
        if (statfs(path, &fs) == 0) {
            break;
        }
        if (errno != EINTR) {
            perror(path);
            return 0;
        }
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
2810 static void *file_ram_alloc(RAMBlock *block,
2811 ram_addr_t memory,
2812 const char *path)
2814 char *filename;
2815 void *area;
2816 int fd;
2817 #ifdef MAP_POPULATE
2818 int flags;
2819 #endif
2820 unsigned long hpagesize;
2822 hpagesize = gethugepagesize(path);
2823 if (!hpagesize) {
2824 return NULL;
2827 if (memory < hpagesize) {
2828 return NULL;
2831 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2832 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2833 return NULL;
2836 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2837 return NULL;
2840 fd = mkstemp(filename);
2841 if (fd < 0) {
2842 perror("unable to create backing store for hugepages");
2843 free(filename);
2844 return NULL;
2846 unlink(filename);
2847 free(filename);
2849 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2852 * ftruncate is not supported by hugetlbfs in older
2853 * hosts, so don't bother bailing out on errors.
2854 * If anything goes wrong with it under other filesystems,
2855 * mmap will fail.
2857 if (ftruncate(fd, memory))
2858 perror("ftruncate");
2860 #ifdef MAP_POPULATE
2861 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2862 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2863 * to sidestep this quirk.
2865 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2866 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2867 #else
2868 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2869 #endif
2870 if (area == MAP_FAILED) {
2871 perror("file_ram_alloc: can't mmap RAM pages");
2872 close(fd);
2873 return (NULL);
2875 block->fd = fd;
2876 return area;
2878 #endif
2880 static ram_addr_t find_ram_offset(ram_addr_t size)
2882 RAMBlock *block, *next_block;
2883 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2885 if (QLIST_EMPTY(&ram_list.blocks))
2886 return 0;
2888 QLIST_FOREACH(block, &ram_list.blocks, next) {
2889 ram_addr_t end, next = RAM_ADDR_MAX;
2891 end = block->offset + block->length;
2893 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2894 if (next_block->offset >= end) {
2895 next = MIN(next, next_block->offset);
2898 if (next - end >= size && next - end < mingap) {
2899 offset = end;
2900 mingap = next - end;
2903 return offset;
2906 static ram_addr_t last_ram_offset(void)
2908 RAMBlock *block;
2909 ram_addr_t last = 0;
2911 QLIST_FOREACH(block, &ram_list.blocks, next)
2912 last = MAX(last, block->offset + block->length);
2914 return last;
/* Register a new RAM block of 'size' bytes (rounded up to the target page
   size) identified by the optional device path plus 'name'.  When 'host' is
   non-NULL it is used as the backing memory and the block is flagged
   RAM_PREALLOC_MASK; otherwise memory is allocated here.  Aborts if a block
   with the same id string already exists.  Returns the ram offset chosen
   for the block. */
2917 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2918 ram_addr_t size, void *host)
2920 RAMBlock *new_block, *block;
2922 size = TARGET_PAGE_ALIGN(size);
2923 new_block = qemu_mallocz(sizeof(*new_block));
/* Build the id string: "<device path>/" (when available) followed by name. */
2925 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2926 char *id = dev->parent_bus->info->get_dev_path(dev);
2927 if (id) {
2928 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2929 qemu_free(id);
2932 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* Id strings must be unique across all registered blocks. */
2934 QLIST_FOREACH(block, &ram_list.blocks, next) {
2935 if (!strcmp(block->idstr, new_block->idstr)) {
2936 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2937 new_block->idstr);
2938 abort();
2942 new_block->offset = find_ram_offset(size);
2943 if (host) {
2944 new_block->host = host;
2945 new_block->flags |= RAM_PREALLOC_MASK;
2946 } else {
2947 if (mem_path) {
2948 #if defined (__linux__) && !defined(TARGET_S390X)
/* Try file-backed memory first; fall back to qemu_vmalloc() on failure. */
2949 new_block->host = file_ram_alloc(new_block, size, mem_path);
2950 if (!new_block->host) {
2951 new_block->host = qemu_vmalloc(size);
2952 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2954 #else
2955 fprintf(stderr, "-mem-path option unsupported\n");
2956 exit(1);
2957 #endif
2958 } else {
2959 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2960 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2961 an system defined value, which is at least 256GB. Larger systems
2962 have larger values. We put the guest between the end of data
2963 segment (system break) and this value. We use 32GB as a base to
2964 have enough room for the system break to grow. */
2965 new_block->host = mmap((void*)0x800000000, size,
2966 PROT_EXEC|PROT_READ|PROT_WRITE,
2967 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2968 if (new_block->host == MAP_FAILED) {
2969 fprintf(stderr, "Allocating RAM failed\n");
2970 abort();
2972 #else
/* Under Xen new_block->host is not assigned here. */
2973 if (xen_enabled()) {
2974 xen_ram_alloc(new_block->offset, size);
2975 } else {
2976 new_block->host = qemu_vmalloc(size);
2978 #endif
2979 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2982 new_block->length = size;
2984 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* Resize the dirty array (one byte per target page) and set the new
   block's entries to 0xff. */
2986 ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
2987 last_ram_offset() >> TARGET_PAGE_BITS);
2988 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2989 0xff, size >> TARGET_PAGE_BITS);
2991 if (kvm_enabled())
2992 kvm_setup_guest_memory(new_block->host, size);
2994 return new_block->offset;
2997 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2999 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
3002 void qemu_ram_free_from_ptr(ram_addr_t addr)
3004 RAMBlock *block;
3006 QLIST_FOREACH(block, &ram_list.blocks, next) {
3007 if (addr == block->offset) {
3008 QLIST_REMOVE(block, next);
3009 qemu_free(block);
3010 return;
3015 void qemu_ram_free(ram_addr_t addr)
3017 RAMBlock *block;
3019 QLIST_FOREACH(block, &ram_list.blocks, next) {
3020 if (addr == block->offset) {
3021 QLIST_REMOVE(block, next);
3022 if (block->flags & RAM_PREALLOC_MASK) {
3024 } else if (mem_path) {
3025 #if defined (__linux__) && !defined(TARGET_S390X)
3026 if (block->fd) {
3027 munmap(block->host, block->length);
3028 close(block->fd);
3029 } else {
3030 qemu_vfree(block->host);
3032 #else
3033 abort();
3034 #endif
3035 } else {
3036 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3037 munmap(block->host, block->length);
3038 #else
3039 if (xen_enabled()) {
3040 xen_invalidate_map_cache_entry(block->host);
3041 } else {
3042 qemu_vfree(block->host);
3044 #endif
3046 qemu_free(block);
3047 return;
3053 #ifndef _WIN32
/* Replace the host mapping of 'length' bytes at ram address 'addr' with a
   fresh mapping at the same host virtual address.  Blocks flagged
   RAM_PREALLOC_MASK are left untouched.  Exits the process if the new
   mapping cannot be placed at the same address.  Does nothing when 'addr'
   is not inside any block. */
3054 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3056 RAMBlock *block;
3057 ram_addr_t offset;
3058 int flags;
3059 void *area, *vaddr;
3061 QLIST_FOREACH(block, &ram_list.blocks, next) {
/* Unsigned subtraction: addresses below block->offset wrap to a large
   value and fail the range test. */
3062 offset = addr - block->offset;
3063 if (offset < block->length) {
3064 vaddr = block->host + offset;
3065 if (block->flags & RAM_PREALLOC_MASK) {
3067 } else {
/* Drop the old pages, then map again at exactly the same address. */
3068 flags = MAP_FIXED;
3069 munmap(vaddr, length);
3070 if (mem_path) {
3071 #if defined(__linux__) && !defined(TARGET_S390X)
/* File-backed block: remap from the file at the same offset. */
3072 if (block->fd) {
3073 #ifdef MAP_POPULATE
3074 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3075 MAP_PRIVATE;
3076 #else
3077 flags |= MAP_PRIVATE;
3078 #endif
3079 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3080 flags, block->fd, offset);
3081 } else {
3082 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3083 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3084 flags, -1, 0);
3086 #else
3087 abort();
3088 #endif
3089 } else {
3090 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3091 flags |= MAP_SHARED | MAP_ANONYMOUS;
3092 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3093 flags, -1, 0);
3094 #else
3095 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3096 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3097 flags, -1, 0);
3098 #endif
/* This also catches MAP_FAILED, which never equals vaddr. */
3100 if (area != vaddr) {
3101 fprintf(stderr, "Could not remap addr: "
3102 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3103 length, addr);
3104 exit(1);
3106 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3108 return;
3112 #endif /* !_WIN32 */
3114 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3115 With the exception of the softmmu code in this file, this should
3116 only be used for local memory (e.g. video ram) that the device owns,
3117 and knows it isn't going to access beyond the end of the block.
3119 It should not be used for general purpose DMA.
3120 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3122 void *qemu_get_ram_ptr(ram_addr_t addr)
3124 RAMBlock *block;
3126 QLIST_FOREACH(block, &ram_list.blocks, next) {
3127 if (addr - block->offset < block->length) {
3128 /* Move this entry to to start of the list. */
3129 if (block != QLIST_FIRST(&ram_list.blocks)) {
3130 QLIST_REMOVE(block, next);
3131 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3133 if (xen_enabled()) {
3134 /* We need to check if the requested address is in the RAM
3135 * because we don't want to map the entire memory in QEMU.
3136 * In that case just map until the end of the page.
3138 if (block->offset == 0) {
3139 return xen_map_cache(addr, 0, 0);
3140 } else if (block->host == NULL) {
3141 block->host =
3142 xen_map_cache(block->offset, block->length, 1);
3145 return block->host + (addr - block->offset);
3149 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3150 abort();
3152 return NULL;
3155 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3156 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3158 void *qemu_safe_ram_ptr(ram_addr_t addr)
3160 RAMBlock *block;
3162 QLIST_FOREACH(block, &ram_list.blocks, next) {
3163 if (addr - block->offset < block->length) {
3164 if (xen_enabled()) {
3165 /* We need to check if the requested address is in the RAM
3166 * because we don't want to map the entire memory in QEMU.
3167 * In that case just map until the end of the page.
3169 if (block->offset == 0) {
3170 return xen_map_cache(addr, 0, 0);
3171 } else if (block->host == NULL) {
3172 block->host =
3173 xen_map_cache(block->offset, block->length, 1);
3176 return block->host + (addr - block->offset);
3180 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3181 abort();
3183 return NULL;
3186 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3187 * but takes a size argument */
3188 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3190 if (*size == 0) {
3191 return NULL;
3193 if (xen_enabled()) {
3194 return xen_map_cache(addr, *size, 1);
3195 } else {
3196 RAMBlock *block;
3198 QLIST_FOREACH(block, &ram_list.blocks, next) {
3199 if (addr - block->offset < block->length) {
3200 if (addr - block->offset + *size > block->length)
3201 *size = block->length - addr + block->offset;
3202 return block->host + (addr - block->offset);
3206 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3207 abort();
/* Counterpart of the "get ram ptr" helpers; it only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3216 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3218 RAMBlock *block;
3219 uint8_t *host = ptr;
3221 if (xen_enabled()) {
3222 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3223 return 0;
3226 QLIST_FOREACH(block, &ram_list.blocks, next) {
3227 /* This case append when the block is not mapped. */
3228 if (block->host == NULL) {
3229 continue;
3231 if (host - block->host < block->length) {
3232 *ram_addr = block->offset + (host - block->host);
3233 return 0;
3237 return -1;
3240 /* Some of the softmmu routines need to translate from a host pointer
3241 (typically a TLB entry) back to a ram offset. */
3242 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3244 ram_addr_t ram_addr;
3246 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3247 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3248 abort();
3250 return ram_addr;
3253 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3255 #ifdef DEBUG_UNASSIGNED
3256 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3257 #endif
3258 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3259 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3260 #endif
3261 return 0;
3264 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3266 #ifdef DEBUG_UNASSIGNED
3267 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3268 #endif
3269 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3270 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3271 #endif
3272 return 0;
3275 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3277 #ifdef DEBUG_UNASSIGNED
3278 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3279 #endif
3280 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3281 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3282 #endif
3283 return 0;
3286 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3288 #ifdef DEBUG_UNASSIGNED
3289 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3290 #endif
3291 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3292 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3293 #endif
3296 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3298 #ifdef DEBUG_UNASSIGNED
3299 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3300 #endif
3301 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3302 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3303 #endif
3306 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3308 #ifdef DEBUG_UNASSIGNED
3309 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3310 #endif
3311 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3312 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3313 #endif
3316 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3317 unassigned_mem_readb,
3318 unassigned_mem_readw,
3319 unassigned_mem_readl,
3322 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3323 unassigned_mem_writeb,
3324 unassigned_mem_writew,
3325 unassigned_mem_writel,
3328 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3329 uint32_t val)
3331 int dirty_flags;
3332 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3333 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3334 #if !defined(CONFIG_USER_ONLY)
3335 tb_invalidate_phys_page_fast(ram_addr, 1);
3336 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3337 #endif
3339 stb_p(qemu_get_ram_ptr(ram_addr), val);
3340 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3341 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3342 /* we remove the notdirty callback only if the code has been
3343 flushed */
3344 if (dirty_flags == 0xff)
3345 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3348 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3349 uint32_t val)
3351 int dirty_flags;
3352 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3353 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3354 #if !defined(CONFIG_USER_ONLY)
3355 tb_invalidate_phys_page_fast(ram_addr, 2);
3356 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3357 #endif
3359 stw_p(qemu_get_ram_ptr(ram_addr), val);
3360 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3361 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3362 /* we remove the notdirty callback only if the code has been
3363 flushed */
3364 if (dirty_flags == 0xff)
3365 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3368 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3369 uint32_t val)
3371 int dirty_flags;
3372 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3373 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3374 #if !defined(CONFIG_USER_ONLY)
3375 tb_invalidate_phys_page_fast(ram_addr, 4);
3376 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3377 #endif
3379 stl_p(qemu_get_ram_ptr(ram_addr), val);
3380 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3381 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3382 /* we remove the notdirty callback only if the code has been
3383 flushed */
3384 if (dirty_flags == 0xff)
3385 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3388 static CPUReadMemoryFunc * const error_mem_read[3] = {
3389 NULL, /* never used */
3390 NULL, /* never used */
3391 NULL, /* never used */
3394 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3395 notdirty_mem_writeb,
3396 notdirty_mem_writew,
3397 notdirty_mem_writel,
3400 /* Generate a debug exception if a watchpoint has been hit. */
/* 'offset' is the offset of the access within its page, 'len_mask' masks
   off the low address bits covered by the access size, and 'flags' selects
   the access kind (BP_MEM_READ / BP_MEM_WRITE) to match against. */
3401 static void check_watchpoint(int offset, int len_mask, int flags)
3403 CPUState *env = cpu_single_env;
3404 target_ulong pc, cs_base;
3405 TranslationBlock *tb;
3406 target_ulong vaddr;
3407 CPUWatchpoint *wp;
3408 int cpu_flags;
3410 if (env->watchpoint_hit) {
3411 /* We re-entered the check after replacing the TB. Now raise
3412 * the debug interrupt so that it will trigger after the
3413 * current instruction. */
3414 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3415 return;
/* Rebuild the full virtual address of the access. */
3417 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3418 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* Match if the access covers the watchpoint address or the watchpoint
   covers the access address, and the access kind is being watched. */
3419 if ((vaddr == (wp->vaddr & len_mask) ||
3420 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3421 wp->flags |= BP_WATCHPOINT_HIT;
3422 if (!env->watchpoint_hit) {
3423 env->watchpoint_hit = wp;
3424 tb = tb_find_pc(env->mem_io_pc);
3425 if (!tb) {
3426 cpu_abort(env, "check_watchpoint: could not find TB for "
3427 "pc=%p", (void *)env->mem_io_pc);
/* Restore CPU state for the faulting instruction and drop its TB. */
3429 cpu_restore_state(tb, env, env->mem_io_pc);
3430 tb_phys_invalidate(tb, -1);
3431 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3432 env->exception_index = EXCP_DEBUG;
3433 } else {
3434 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3435 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
/* NOTE(review): presumably this does not return to the caller - confirm. */
3437 cpu_resume_from_signal(env, NULL);
3439 } else {
3440 wp->flags &= ~BP_WATCHPOINT_HIT;
3445 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3446 so these check for a hit then pass through to the normal out-of-line
3447 phys routines. */
3448 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3450 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3451 return ldub_phys(addr);
3454 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3456 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3457 return lduw_phys(addr);
3460 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3462 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3463 return ldl_phys(addr);
3466 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3467 uint32_t val)
3469 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3470 stb_phys(addr, val);
3473 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3474 uint32_t val)
3476 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3477 stw_phys(addr, val);
3480 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3481 uint32_t val)
3483 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3484 stl_phys(addr, val);
3487 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3488 watch_mem_readb,
3489 watch_mem_readw,
3490 watch_mem_readl,
3493 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3494 watch_mem_writeb,
3495 watch_mem_writew,
3496 watch_mem_writel,
3499 static inline uint32_t subpage_readlen (subpage_t *mmio,
3500 target_phys_addr_t addr,
3501 unsigned int len)
3503 unsigned int idx = SUBPAGE_IDX(addr);
3504 #if defined(DEBUG_SUBPAGE)
3505 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3506 mmio, len, addr, idx);
3507 #endif
3509 addr += mmio->region_offset[idx];
3510 idx = mmio->sub_io_index[idx];
3511 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3514 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3515 uint32_t value, unsigned int len)
3517 unsigned int idx = SUBPAGE_IDX(addr);
3518 #if defined(DEBUG_SUBPAGE)
3519 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3520 __func__, mmio, len, addr, idx, value);
3521 #endif
3523 addr += mmio->region_offset[idx];
3524 idx = mmio->sub_io_index[idx];
3525 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3528 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3530 return subpage_readlen(opaque, addr, 0);
3533 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3534 uint32_t value)
3536 subpage_writelen(opaque, addr, value, 0);
3539 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3541 return subpage_readlen(opaque, addr, 1);
3544 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3545 uint32_t value)
3547 subpage_writelen(opaque, addr, value, 1);
3550 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3552 return subpage_readlen(opaque, addr, 2);
3555 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3556 uint32_t value)
3558 subpage_writelen(opaque, addr, value, 2);
3561 static CPUReadMemoryFunc * const subpage_read[] = {
3562 &subpage_readb,
3563 &subpage_readw,
3564 &subpage_readl,
3567 static CPUWriteMemoryFunc * const subpage_write[] = {
3568 &subpage_writeb,
3569 &subpage_writew,
3570 &subpage_writel,
3573 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3574 ram_addr_t memory, ram_addr_t region_offset)
3576 int idx, eidx;
3578 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3579 return -1;
3580 idx = SUBPAGE_IDX(start);
3581 eidx = SUBPAGE_IDX(end);
3582 #if defined(DEBUG_SUBPAGE)
3583 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3584 mmio, start, end, idx, eidx, memory);
3585 #endif
3586 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3587 memory = IO_MEM_UNASSIGNED;
3588 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3589 for (; idx <= eidx; idx++) {
3590 mmio->sub_io_index[idx] = memory;
3591 mmio->region_offset[idx] = region_offset;
3594 return 0;
3597 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3598 ram_addr_t orig_memory,
3599 ram_addr_t region_offset)
3601 subpage_t *mmio;
3602 int subpage_memory;
3604 mmio = qemu_mallocz(sizeof(subpage_t));
3606 mmio->base = base;
3607 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3608 DEVICE_NATIVE_ENDIAN);
3609 #if defined(DEBUG_SUBPAGE)
3610 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3611 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3612 #endif
3613 *phys = subpage_memory | IO_MEM_SUBPAGE;
3614 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3616 return mmio;
3619 static int get_free_io_mem_idx(void)
3621 int i;
3623 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3624 if (!io_mem_used[i]) {
3625 io_mem_used[i] = 1;
3626 return i;
3628 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3629 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3645 typedef struct SwapEndianContainer {
3646 CPUReadMemoryFunc *read[3];
3647 CPUWriteMemoryFunc *write[3];
3648 void *opaque;
3649 } SwapEndianContainer;
3651 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3653 uint32_t val;
3654 SwapEndianContainer *c = opaque;
3655 val = c->read[0](c->opaque, addr);
3656 return val;
3659 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3661 uint32_t val;
3662 SwapEndianContainer *c = opaque;
3663 val = bswap16(c->read[1](c->opaque, addr));
3664 return val;
3667 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3669 uint32_t val;
3670 SwapEndianContainer *c = opaque;
3671 val = bswap32(c->read[2](c->opaque, addr));
3672 return val;
3675 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3676 swapendian_mem_readb,
3677 swapendian_mem_readw,
3678 swapendian_mem_readl
3681 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3682 uint32_t val)
3684 SwapEndianContainer *c = opaque;
3685 c->write[0](c->opaque, addr, val);
3688 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3689 uint32_t val)
3691 SwapEndianContainer *c = opaque;
3692 c->write[1](c->opaque, addr, bswap16(val));
3695 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3696 uint32_t val)
3698 SwapEndianContainer *c = opaque;
3699 c->write[2](c->opaque, addr, bswap32(val));
3702 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3703 swapendian_mem_writeb,
3704 swapendian_mem_writew,
3705 swapendian_mem_writel
3708 static void swapendian_init(int io_index)
3710 SwapEndianContainer *c = qemu_malloc(sizeof(SwapEndianContainer));
3711 int i;
3713 /* Swap mmio for big endian targets */
3714 c->opaque = io_mem_opaque[io_index];
3715 for (i = 0; i < 3; i++) {
3716 c->read[i] = io_mem_read[io_index][i];
3717 c->write[i] = io_mem_write[io_index][i];
3719 io_mem_read[io_index][i] = swapendian_readfn[i];
3720 io_mem_write[io_index][i] = swapendian_writefn[i];
3722 io_mem_opaque[io_index] = c;
3725 static void swapendian_del(int io_index)
3727 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3728 qemu_free(io_mem_opaque[io_index]);
3732 /* mem_read and mem_write are arrays of functions containing the
3733 function to access byte (index 0), word (index 1) and dword (index
3734 2). Functions can be omitted with a NULL function pointer.
3735 If io_index is non zero, the corresponding io zone is
3736 modified. If it is zero, a new io zone is allocated. The return
3737 value can be used with cpu_register_physical_memory(). (-1) is
3738 returned if error. */
3739 static int cpu_register_io_memory_fixed(int io_index,
3740 CPUReadMemoryFunc * const *mem_read,
3741 CPUWriteMemoryFunc * const *mem_write,
3742 void *opaque, enum device_endian endian)
3744 int i;
3746 if (io_index <= 0) {
3747 io_index = get_free_io_mem_idx();
3748 if (io_index == -1)
3749 return io_index;
3750 } else {
3751 io_index >>= IO_MEM_SHIFT;
3752 if (io_index >= IO_MEM_NB_ENTRIES)
3753 return -1;
3756 for (i = 0; i < 3; ++i) {
3757 io_mem_read[io_index][i]
3758 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3760 for (i = 0; i < 3; ++i) {
3761 io_mem_write[io_index][i]
3762 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3764 io_mem_opaque[io_index] = opaque;
3766 switch (endian) {
3767 case DEVICE_BIG_ENDIAN:
3768 #ifndef TARGET_WORDS_BIGENDIAN
3769 swapendian_init(io_index);
3770 #endif
3771 break;
3772 case DEVICE_LITTLE_ENDIAN:
3773 #ifdef TARGET_WORDS_BIGENDIAN
3774 swapendian_init(io_index);
3775 #endif
3776 break;
3777 case DEVICE_NATIVE_ENDIAN:
3778 default:
3779 break;
3782 return (io_index << IO_MEM_SHIFT);
3785 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3786 CPUWriteMemoryFunc * const *mem_write,
3787 void *opaque, enum device_endian endian)
3789 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3792 void cpu_unregister_io_memory(int io_table_address)
3794 int i;
3795 int io_index = io_table_address >> IO_MEM_SHIFT;
3797 swapendian_del(io_index);
3799 for (i=0;i < 3; i++) {
3800 io_mem_read[io_index][i] = unassigned_mem_read[i];
3801 io_mem_write[io_index][i] = unassigned_mem_write[i];
3803 io_mem_opaque[io_index] = NULL;
3804 io_mem_used[io_index] = 0;
3807 static void io_mem_init(void)
3809 int i;
3811 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3812 unassigned_mem_write, NULL,
3813 DEVICE_NATIVE_ENDIAN);
3814 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3815 unassigned_mem_write, NULL,
3816 DEVICE_NATIVE_ENDIAN);
3817 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3818 notdirty_mem_write, NULL,
3819 DEVICE_NATIVE_ENDIAN);
3820 for (i=0; i<5; i++)
3821 io_mem_used[i] = 1;
3823 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3824 watch_mem_write, NULL,
3825 DEVICE_NATIVE_ENDIAN);
3828 static void memory_map_init(void)
3830 system_memory = qemu_malloc(sizeof(*system_memory));
3831 memory_region_init(system_memory, "system", INT64_MAX);
3832 set_system_memory_map(system_memory);
3835 MemoryRegion *get_system_memory(void)
3837 return system_memory;
3840 #endif /* !defined(CONFIG_USER_ONLY) */
3842 /* physical memory access (slow version, mainly for debug) */
3843 #if defined(CONFIG_USER_ONLY)
3844 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3845 uint8_t *buf, int len, int is_write)
3847 int l, flags;
3848 target_ulong page;
3849 void * p;
3851 while (len > 0) {
3852 page = addr & TARGET_PAGE_MASK;
3853 l = (page + TARGET_PAGE_SIZE) - addr;
3854 if (l > len)
3855 l = len;
3856 flags = page_get_flags(page);
3857 if (!(flags & PAGE_VALID))
3858 return -1;
3859 if (is_write) {
3860 if (!(flags & PAGE_WRITE))
3861 return -1;
3862 /* XXX: this code should not depend on lock_user */
3863 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3864 return -1;
3865 memcpy(p, buf, l);
3866 unlock_user(p, addr, l);
3867 } else {
3868 if (!(flags & PAGE_READ))
3869 return -1;
3870 /* XXX: this code should not depend on lock_user */
3871 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3872 return -1;
3873 memcpy(buf, p, l);
3874 unlock_user(p, addr, 0);
3876 len -= l;
3877 buf += l;
3878 addr += l;
3880 return 0;
3883 #else
3884 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3885 int len, int is_write)
3887 int l, io_index;
3888 uint8_t *ptr;
3889 uint32_t val;
3890 target_phys_addr_t page;
3891 ram_addr_t pd;
3892 PhysPageDesc *p;
3894 while (len > 0) {
3895 page = addr & TARGET_PAGE_MASK;
3896 l = (page + TARGET_PAGE_SIZE) - addr;
3897 if (l > len)
3898 l = len;
3899 p = phys_page_find(page >> TARGET_PAGE_BITS);
3900 if (!p) {
3901 pd = IO_MEM_UNASSIGNED;
3902 } else {
3903 pd = p->phys_offset;
3906 if (is_write) {
3907 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3908 target_phys_addr_t addr1 = addr;
3909 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3910 if (p)
3911 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3912 /* XXX: could force cpu_single_env to NULL to avoid
3913 potential bugs */
3914 if (l >= 4 && ((addr1 & 3) == 0)) {
3915 /* 32 bit write access */
3916 val = ldl_p(buf);
3917 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3918 l = 4;
3919 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3920 /* 16 bit write access */
3921 val = lduw_p(buf);
3922 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3923 l = 2;
3924 } else {
3925 /* 8 bit write access */
3926 val = ldub_p(buf);
3927 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3928 l = 1;
3930 } else {
3931 ram_addr_t addr1;
3932 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3933 /* RAM case */
3934 ptr = qemu_get_ram_ptr(addr1);
3935 memcpy(ptr, buf, l);
3936 if (!cpu_physical_memory_is_dirty(addr1)) {
3937 /* invalidate code */
3938 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3939 /* set dirty bit */
3940 cpu_physical_memory_set_dirty_flags(
3941 addr1, (0xff & ~CODE_DIRTY_FLAG));
3943 qemu_put_ram_ptr(ptr);
3945 } else {
3946 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3947 !(pd & IO_MEM_ROMD)) {
3948 target_phys_addr_t addr1 = addr;
3949 /* I/O case */
3950 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3951 if (p)
3952 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3953 if (l >= 4 && ((addr1 & 3) == 0)) {
3954 /* 32 bit read access */
3955 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3956 stl_p(buf, val);
3957 l = 4;
3958 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3959 /* 16 bit read access */
3960 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3961 stw_p(buf, val);
3962 l = 2;
3963 } else {
3964 /* 8 bit read access */
3965 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3966 stb_p(buf, val);
3967 l = 1;
3969 } else {
3970 /* RAM case */
3971 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3972 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3973 qemu_put_ram_ptr(ptr);
3976 len -= l;
3977 buf += l;
3978 addr += l;
3982 /* used for ROM loading : can write in RAM and ROM */
3983 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3984 const uint8_t *buf, int len)
3986 int l;
3987 uint8_t *ptr;
3988 target_phys_addr_t page;
3989 unsigned long pd;
3990 PhysPageDesc *p;
3992 while (len > 0) {
3993 page = addr & TARGET_PAGE_MASK;
3994 l = (page + TARGET_PAGE_SIZE) - addr;
3995 if (l > len)
3996 l = len;
3997 p = phys_page_find(page >> TARGET_PAGE_BITS);
3998 if (!p) {
3999 pd = IO_MEM_UNASSIGNED;
4000 } else {
4001 pd = p->phys_offset;
4004 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4005 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4006 !(pd & IO_MEM_ROMD)) {
4007 /* do nothing */
4008 } else {
4009 unsigned long addr1;
4010 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4011 /* ROM/RAM case */
4012 ptr = qemu_get_ram_ptr(addr1);
4013 memcpy(ptr, buf, l);
4014 qemu_put_ram_ptr(ptr);
4016 len -= l;
4017 buf += l;
4018 addr += l;
4022 typedef struct {
4023 void *buffer;
4024 target_phys_addr_t addr;
4025 target_phys_addr_t len;
4026 } BounceBuffer;
4028 static BounceBuffer bounce;
4030 typedef struct MapClient {
4031 void *opaque;
4032 void (*callback)(void *opaque);
4033 QLIST_ENTRY(MapClient) link;
4034 } MapClient;
4036 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4037 = QLIST_HEAD_INITIALIZER(map_client_list);
4039 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4041 MapClient *client = qemu_malloc(sizeof(*client));
4043 client->opaque = opaque;
4044 client->callback = callback;
4045 QLIST_INSERT_HEAD(&map_client_list, client, link);
4046 return client;
4049 void cpu_unregister_map_client(void *_client)
4051 MapClient *client = (MapClient *)_client;
4053 QLIST_REMOVE(client, link);
4054 qemu_free(client);
4057 static void cpu_notify_map_clients(void)
4059 MapClient *client;
4061 while (!QLIST_EMPTY(&map_client_list)) {
4062 client = QLIST_FIRST(&map_client_list);
4063 client->callback(client->opaque);
4064 cpu_unregister_map_client(client);
4068 /* Map a physical memory region into a host virtual address.
4069 * May map a subset of the requested range, given by and returned in *plen.
4070 * May return NULL if resources needed to perform the mapping are exhausted.
4071 * Use only for reads OR writes - not for read-modify-write operations.
4072 * Use cpu_register_map_client() to know when retrying the map operation is
4073 * likely to succeed.
4075 void *cpu_physical_memory_map(target_phys_addr_t addr,
4076 target_phys_addr_t *plen,
4077 int is_write)
4079 target_phys_addr_t len = *plen;
4080 target_phys_addr_t todo = 0;
4081 int l;
4082 target_phys_addr_t page;
4083 unsigned long pd;
4084 PhysPageDesc *p;
4085 ram_addr_t raddr = RAM_ADDR_MAX;
4086 ram_addr_t rlen;
4087 void *ret;
4089 while (len > 0) {
4090 page = addr & TARGET_PAGE_MASK;
4091 l = (page + TARGET_PAGE_SIZE) - addr;
4092 if (l > len)
4093 l = len;
4094 p = phys_page_find(page >> TARGET_PAGE_BITS);
4095 if (!p) {
4096 pd = IO_MEM_UNASSIGNED;
4097 } else {
4098 pd = p->phys_offset;
4101 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4102 if (todo || bounce.buffer) {
4103 break;
4105 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4106 bounce.addr = addr;
4107 bounce.len = l;
4108 if (!is_write) {
4109 cpu_physical_memory_read(addr, bounce.buffer, l);
4112 *plen = l;
4113 return bounce.buffer;
4115 if (!todo) {
4116 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4119 len -= l;
4120 addr += l;
4121 todo += l;
4123 rlen = todo;
4124 ret = qemu_ram_ptr_length(raddr, &rlen);
4125 *plen = rlen;
4126 return ret;
4129 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4130 * Will also mark the memory as dirty if is_write == 1. access_len gives
4131 * the amount of memory that was actually read or written by the caller.
4133 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4134 int is_write, target_phys_addr_t access_len)
4136 if (buffer != bounce.buffer) {
4137 if (is_write) {
4138 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4139 while (access_len) {
4140 unsigned l;
4141 l = TARGET_PAGE_SIZE;
4142 if (l > access_len)
4143 l = access_len;
4144 if (!cpu_physical_memory_is_dirty(addr1)) {
4145 /* invalidate code */
4146 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4147 /* set dirty bit */
4148 cpu_physical_memory_set_dirty_flags(
4149 addr1, (0xff & ~CODE_DIRTY_FLAG));
4151 addr1 += l;
4152 access_len -= l;
4155 if (xen_enabled()) {
4156 xen_invalidate_map_cache_entry(buffer);
4158 return;
4160 if (is_write) {
4161 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4163 qemu_vfree(bounce.buffer);
4164 bounce.buffer = NULL;
4165 cpu_notify_map_clients();
4168 /* warning: addr must be aligned */
4169 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4170 enum device_endian endian)
4172 int io_index;
4173 uint8_t *ptr;
4174 uint32_t val;
4175 unsigned long pd;
4176 PhysPageDesc *p;
4178 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4179 if (!p) {
4180 pd = IO_MEM_UNASSIGNED;
4181 } else {
4182 pd = p->phys_offset;
4185 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4186 !(pd & IO_MEM_ROMD)) {
4187 /* I/O case */
4188 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4189 if (p)
4190 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4191 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4192 #if defined(TARGET_WORDS_BIGENDIAN)
4193 if (endian == DEVICE_LITTLE_ENDIAN) {
4194 val = bswap32(val);
4196 #else
4197 if (endian == DEVICE_BIG_ENDIAN) {
4198 val = bswap32(val);
4200 #endif
4201 } else {
4202 /* RAM case */
4203 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4204 (addr & ~TARGET_PAGE_MASK);
4205 switch (endian) {
4206 case DEVICE_LITTLE_ENDIAN:
4207 val = ldl_le_p(ptr);
4208 break;
4209 case DEVICE_BIG_ENDIAN:
4210 val = ldl_be_p(ptr);
4211 break;
4212 default:
4213 val = ldl_p(ptr);
4214 break;
4217 return val;
4220 uint32_t ldl_phys(target_phys_addr_t addr)
4222 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4225 uint32_t ldl_le_phys(target_phys_addr_t addr)
4227 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4230 uint32_t ldl_be_phys(target_phys_addr_t addr)
4232 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4235 /* warning: addr must be aligned */
4236 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4237 enum device_endian endian)
4239 int io_index;
4240 uint8_t *ptr;
4241 uint64_t val;
4242 unsigned long pd;
4243 PhysPageDesc *p;
4245 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4246 if (!p) {
4247 pd = IO_MEM_UNASSIGNED;
4248 } else {
4249 pd = p->phys_offset;
4252 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4253 !(pd & IO_MEM_ROMD)) {
4254 /* I/O case */
4255 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4256 if (p)
4257 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4259 /* XXX This is broken when device endian != cpu endian.
4260 Fix and add "endian" variable check */
4261 #ifdef TARGET_WORDS_BIGENDIAN
4262 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4263 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4264 #else
4265 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4266 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4267 #endif
4268 } else {
4269 /* RAM case */
4270 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4271 (addr & ~TARGET_PAGE_MASK);
4272 switch (endian) {
4273 case DEVICE_LITTLE_ENDIAN:
4274 val = ldq_le_p(ptr);
4275 break;
4276 case DEVICE_BIG_ENDIAN:
4277 val = ldq_be_p(ptr);
4278 break;
4279 default:
4280 val = ldq_p(ptr);
4281 break;
4284 return val;
4287 uint64_t ldq_phys(target_phys_addr_t addr)
4289 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4292 uint64_t ldq_le_phys(target_phys_addr_t addr)
4294 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4297 uint64_t ldq_be_phys(target_phys_addr_t addr)
4299 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4302 /* XXX: optimize */
4303 uint32_t ldub_phys(target_phys_addr_t addr)
4305 uint8_t val;
4306 cpu_physical_memory_read(addr, &val, 1);
4307 return val;
4310 /* warning: addr must be aligned */
4311 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4312 enum device_endian endian)
4314 int io_index;
4315 uint8_t *ptr;
4316 uint64_t val;
4317 unsigned long pd;
4318 PhysPageDesc *p;
4320 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4321 if (!p) {
4322 pd = IO_MEM_UNASSIGNED;
4323 } else {
4324 pd = p->phys_offset;
4327 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4328 !(pd & IO_MEM_ROMD)) {
4329 /* I/O case */
4330 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4331 if (p)
4332 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4333 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4334 #if defined(TARGET_WORDS_BIGENDIAN)
4335 if (endian == DEVICE_LITTLE_ENDIAN) {
4336 val = bswap16(val);
4338 #else
4339 if (endian == DEVICE_BIG_ENDIAN) {
4340 val = bswap16(val);
4342 #endif
4343 } else {
4344 /* RAM case */
4345 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4346 (addr & ~TARGET_PAGE_MASK);
4347 switch (endian) {
4348 case DEVICE_LITTLE_ENDIAN:
4349 val = lduw_le_p(ptr);
4350 break;
4351 case DEVICE_BIG_ENDIAN:
4352 val = lduw_be_p(ptr);
4353 break;
4354 default:
4355 val = lduw_p(ptr);
4356 break;
4359 return val;
4362 uint32_t lduw_phys(target_phys_addr_t addr)
4364 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4367 uint32_t lduw_le_phys(target_phys_addr_t addr)
4369 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4372 uint32_t lduw_be_phys(target_phys_addr_t addr)
4374 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4377 /* warning: addr must be aligned. The ram page is not masked as dirty
4378 and the code inside is not invalidated. It is useful if the dirty
4379 bits are used to track modified PTEs */
4380 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4382 int io_index;
4383 uint8_t *ptr;
4384 unsigned long pd;
4385 PhysPageDesc *p;
4387 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4388 if (!p) {
4389 pd = IO_MEM_UNASSIGNED;
4390 } else {
4391 pd = p->phys_offset;
4394 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4395 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4396 if (p)
4397 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4398 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4399 } else {
4400 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4401 ptr = qemu_get_ram_ptr(addr1);
4402 stl_p(ptr, val);
4404 if (unlikely(in_migration)) {
4405 if (!cpu_physical_memory_is_dirty(addr1)) {
4406 /* invalidate code */
4407 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4408 /* set dirty bit */
4409 cpu_physical_memory_set_dirty_flags(
4410 addr1, (0xff & ~CODE_DIRTY_FLAG));
4416 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4418 int io_index;
4419 uint8_t *ptr;
4420 unsigned long pd;
4421 PhysPageDesc *p;
4423 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4424 if (!p) {
4425 pd = IO_MEM_UNASSIGNED;
4426 } else {
4427 pd = p->phys_offset;
4430 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4431 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4432 if (p)
4433 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4434 #ifdef TARGET_WORDS_BIGENDIAN
4435 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4436 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4437 #else
4438 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4439 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4440 #endif
4441 } else {
4442 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4443 (addr & ~TARGET_PAGE_MASK);
4444 stq_p(ptr, val);
4448 /* warning: addr must be aligned */
4449 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4450 enum device_endian endian)
4452 int io_index;
4453 uint8_t *ptr;
4454 unsigned long pd;
4455 PhysPageDesc *p;
4457 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4458 if (!p) {
4459 pd = IO_MEM_UNASSIGNED;
4460 } else {
4461 pd = p->phys_offset;
4464 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4465 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4466 if (p)
4467 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4468 #if defined(TARGET_WORDS_BIGENDIAN)
4469 if (endian == DEVICE_LITTLE_ENDIAN) {
4470 val = bswap32(val);
4472 #else
4473 if (endian == DEVICE_BIG_ENDIAN) {
4474 val = bswap32(val);
4476 #endif
4477 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4478 } else {
4479 unsigned long addr1;
4480 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4481 /* RAM case */
4482 ptr = qemu_get_ram_ptr(addr1);
4483 switch (endian) {
4484 case DEVICE_LITTLE_ENDIAN:
4485 stl_le_p(ptr, val);
4486 break;
4487 case DEVICE_BIG_ENDIAN:
4488 stl_be_p(ptr, val);
4489 break;
4490 default:
4491 stl_p(ptr, val);
4492 break;
4494 if (!cpu_physical_memory_is_dirty(addr1)) {
4495 /* invalidate code */
4496 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4497 /* set dirty bit */
4498 cpu_physical_memory_set_dirty_flags(addr1,
4499 (0xff & ~CODE_DIRTY_FLAG));
4504 void stl_phys(target_phys_addr_t addr, uint32_t val)
4506 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4509 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4511 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4514 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4516 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4519 /* XXX: optimize */
4520 void stb_phys(target_phys_addr_t addr, uint32_t val)
4522 uint8_t v = val;
4523 cpu_physical_memory_write(addr, &v, 1);
4526 /* warning: addr must be aligned */
4527 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4528 enum device_endian endian)
4530 int io_index;
4531 uint8_t *ptr;
4532 unsigned long pd;
4533 PhysPageDesc *p;
4535 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4536 if (!p) {
4537 pd = IO_MEM_UNASSIGNED;
4538 } else {
4539 pd = p->phys_offset;
4542 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4543 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4544 if (p)
4545 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4546 #if defined(TARGET_WORDS_BIGENDIAN)
4547 if (endian == DEVICE_LITTLE_ENDIAN) {
4548 val = bswap16(val);
4550 #else
4551 if (endian == DEVICE_BIG_ENDIAN) {
4552 val = bswap16(val);
4554 #endif
4555 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4556 } else {
4557 unsigned long addr1;
4558 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4559 /* RAM case */
4560 ptr = qemu_get_ram_ptr(addr1);
4561 switch (endian) {
4562 case DEVICE_LITTLE_ENDIAN:
4563 stw_le_p(ptr, val);
4564 break;
4565 case DEVICE_BIG_ENDIAN:
4566 stw_be_p(ptr, val);
4567 break;
4568 default:
4569 stw_p(ptr, val);
4570 break;
4572 if (!cpu_physical_memory_is_dirty(addr1)) {
4573 /* invalidate code */
4574 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4575 /* set dirty bit */
4576 cpu_physical_memory_set_dirty_flags(addr1,
4577 (0xff & ~CODE_DIRTY_FLAG));
4582 void stw_phys(target_phys_addr_t addr, uint32_t val)
4584 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4587 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4589 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4592 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4594 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4597 /* XXX: optimize */
4598 void stq_phys(target_phys_addr_t addr, uint64_t val)
4600 val = tswap64(val);
4601 cpu_physical_memory_write(addr, &val, 8);
4604 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4606 val = cpu_to_le64(val);
4607 cpu_physical_memory_write(addr, &val, 8);
4610 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4612 val = cpu_to_be64(val);
4613 cpu_physical_memory_write(addr, &val, 8);
4616 /* virtual memory access for debug (includes writing to ROM) */
4617 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4618 uint8_t *buf, int len, int is_write)
4620 int l;
4621 target_phys_addr_t phys_addr;
4622 target_ulong page;
4624 while (len > 0) {
4625 page = addr & TARGET_PAGE_MASK;
4626 phys_addr = cpu_get_phys_page_debug(env, page);
4627 /* if no physical page mapped, return an error */
4628 if (phys_addr == -1)
4629 return -1;
4630 l = (page + TARGET_PAGE_SIZE) - addr;
4631 if (l > len)
4632 l = len;
4633 phys_addr += (addr & ~TARGET_PAGE_MASK);
4634 if (is_write)
4635 cpu_physical_memory_write_rom(phys_addr, buf, l);
4636 else
4637 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4638 len -= l;
4639 buf += l;
4640 addr += l;
4642 return 0;
4644 #endif
4646 /* in deterministic execution mode, instructions doing device I/Os
4647 must be at the end of the TB */
4648 void cpu_io_recompile(CPUState *env, void *retaddr)
4650 TranslationBlock *tb;
4651 uint32_t n, cflags;
4652 target_ulong pc, cs_base;
4653 uint64_t flags;
4655 tb = tb_find_pc((unsigned long)retaddr);
4656 if (!tb) {
4657 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4658 retaddr);
4660 n = env->icount_decr.u16.low + tb->icount;
4661 cpu_restore_state(tb, env, (unsigned long)retaddr);
4662 /* Calculate how many instructions had been executed before the fault
4663 occurred. */
4664 n = n - env->icount_decr.u16.low;
4665 /* Generate a new TB ending on the I/O insn. */
4666 n++;
4667 /* On MIPS and SH, delay slot instructions can only be restarted if
4668 they were already the first instruction in the TB. If this is not
4669 the first instruction in a TB then re-execute the preceding
4670 branch. */
4671 #if defined(TARGET_MIPS)
4672 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4673 env->active_tc.PC -= 4;
4674 env->icount_decr.u16.low++;
4675 env->hflags &= ~MIPS_HFLAG_BMASK;
4677 #elif defined(TARGET_SH4)
4678 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4679 && n > 1) {
4680 env->pc -= 2;
4681 env->icount_decr.u16.low++;
4682 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4684 #endif
4685 /* This should never happen. */
4686 if (n > CF_COUNT_MASK)
4687 cpu_abort(env, "TB too big during recompile");
4689 cflags = n | CF_LAST_IO;
4690 pc = tb->pc;
4691 cs_base = tb->cs_base;
4692 flags = tb->flags;
4693 tb_phys_invalidate(tb, -1);
4694 /* FIXME: In theory this could raise an exception. In practice
4695 we have already translated the block once so it's probably ok. */
4696 tb_gen_code(env, pc, cs_base, flags, cflags);
4697 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4698 the first in the TB) then we end up generating a whole new TB and
4699 repeating the fault, which is horribly inefficient.
4700 Better would be to execute just this insn uncached, or generate a
4701 second new TB. */
4702 cpu_resume_from_signal(env, NULL);
4705 #if !defined(CONFIG_USER_ONLY)
4707 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4709 int i, target_code_size, max_target_code_size;
4710 int direct_jmp_count, direct_jmp2_count, cross_page;
4711 TranslationBlock *tb;
4713 target_code_size = 0;
4714 max_target_code_size = 0;
4715 cross_page = 0;
4716 direct_jmp_count = 0;
4717 direct_jmp2_count = 0;
4718 for(i = 0; i < nb_tbs; i++) {
4719 tb = &tbs[i];
4720 target_code_size += tb->size;
4721 if (tb->size > max_target_code_size)
4722 max_target_code_size = tb->size;
4723 if (tb->page_addr[1] != -1)
4724 cross_page++;
4725 if (tb->tb_next_offset[0] != 0xffff) {
4726 direct_jmp_count++;
4727 if (tb->tb_next_offset[1] != 0xffff) {
4728 direct_jmp2_count++;
4732 /* XXX: avoid using doubles ? */
4733 cpu_fprintf(f, "Translation buffer state:\n");
4734 cpu_fprintf(f, "gen code size %td/%ld\n",
4735 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4736 cpu_fprintf(f, "TB count %d/%d\n",
4737 nb_tbs, code_gen_max_blocks);
4738 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4739 nb_tbs ? target_code_size / nb_tbs : 0,
4740 max_target_code_size);
4741 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4742 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4743 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4744 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4745 cross_page,
4746 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4747 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4748 direct_jmp_count,
4749 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4750 direct_jmp2_count,
4751 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4752 cpu_fprintf(f, "\nStatistics:\n");
4753 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4754 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4755 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4756 tcg_dump_info(f, cpu_fprintf);
4759 #define MMUSUFFIX _cmmu
4760 #define GETPC() NULL
4761 #define env cpu_single_env
4762 #define SOFTMMU_CODE_ACCESS
4764 #define SHIFT 0
4765 #include "softmmu_template.h"
4767 #define SHIFT 1
4768 #include "softmmu_template.h"
4770 #define SHIFT 2
4771 #include "softmmu_template.h"
4773 #define SHIFT 3
4774 #include "softmmu_template.h"
4776 #undef env
4778 #endif