hw/9pfs: Use fs driver specific lstat
[qemu/kevin.git] / exec.c
blobd0cbf1582297b403936035e953a01462e0ccd21c
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 //#define DEBUG_TB_INVALIDATE
61 //#define DEBUG_FLUSH
62 //#define DEBUG_TLB
63 //#define DEBUG_UNASSIGNED
65 /* make various TB consistency checks */
66 //#define DEBUG_TB_CHECK
67 //#define DEBUG_TLB_CHECK
69 //#define DEBUG_IOPORT
70 //#define DEBUG_SUBPAGE
72 #if !defined(CONFIG_USER_ONLY)
73 /* TB consistency checks only implemented for usermode emulation. */
74 #undef DEBUG_TB_CHECK
75 #endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
79 static TranslationBlock *tbs;
80 static int code_gen_max_blocks;
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
86 #if defined(__arm__) || defined(__sparc_v9__)
87 /* The prologue must be reachable with a direct jump. ARM and Sparc64
88 have limited branch ranges (possibly also PPC) so place it in a
89 section close to code segment. */
90 #define code_gen_section \
91 __attribute__((__section__(".gen_code"))) \
92 __attribute__((aligned (32)))
93 #elif defined(_WIN32)
94 /* Maximum alignment for Win32 is 16. */
95 #define code_gen_section \
96 __attribute__((aligned (16)))
97 #else
98 #define code_gen_section \
99 __attribute__((aligned (32)))
100 #endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
115 static MemoryRegion *system_memory;
116 static MemoryRegion *system_io;
118 #endif
120 CPUState *first_cpu;
121 /* current CPU in the current thread. It is only valid inside
122 cpu_exec() */
123 CPUState *cpu_single_env;
124 /* 0 = Do not count executed instructions.
125 1 = Precise instruction counting.
126 2 = Adaptive rate instruction counting. */
127 int use_icount = 0;
128 /* Current instruction counter. While executing translated code this may
129 include some instructions that have not yet been executed. */
130 int64_t qemu_icount;
132 typedef struct PageDesc {
133 /* list of TBs intersecting this ram page */
134 TranslationBlock *first_tb;
135 /* in order to optimize self modifying code, we count the number
136 of lookups we do to a given page to use a bitmap */
137 unsigned int code_write_count;
138 uint8_t *code_bitmap;
139 #if defined(CONFIG_USER_ONLY)
140 unsigned long flags;
141 #endif
142 } PageDesc;
144 /* In system mode we want L1_MAP to be based on ram offsets,
145 while in user mode we want it to be based on virtual addresses. */
146 #if !defined(CONFIG_USER_ONLY)
147 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
148 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
151 #endif
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
154 #endif
156 /* Size of the L2 (and L3, etc) page tables. */
157 #define L2_BITS 10
158 #define L2_SIZE (1 << L2_BITS)
160 /* The bits remaining after N lower levels of page tables. */
161 #define P_L1_BITS_REM \
162 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 #define V_L1_BITS_REM \
164 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
166 /* Size of the L1 page table. Avoid silly small sizes. */
167 #if P_L1_BITS_REM < 4
168 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
169 #else
170 #define P_L1_BITS P_L1_BITS_REM
171 #endif
173 #if V_L1_BITS_REM < 4
174 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
175 #else
176 #define V_L1_BITS V_L1_BITS_REM
177 #endif
179 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
180 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
182 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
183 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
185 unsigned long qemu_real_host_page_size;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
#ifdef _WIN32
/* Make the 'size' bytes starting at 'addr' readable, writable and
   executable.  The result of VirtualProtect() is not checked. */
static void map_exec(void *addr, long size)
{
    DWORD previous;

    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &previous);
}
#else
/* Make every host page touched by [addr, addr + size) readable,
   writable and executable.  The result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long page_mask = ~(page_size - 1);
    /* round the start down and the end up to a page boundary */
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + page_size - 1)
                         & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
#endif
258 static void page_init(void)
260 /* NOTE: we can always suppose that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 #ifdef HAVE_KINFO_GETVMMAP
281 struct kinfo_vmentry *freep;
282 int i, cnt;
284 freep = kinfo_getvmmap(getpid(), &cnt);
285 if (freep) {
286 mmap_lock();
287 for (i = 0; i < cnt; i++) {
288 unsigned long startaddr, endaddr;
290 startaddr = freep[i].kve_start;
291 endaddr = freep[i].kve_end;
292 if (h2g_valid(startaddr)) {
293 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295 if (h2g_valid(endaddr)) {
296 endaddr = h2g(endaddr);
297 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
298 } else {
299 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
300 endaddr = ~0ul;
301 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
302 #endif
306 free(freep);
307 mmap_unlock();
309 #else
310 FILE *f;
312 last_brk = (unsigned long)sbrk(0);
314 f = fopen("/compat/linux/proc/self/maps", "r");
315 if (f) {
316 mmap_lock();
318 do {
319 unsigned long startaddr, endaddr;
320 int n;
322 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324 if (n == 2 && h2g_valid(startaddr)) {
325 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327 if (h2g_valid(endaddr)) {
328 endaddr = h2g(endaddr);
329 } else {
330 endaddr = ~0ul;
332 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 } while (!feof(f));
336 fclose(f);
337 mmap_unlock();
339 #endif
341 #endif
344 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 PageDesc *pd;
347 void **lp;
348 int i;
350 #if defined(CONFIG_USER_ONLY)
351 /* We can't use g_malloc because it may recurse into a locked mutex. */
352 # define ALLOC(P, SIZE) \
353 do { \
354 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
355 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
356 } while (0)
357 #else
358 # define ALLOC(P, SIZE) \
359 do { P = g_malloc0(SIZE); } while (0)
360 #endif
362 /* Level 1. Always allocated. */
363 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365 /* Level 2..N-1. */
366 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
367 void **p = *lp;
369 if (p == NULL) {
370 if (!alloc) {
371 return NULL;
373 ALLOC(p, sizeof(void *) * L2_SIZE);
374 *lp = p;
377 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
380 pd = *lp;
381 if (pd == NULL) {
382 if (!alloc) {
383 return NULL;
385 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
386 *lp = pd;
389 #undef ALLOC
391 return pd + (index & (L2_SIZE - 1));
394 static inline PageDesc *page_find(tb_page_addr_t index)
396 return page_find_alloc(index, 0);
399 #if !defined(CONFIG_USER_ONLY)
400 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
402 PhysPageDesc *pd;
403 void **lp;
404 int i;
406 /* Level 1. Always allocated. */
407 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
409 /* Level 2..N-1. */
410 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
411 void **p = *lp;
412 if (p == NULL) {
413 if (!alloc) {
414 return NULL;
416 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
418 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
421 pd = *lp;
422 if (pd == NULL) {
423 int i;
425 if (!alloc) {
426 return NULL;
429 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
431 for (i = 0; i < L2_SIZE; i++) {
432 pd[i].phys_offset = IO_MEM_UNASSIGNED;
433 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
437 return pd + (index & (L2_SIZE - 1));
440 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
442 return phys_page_find_alloc(index, 0);
445 static void tlb_protect_code(ram_addr_t ram_addr);
446 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
447 target_ulong vaddr);
448 #define mmap_lock() do { } while(0)
449 #define mmap_unlock() do { } while(0)
450 #endif
452 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
454 #if defined(CONFIG_USER_ONLY)
455 /* Currently it is not recommended to allocate big chunks of data in
456 user mode. It will change when a dedicated libc will be used */
457 #define USE_STATIC_CODE_GEN_BUFFER
458 #endif
460 #ifdef USE_STATIC_CODE_GEN_BUFFER
461 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
462 __attribute__((aligned (CODE_GEN_ALIGN)));
463 #endif
465 static void code_gen_alloc(unsigned long tb_size)
467 #ifdef USE_STATIC_CODE_GEN_BUFFER
468 code_gen_buffer = static_code_gen_buffer;
469 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
470 map_exec(code_gen_buffer, code_gen_buffer_size);
471 #else
472 code_gen_buffer_size = tb_size;
473 if (code_gen_buffer_size == 0) {
474 #if defined(CONFIG_USER_ONLY)
475 /* in user mode, phys_ram_size is not meaningful */
476 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
477 #else
478 /* XXX: needs adjustments */
479 code_gen_buffer_size = (unsigned long)(ram_size / 4);
480 #endif
482 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
483 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
484 /* The code gen buffer location may have constraints depending on
485 the host cpu and OS */
486 #if defined(__linux__)
488 int flags;
489 void *start = NULL;
491 flags = MAP_PRIVATE | MAP_ANONYMOUS;
492 #if defined(__x86_64__)
493 flags |= MAP_32BIT;
494 /* Cannot map more than that */
495 if (code_gen_buffer_size > (800 * 1024 * 1024))
496 code_gen_buffer_size = (800 * 1024 * 1024);
497 #elif defined(__sparc_v9__)
498 // Map the buffer below 2G, so we can use direct calls and branches
499 flags |= MAP_FIXED;
500 start = (void *) 0x60000000UL;
501 if (code_gen_buffer_size > (512 * 1024 * 1024))
502 code_gen_buffer_size = (512 * 1024 * 1024);
503 #elif defined(__arm__)
504 /* Map the buffer below 32M, so we can use direct calls and branches */
505 flags |= MAP_FIXED;
506 start = (void *) 0x01000000UL;
507 if (code_gen_buffer_size > 16 * 1024 * 1024)
508 code_gen_buffer_size = 16 * 1024 * 1024;
509 #elif defined(__s390x__)
510 /* Map the buffer so that we can use direct calls and branches. */
511 /* We have a +- 4GB range on the branches; leave some slop. */
512 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
513 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
515 start = (void *)0x90000000UL;
516 #endif
517 code_gen_buffer = mmap(start, code_gen_buffer_size,
518 PROT_WRITE | PROT_READ | PROT_EXEC,
519 flags, -1, 0);
520 if (code_gen_buffer == MAP_FAILED) {
521 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
522 exit(1);
525 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
526 || defined(__DragonFly__) || defined(__OpenBSD__) \
527 || defined(__NetBSD__)
529 int flags;
530 void *addr = NULL;
531 flags = MAP_PRIVATE | MAP_ANONYMOUS;
532 #if defined(__x86_64__)
533 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
534 * 0x40000000 is free */
535 flags |= MAP_FIXED;
536 addr = (void *)0x40000000;
537 /* Cannot map more than that */
538 if (code_gen_buffer_size > (800 * 1024 * 1024))
539 code_gen_buffer_size = (800 * 1024 * 1024);
540 #elif defined(__sparc_v9__)
541 // Map the buffer below 2G, so we can use direct calls and branches
542 flags |= MAP_FIXED;
543 addr = (void *) 0x60000000UL;
544 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
545 code_gen_buffer_size = (512 * 1024 * 1024);
547 #endif
548 code_gen_buffer = mmap(addr, code_gen_buffer_size,
549 PROT_WRITE | PROT_READ | PROT_EXEC,
550 flags, -1, 0);
551 if (code_gen_buffer == MAP_FAILED) {
552 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
553 exit(1);
556 #else
557 code_gen_buffer = g_malloc(code_gen_buffer_size);
558 map_exec(code_gen_buffer, code_gen_buffer_size);
559 #endif
560 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
561 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
562 code_gen_buffer_max_size = code_gen_buffer_size -
563 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
564 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
565 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
568 /* Must be called before using the QEMU cpus. 'tb_size' is the size
569 (in bytes) allocated to the translation buffer. Zero means default
570 size. */
571 void tcg_exec_init(unsigned long tb_size)
573 cpu_gen_init();
574 code_gen_alloc(tb_size);
575 code_gen_ptr = code_gen_buffer;
576 page_init();
577 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
578 /* There's no guest base to take into account, so go ahead and
579 initialize the prologue now. */
580 tcg_prologue_init(&tcg_ctx);
581 #endif
584 bool tcg_enabled(void)
586 return code_gen_buffer != NULL;
/* One-time global initialisation.  In system emulation this sets up
   the memory map and then the I/O memory tables; in user-mode builds
   it does nothing. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
597 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
599 static int cpu_common_post_load(void *opaque, int version_id)
601 CPUState *env = opaque;
603 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
604 version_id is increased. */
605 env->interrupt_request &= ~0x01;
606 tlb_flush(env, 1);
608 return 0;
611 static const VMStateDescription vmstate_cpu_common = {
612 .name = "cpu_common",
613 .version_id = 1,
614 .minimum_version_id = 1,
615 .minimum_version_id_old = 1,
616 .post_load = cpu_common_post_load,
617 .fields = (VMStateField []) {
618 VMSTATE_UINT32(halted, CPUState),
619 VMSTATE_UINT32(interrupt_request, CPUState),
620 VMSTATE_END_OF_LIST()
623 #endif
625 CPUState *qemu_get_cpu(int cpu)
627 CPUState *env = first_cpu;
629 while (env) {
630 if (env->cpu_index == cpu)
631 break;
632 env = env->next_cpu;
635 return env;
638 void cpu_exec_init(CPUState *env)
640 CPUState **penv;
641 int cpu_index;
643 #if defined(CONFIG_USER_ONLY)
644 cpu_list_lock();
645 #endif
646 env->next_cpu = NULL;
647 penv = &first_cpu;
648 cpu_index = 0;
649 while (*penv != NULL) {
650 penv = &(*penv)->next_cpu;
651 cpu_index++;
653 env->cpu_index = cpu_index;
654 env->numa_node = 0;
655 QTAILQ_INIT(&env->breakpoints);
656 QTAILQ_INIT(&env->watchpoints);
657 #ifndef CONFIG_USER_ONLY
658 env->thread_id = qemu_get_thread_id();
659 #endif
660 *penv = env;
661 #if defined(CONFIG_USER_ONLY)
662 cpu_list_unlock();
663 #endif
664 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
665 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
666 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
667 cpu_save, cpu_load, env);
668 #endif
671 /* Allocate a new translation block. Flush the translation buffer if
672 too many translation blocks or too much generated code. */
673 static TranslationBlock *tb_alloc(target_ulong pc)
675 TranslationBlock *tb;
677 if (nb_tbs >= code_gen_max_blocks ||
678 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
679 return NULL;
680 tb = &tbs[nb_tbs++];
681 tb->pc = pc;
682 tb->cflags = 0;
683 return tb;
686 void tb_free(TranslationBlock *tb)
688 /* In practice this is mostly used for single use temporary TB
689 Ignore the hard cases and just back up if this TB happens to
690 be the last one generated. */
691 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
692 code_gen_ptr = tb->tc_ptr;
693 nb_tbs--;
697 static inline void invalidate_page_bitmap(PageDesc *p)
699 if (p->code_bitmap) {
700 g_free(p->code_bitmap);
701 p->code_bitmap = NULL;
703 p->code_write_count = 0;
706 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
708 static void page_flush_tb_1 (int level, void **lp)
710 int i;
712 if (*lp == NULL) {
713 return;
715 if (level == 0) {
716 PageDesc *pd = *lp;
717 for (i = 0; i < L2_SIZE; ++i) {
718 pd[i].first_tb = NULL;
719 invalidate_page_bitmap(pd + i);
721 } else {
722 void **pp = *lp;
723 for (i = 0; i < L2_SIZE; ++i) {
724 page_flush_tb_1 (level - 1, pp + i);
729 static void page_flush_tb(void)
731 int i;
732 for (i = 0; i < V_L1_SIZE; i++) {
733 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
737 /* flush all the translation blocks */
738 /* XXX: tb_flush is currently not thread safe */
739 void tb_flush(CPUState *env1)
741 CPUState *env;
742 #if defined(DEBUG_FLUSH)
743 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
744 (unsigned long)(code_gen_ptr - code_gen_buffer),
745 nb_tbs, nb_tbs > 0 ?
746 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
747 #endif
748 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
749 cpu_abort(env1, "Internal error: code buffer overflow\n");
751 nb_tbs = 0;
753 for(env = first_cpu; env != NULL; env = env->next_cpu) {
754 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
757 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
758 page_flush_tb();
760 code_gen_ptr = code_gen_buffer;
761 /* XXX: flush processor icache at this point if cache flush is
762 expensive */
763 tb_flush_count++;
766 #ifdef DEBUG_TB_CHECK
768 static void tb_invalidate_check(target_ulong address)
770 TranslationBlock *tb;
771 int i;
772 address &= TARGET_PAGE_MASK;
773 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
774 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
775 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
776 address >= tb->pc + tb->size)) {
777 printf("ERROR invalidate: address=" TARGET_FMT_lx
778 " PC=%08lx size=%04x\n",
779 address, (long)tb->pc, tb->size);
785 /* verify that all the pages have correct rights for code */
786 static void tb_page_check(void)
788 TranslationBlock *tb;
789 int i, flags1, flags2;
791 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
792 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
793 flags1 = page_get_flags(tb->pc);
794 flags2 = page_get_flags(tb->pc + tb->size - 1);
795 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
796 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
797 (long)tb->pc, tb->size, flags1, flags2);
803 #endif
805 /* invalidate one TB */
806 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
807 int next_offset)
809 TranslationBlock *tb1;
810 for(;;) {
811 tb1 = *ptb;
812 if (tb1 == tb) {
813 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
814 break;
816 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
820 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
822 TranslationBlock *tb1;
823 unsigned int n1;
825 for(;;) {
826 tb1 = *ptb;
827 n1 = (long)tb1 & 3;
828 tb1 = (TranslationBlock *)((long)tb1 & ~3);
829 if (tb1 == tb) {
830 *ptb = tb1->page_next[n1];
831 break;
833 ptb = &tb1->page_next[n1];
837 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
839 TranslationBlock *tb1, **ptb;
840 unsigned int n1;
842 ptb = &tb->jmp_next[n];
843 tb1 = *ptb;
844 if (tb1) {
845 /* find tb(n) in circular list */
846 for(;;) {
847 tb1 = *ptb;
848 n1 = (long)tb1 & 3;
849 tb1 = (TranslationBlock *)((long)tb1 & ~3);
850 if (n1 == n && tb1 == tb)
851 break;
852 if (n1 == 2) {
853 ptb = &tb1->jmp_first;
854 } else {
855 ptb = &tb1->jmp_next[n1];
858 /* now we can suppress tb(n) from the list */
859 *ptb = tb->jmp_next[n];
861 tb->jmp_next[n] = NULL;
865 /* reset the jump entry 'n' of a TB so that it is not chained to
866 another TB */
867 static inline void tb_reset_jump(TranslationBlock *tb, int n)
869 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
872 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
874 CPUState *env;
875 PageDesc *p;
876 unsigned int h, n1;
877 tb_page_addr_t phys_pc;
878 TranslationBlock *tb1, *tb2;
880 /* remove the TB from the hash list */
881 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
882 h = tb_phys_hash_func(phys_pc);
883 tb_remove(&tb_phys_hash[h], tb,
884 offsetof(TranslationBlock, phys_hash_next));
886 /* remove the TB from the page list */
887 if (tb->page_addr[0] != page_addr) {
888 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
889 tb_page_remove(&p->first_tb, tb);
890 invalidate_page_bitmap(p);
892 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
893 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
894 tb_page_remove(&p->first_tb, tb);
895 invalidate_page_bitmap(p);
898 tb_invalidated_flag = 1;
900 /* remove the TB from the hash list */
901 h = tb_jmp_cache_hash_func(tb->pc);
902 for(env = first_cpu; env != NULL; env = env->next_cpu) {
903 if (env->tb_jmp_cache[h] == tb)
904 env->tb_jmp_cache[h] = NULL;
907 /* suppress this TB from the two jump lists */
908 tb_jmp_remove(tb, 0);
909 tb_jmp_remove(tb, 1);
911 /* suppress any remaining jumps to this TB */
912 tb1 = tb->jmp_first;
913 for(;;) {
914 n1 = (long)tb1 & 3;
915 if (n1 == 2)
916 break;
917 tb1 = (TranslationBlock *)((long)tb1 & ~3);
918 tb2 = tb1->jmp_next[n1];
919 tb_reset_jump(tb1, n1);
920 tb1->jmp_next[n1] = NULL;
921 tb1 = tb2;
923 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
925 tb_phys_invalidate_count++;
/* Set bits [start, start + len) in the bitmap 'tab'.  Bit i lives in
   byte i / 8 at position i % 8.  Nothing is set when len is zero. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    const int end = start + len;
    const int head_mask = 0xff << (start & 7);
    uint8_t *cur = tab + (start >> 3);
    int pos;

    if ((start & ~7) == (end & ~7)) {
        /* the whole range lies inside a single byte */
        if (start < end) {
            *cur |= head_mask & ~(0xff << (end & 7));
        }
        return;
    }

    /* leading partial byte */
    *cur++ |= head_mask;

    /* full bytes in the middle */
    for (pos = (start + 8) & ~7; pos < (end & ~7); pos += 8) {
        *cur++ = 0xff;
    }

    /* trailing partial byte */
    if (pos < end) {
        *cur |= ~(0xff << (end & 7));
    }
}
955 static void build_page_bitmap(PageDesc *p)
957 int n, tb_start, tb_end;
958 TranslationBlock *tb;
960 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
962 tb = p->first_tb;
963 while (tb != NULL) {
964 n = (long)tb & 3;
965 tb = (TranslationBlock *)((long)tb & ~3);
966 /* NOTE: this is subtle as a TB may span two physical pages */
967 if (n == 0) {
968 /* NOTE: tb_end may be after the end of the page, but
969 it is not a problem */
970 tb_start = tb->pc & ~TARGET_PAGE_MASK;
971 tb_end = tb_start + tb->size;
972 if (tb_end > TARGET_PAGE_SIZE)
973 tb_end = TARGET_PAGE_SIZE;
974 } else {
975 tb_start = 0;
976 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
978 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
979 tb = tb->page_next[n];
983 TranslationBlock *tb_gen_code(CPUState *env,
984 target_ulong pc, target_ulong cs_base,
985 int flags, int cflags)
987 TranslationBlock *tb;
988 uint8_t *tc_ptr;
989 tb_page_addr_t phys_pc, phys_page2;
990 target_ulong virt_page2;
991 int code_gen_size;
993 phys_pc = get_page_addr_code(env, pc);
994 tb = tb_alloc(pc);
995 if (!tb) {
996 /* flush must be done */
997 tb_flush(env);
998 /* cannot fail at this point */
999 tb = tb_alloc(pc);
1000 /* Don't forget to invalidate previous TB info. */
1001 tb_invalidated_flag = 1;
1003 tc_ptr = code_gen_ptr;
1004 tb->tc_ptr = tc_ptr;
1005 tb->cs_base = cs_base;
1006 tb->flags = flags;
1007 tb->cflags = cflags;
1008 cpu_gen_code(env, tb, &code_gen_size);
1009 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1011 /* check next page if needed */
1012 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1013 phys_page2 = -1;
1014 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1015 phys_page2 = get_page_addr_code(env, virt_page2);
1017 tb_link_page(tb, phys_pc, phys_page2);
1018 return tb;
1021 /* invalidate all TBs which intersect with the target physical page
1022 starting in range [start;end[. NOTE: start and end must refer to
1023 the same physical page. 'is_cpu_write_access' should be true if called
1024 from a real cpu write access: the virtual CPU will exit the current
1025 TB if code is modified inside this TB. */
1026 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1027 int is_cpu_write_access)
1029 TranslationBlock *tb, *tb_next, *saved_tb;
1030 CPUState *env = cpu_single_env;
1031 tb_page_addr_t tb_start, tb_end;
1032 PageDesc *p;
1033 int n;
1034 #ifdef TARGET_HAS_PRECISE_SMC
1035 int current_tb_not_found = is_cpu_write_access;
1036 TranslationBlock *current_tb = NULL;
1037 int current_tb_modified = 0;
1038 target_ulong current_pc = 0;
1039 target_ulong current_cs_base = 0;
1040 int current_flags = 0;
1041 #endif /* TARGET_HAS_PRECISE_SMC */
1043 p = page_find(start >> TARGET_PAGE_BITS);
1044 if (!p)
1045 return;
1046 if (!p->code_bitmap &&
1047 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1048 is_cpu_write_access) {
1049 /* build code bitmap */
1050 build_page_bitmap(p);
1053 /* we remove all the TBs in the range [start, end[ */
1054 /* XXX: see if in some cases it could be faster to invalidate all the code */
1055 tb = p->first_tb;
1056 while (tb != NULL) {
1057 n = (long)tb & 3;
1058 tb = (TranslationBlock *)((long)tb & ~3);
1059 tb_next = tb->page_next[n];
1060 /* NOTE: this is subtle as a TB may span two physical pages */
1061 if (n == 0) {
1062 /* NOTE: tb_end may be after the end of the page, but
1063 it is not a problem */
1064 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1065 tb_end = tb_start + tb->size;
1066 } else {
1067 tb_start = tb->page_addr[1];
1068 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1070 if (!(tb_end <= start || tb_start >= end)) {
1071 #ifdef TARGET_HAS_PRECISE_SMC
1072 if (current_tb_not_found) {
1073 current_tb_not_found = 0;
1074 current_tb = NULL;
1075 if (env->mem_io_pc) {
1076 /* now we have a real cpu fault */
1077 current_tb = tb_find_pc(env->mem_io_pc);
1080 if (current_tb == tb &&
1081 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1082 /* If we are modifying the current TB, we must stop
1083 its execution. We could be more precise by checking
1084 that the modification is after the current PC, but it
1085 would require a specialized function to partially
1086 restore the CPU state */
1088 current_tb_modified = 1;
1089 cpu_restore_state(current_tb, env, env->mem_io_pc);
1090 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1091 &current_flags);
1093 #endif /* TARGET_HAS_PRECISE_SMC */
1094 /* we need to do that to handle the case where a signal
1095 occurs while doing tb_phys_invalidate() */
1096 saved_tb = NULL;
1097 if (env) {
1098 saved_tb = env->current_tb;
1099 env->current_tb = NULL;
1101 tb_phys_invalidate(tb, -1);
1102 if (env) {
1103 env->current_tb = saved_tb;
1104 if (env->interrupt_request && env->current_tb)
1105 cpu_interrupt(env, env->interrupt_request);
1108 tb = tb_next;
1110 #if !defined(CONFIG_USER_ONLY)
1111 /* if no code remaining, no need to continue to use slow writes */
1112 if (!p->first_tb) {
1113 invalidate_page_bitmap(p);
1114 if (is_cpu_write_access) {
1115 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1118 #endif
1119 #ifdef TARGET_HAS_PRECISE_SMC
1120 if (current_tb_modified) {
1121 /* we generate a block containing just the instruction
1122 modifying the memory. It will ensure that it cannot modify
1123 itself */
1124 env->current_tb = NULL;
1125 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1126 cpu_resume_from_signal(env, NULL);
1128 #endif
1131 /* len must be <= 8 and start must be a multiple of len */
1132 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1134 PageDesc *p;
1135 int offset, b;
1136 #if 0
1137 if (1) {
1138 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1139 cpu_single_env->mem_io_vaddr, len,
1140 cpu_single_env->eip,
1141 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1143 #endif
1144 p = page_find(start >> TARGET_PAGE_BITS);
1145 if (!p)
1146 return;
1147 if (p->code_bitmap) {
1148 offset = start & ~TARGET_PAGE_MASK;
1149 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1150 if (b & ((1 << len) - 1))
1151 goto do_invalidate;
1152 } else {
1153 do_invalidate:
1154 tb_invalidate_phys_page_range(start, start + len, 1);
1158 #if !defined(CONFIG_SOFTMMU)
1159 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1160 unsigned long pc, void *puc)
1162 TranslationBlock *tb;
1163 PageDesc *p;
1164 int n;
1165 #ifdef TARGET_HAS_PRECISE_SMC
1166 TranslationBlock *current_tb = NULL;
1167 CPUState *env = cpu_single_env;
1168 int current_tb_modified = 0;
1169 target_ulong current_pc = 0;
1170 target_ulong current_cs_base = 0;
1171 int current_flags = 0;
1172 #endif
1174 addr &= TARGET_PAGE_MASK;
1175 p = page_find(addr >> TARGET_PAGE_BITS);
1176 if (!p)
1177 return;
1178 tb = p->first_tb;
1179 #ifdef TARGET_HAS_PRECISE_SMC
1180 if (tb && pc != 0) {
1181 current_tb = tb_find_pc(pc);
1183 #endif
1184 while (tb != NULL) {
1185 n = (long)tb & 3;
1186 tb = (TranslationBlock *)((long)tb & ~3);
1187 #ifdef TARGET_HAS_PRECISE_SMC
1188 if (current_tb == tb &&
1189 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1190 /* If we are modifying the current TB, we must stop
1191 its execution. We could be more precise by checking
1192 that the modification is after the current PC, but it
1193 would require a specialized function to partially
1194 restore the CPU state */
1196 current_tb_modified = 1;
1197 cpu_restore_state(current_tb, env, pc);
1198 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1199 &current_flags);
1201 #endif /* TARGET_HAS_PRECISE_SMC */
1202 tb_phys_invalidate(tb, addr);
1203 tb = tb->page_next[n];
1205 p->first_tb = NULL;
1206 #ifdef TARGET_HAS_PRECISE_SMC
1207 if (current_tb_modified) {
1208 /* we generate a block containing just the instruction
1209 modifying the memory. It will ensure that it cannot modify
1210 itself */
1211 env->current_tb = NULL;
1212 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1213 cpu_resume_from_signal(env, puc);
1215 #endif
1217 #endif
1219 /* add the tb in the target page and protect it if necessary */
1220 static inline void tb_alloc_page(TranslationBlock *tb,
1221 unsigned int n, tb_page_addr_t page_addr)
1223 PageDesc *p;
1224 #ifndef CONFIG_USER_ONLY
1225 bool page_already_protected;
1226 #endif
1228 tb->page_addr[n] = page_addr;
1229 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1230 tb->page_next[n] = p->first_tb;
1231 #ifndef CONFIG_USER_ONLY
1232 page_already_protected = p->first_tb != NULL;
1233 #endif
1234 p->first_tb = (TranslationBlock *)((long)tb | n);
1235 invalidate_page_bitmap(p);
1237 #if defined(TARGET_HAS_SMC) || 1
1239 #if defined(CONFIG_USER_ONLY)
1240 if (p->flags & PAGE_WRITE) {
1241 target_ulong addr;
1242 PageDesc *p2;
1243 int prot;
1245 /* force the host page as non writable (writes will have a
1246 page fault + mprotect overhead) */
1247 page_addr &= qemu_host_page_mask;
1248 prot = 0;
1249 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1250 addr += TARGET_PAGE_SIZE) {
1252 p2 = page_find (addr >> TARGET_PAGE_BITS);
1253 if (!p2)
1254 continue;
1255 prot |= p2->flags;
1256 p2->flags &= ~PAGE_WRITE;
1258 mprotect(g2h(page_addr), qemu_host_page_size,
1259 (prot & PAGE_BITS) & ~PAGE_WRITE);
1260 #ifdef DEBUG_TB_INVALIDATE
1261 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1262 page_addr);
1263 #endif
1265 #else
1266 /* if some code is already present, then the pages are already
1267 protected. So we handle the case where only the first TB is
1268 allocated in a physical page */
1269 if (!page_already_protected) {
1270 tlb_protect_code(page_addr);
1272 #endif
1274 #endif /* TARGET_HAS_SMC */
1277 /* add a new TB and link it to the physical page tables. phys_page2 is
1278 (-1) to indicate that only one page contains the TB. */
1279 void tb_link_page(TranslationBlock *tb,
1280 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1282 unsigned int h;
1283 TranslationBlock **ptb;
1285 /* Grab the mmap lock to stop another thread invalidating this TB
1286 before we are done. */
1287 mmap_lock();
1288 /* add in the physical hash table */
1289 h = tb_phys_hash_func(phys_pc);
1290 ptb = &tb_phys_hash[h];
1291 tb->phys_hash_next = *ptb;
1292 *ptb = tb;
1294 /* add in the page list */
1295 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1296 if (phys_page2 != -1)
1297 tb_alloc_page(tb, 1, phys_page2);
1298 else
1299 tb->page_addr[1] = -1;
1301 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1302 tb->jmp_next[0] = NULL;
1303 tb->jmp_next[1] = NULL;
1305 /* init original jump addresses */
1306 if (tb->tb_next_offset[0] != 0xffff)
1307 tb_reset_jump(tb, 0);
1308 if (tb->tb_next_offset[1] != 0xffff)
1309 tb_reset_jump(tb, 1);
1311 #ifdef DEBUG_TB_CHECK
1312 tb_page_check();
1313 #endif
1314 mmap_unlock();
1317 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1318 tb[1].tc_ptr. Return NULL if not found */
1319 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1321 int m_min, m_max, m;
1322 unsigned long v;
1323 TranslationBlock *tb;
1325 if (nb_tbs <= 0)
1326 return NULL;
1327 if (tc_ptr < (unsigned long)code_gen_buffer ||
1328 tc_ptr >= (unsigned long)code_gen_ptr)
1329 return NULL;
1330 /* binary search (cf Knuth) */
1331 m_min = 0;
1332 m_max = nb_tbs - 1;
1333 while (m_min <= m_max) {
1334 m = (m_min + m_max) >> 1;
1335 tb = &tbs[m];
1336 v = (unsigned long)tb->tc_ptr;
1337 if (v == tc_ptr)
1338 return tb;
1339 else if (tc_ptr < v) {
1340 m_max = m - 1;
1341 } else {
1342 m_min = m + 1;
1345 return &tbs[m_max];
1348 static void tb_reset_jump_recursive(TranslationBlock *tb);
1350 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1352 TranslationBlock *tb1, *tb_next, **ptb;
1353 unsigned int n1;
1355 tb1 = tb->jmp_next[n];
1356 if (tb1 != NULL) {
1357 /* find head of list */
1358 for(;;) {
1359 n1 = (long)tb1 & 3;
1360 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1361 if (n1 == 2)
1362 break;
1363 tb1 = tb1->jmp_next[n1];
1365 /* we are now sure now that tb jumps to tb1 */
1366 tb_next = tb1;
1368 /* remove tb from the jmp_first list */
1369 ptb = &tb_next->jmp_first;
1370 for(;;) {
1371 tb1 = *ptb;
1372 n1 = (long)tb1 & 3;
1373 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1374 if (n1 == n && tb1 == tb)
1375 break;
1376 ptb = &tb1->jmp_next[n1];
1378 *ptb = tb->jmp_next[n];
1379 tb->jmp_next[n] = NULL;
1381 /* suppress the jump to next tb in generated code */
1382 tb_reset_jump(tb, n);
1384 /* suppress jumps in the tb on which we could have jumped */
1385 tb_reset_jump_recursive(tb_next);
1389 static void tb_reset_jump_recursive(TranslationBlock *tb)
1391 tb_reset_jump_recursive2(tb, 0);
1392 tb_reset_jump_recursive2(tb, 1);
#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
/* Invalidate the translated code covering the single byte at 'pc'. */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
/*
 * Translate 'pc' through cpu_get_phys_page_debug() and the physical page
 * table, then invalidate the translated code covering that one byte.
 */
static void breakpoint_invalidate(CPUState *env, target_ulong pc)
{
    target_phys_addr_t phys = cpu_get_phys_page_debug(env, pc);
    PhysPageDesc *desc = phys_page_find(phys >> TARGET_PAGE_BITS);
    target_ulong pd;
    ram_addr_t ram_addr;

    if (desc) {
        pd = desc->phys_offset;
    } else {
        pd = IO_MEM_UNASSIGNED;
    }
    ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
    tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
}
#endif
#endif /* TARGET_HAS_ICE */
1422 #if defined(CONFIG_USER_ONLY)
1423 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1428 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1429 int flags, CPUWatchpoint **watchpoint)
1431 return -ENOSYS;
1433 #else
1434 /* Add a watchpoint. */
1435 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1436 int flags, CPUWatchpoint **watchpoint)
1438 target_ulong len_mask = ~(len - 1);
1439 CPUWatchpoint *wp;
1441 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1442 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1443 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1444 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1445 return -EINVAL;
1447 wp = g_malloc(sizeof(*wp));
1449 wp->vaddr = addr;
1450 wp->len_mask = len_mask;
1451 wp->flags = flags;
1453 /* keep all GDB-injected watchpoints in front */
1454 if (flags & BP_GDB)
1455 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1456 else
1457 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1459 tlb_flush_page(env, addr);
1461 if (watchpoint)
1462 *watchpoint = wp;
1463 return 0;
1466 /* Remove a specific watchpoint. */
1467 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1468 int flags)
1470 target_ulong len_mask = ~(len - 1);
1471 CPUWatchpoint *wp;
1473 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1474 if (addr == wp->vaddr && len_mask == wp->len_mask
1475 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1476 cpu_watchpoint_remove_by_ref(env, wp);
1477 return 0;
1480 return -ENOENT;
1483 /* Remove a specific watchpoint by reference. */
1484 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1486 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1488 tlb_flush_page(env, watchpoint->vaddr);
1490 g_free(watchpoint);
1493 /* Remove all matching watchpoints. */
1494 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1496 CPUWatchpoint *wp, *next;
1498 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1499 if (wp->flags & mask)
1500 cpu_watchpoint_remove_by_ref(env, wp);
1503 #endif
1505 /* Add a breakpoint. */
1506 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1507 CPUBreakpoint **breakpoint)
1509 #if defined(TARGET_HAS_ICE)
1510 CPUBreakpoint *bp;
1512 bp = g_malloc(sizeof(*bp));
1514 bp->pc = pc;
1515 bp->flags = flags;
1517 /* keep all GDB-injected breakpoints in front */
1518 if (flags & BP_GDB)
1519 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1520 else
1521 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1523 breakpoint_invalidate(env, pc);
1525 if (breakpoint)
1526 *breakpoint = bp;
1527 return 0;
1528 #else
1529 return -ENOSYS;
1530 #endif
1533 /* Remove a specific breakpoint. */
1534 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1536 #if defined(TARGET_HAS_ICE)
1537 CPUBreakpoint *bp;
1539 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1540 if (bp->pc == pc && bp->flags == flags) {
1541 cpu_breakpoint_remove_by_ref(env, bp);
1542 return 0;
1545 return -ENOENT;
1546 #else
1547 return -ENOSYS;
1548 #endif
1551 /* Remove a specific breakpoint by reference. */
1552 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1554 #if defined(TARGET_HAS_ICE)
1555 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1557 breakpoint_invalidate(env, breakpoint->pc);
1559 g_free(breakpoint);
1560 #endif
1563 /* Remove all matching breakpoints. */
1564 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1566 #if defined(TARGET_HAS_ICE)
1567 CPUBreakpoint *bp, *next;
1569 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1570 if (bp->flags & mask)
1571 cpu_breakpoint_remove_by_ref(env, bp);
1573 #endif
1576 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1577 CPU loop after each instruction */
1578 void cpu_single_step(CPUState *env, int enabled)
1580 #if defined(TARGET_HAS_ICE)
1581 if (env->singlestep_enabled != enabled) {
1582 env->singlestep_enabled = enabled;
1583 if (kvm_enabled())
1584 kvm_update_guest_debug(env, 0);
1585 else {
1586 /* must flush all the translated code to avoid inconsistencies */
1587 /* XXX: only flush what is necessary */
1588 tb_flush(env);
1591 #endif
1594 /* enable or disable low levels log */
1595 void cpu_set_log(int log_flags)
1597 loglevel = log_flags;
1598 if (loglevel && !logfile) {
1599 logfile = fopen(logfilename, log_append ? "a" : "w");
1600 if (!logfile) {
1601 perror(logfilename);
1602 _exit(1);
1604 #if !defined(CONFIG_SOFTMMU)
1605 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1607 static char logfile_buf[4096];
1608 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1610 #elif !defined(_WIN32)
1611 /* Win32 doesn't support line-buffering and requires size >= 2 */
1612 setvbuf(logfile, NULL, _IOLBF, 0);
1613 #endif
1614 log_append = 1;
1616 if (!loglevel && logfile) {
1617 fclose(logfile);
1618 logfile = NULL;
1622 void cpu_set_log_filename(const char *filename)
1624 logfilename = strdup(filename);
1625 if (logfile) {
1626 fclose(logfile);
1627 logfile = NULL;
1629 cpu_set_log(loglevel);
1632 static void cpu_unlink_tb(CPUState *env)
1634 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1635 problem and hope the cpu will stop of its own accord. For userspace
1636 emulation this often isn't actually as bad as it sounds. Often
1637 signals are used primarily to interrupt blocking syscalls. */
1638 TranslationBlock *tb;
1639 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1641 spin_lock(&interrupt_lock);
1642 tb = env->current_tb;
1643 /* if the cpu is currently executing code, we must unlink it and
1644 all the potentially executing TB */
1645 if (tb) {
1646 env->current_tb = NULL;
1647 tb_reset_jump_recursive(tb);
1649 spin_unlock(&interrupt_lock);
1652 #ifndef CONFIG_USER_ONLY
1653 /* mask must never be zero, except for A20 change call */
1654 static void tcg_handle_interrupt(CPUState *env, int mask)
1656 int old_mask;
1658 old_mask = env->interrupt_request;
1659 env->interrupt_request |= mask;
1662 * If called from iothread context, wake the target cpu in
1663 * case its halted.
1665 if (!qemu_cpu_is_self(env)) {
1666 qemu_cpu_kick(env);
1667 return;
1670 if (use_icount) {
1671 env->icount_decr.u16.high = 0xffff;
1672 if (!can_do_io(env)
1673 && (mask & ~old_mask) != 0) {
1674 cpu_abort(env, "Raised interrupt while not in I/O function");
1676 } else {
1677 cpu_unlink_tb(env);
1681 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1683 #else /* CONFIG_USER_ONLY */
1685 void cpu_interrupt(CPUState *env, int mask)
1687 env->interrupt_request |= mask;
1688 cpu_unlink_tb(env);
1690 #endif /* CONFIG_USER_ONLY */
1692 void cpu_reset_interrupt(CPUState *env, int mask)
1694 env->interrupt_request &= ~mask;
1697 void cpu_exit(CPUState *env)
1699 env->exit_request = 1;
1700 cpu_unlink_tb(env);
1703 const CPULogItem cpu_log_items[] = {
1704 { CPU_LOG_TB_OUT_ASM, "out_asm",
1705 "show generated host assembly code for each compiled TB" },
1706 { CPU_LOG_TB_IN_ASM, "in_asm",
1707 "show target assembly code for each compiled TB" },
1708 { CPU_LOG_TB_OP, "op",
1709 "show micro ops for each compiled TB" },
1710 { CPU_LOG_TB_OP_OPT, "op_opt",
1711 "show micro ops "
1712 #ifdef TARGET_I386
1713 "before eflags optimization and "
1714 #endif
1715 "after liveness analysis" },
1716 { CPU_LOG_INT, "int",
1717 "show interrupts/exceptions in short format" },
1718 { CPU_LOG_EXEC, "exec",
1719 "show trace before each executed TB (lots of logs)" },
1720 { CPU_LOG_TB_CPU, "cpu",
1721 "show CPU state before block translation" },
1722 #ifdef TARGET_I386
1723 { CPU_LOG_PCALL, "pcall",
1724 "show protected mode far calls/returns/exceptions" },
1725 { CPU_LOG_RESET, "cpu_reset",
1726 "show CPU state before CPU resets" },
1727 #endif
1728 #ifdef DEBUG_IOPORT
1729 { CPU_LOG_IOPORT, "ioport",
1730 "show all i/o ports accesses" },
1731 #endif
1732 { 0, NULL, NULL },
1735 #ifndef CONFIG_USER_ONLY
1736 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1737 = QLIST_HEAD_INITIALIZER(memory_client_list);
1739 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1740 ram_addr_t size,
1741 ram_addr_t phys_offset,
1742 bool log_dirty)
1744 CPUPhysMemoryClient *client;
1745 QLIST_FOREACH(client, &memory_client_list, list) {
1746 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1750 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1751 target_phys_addr_t end)
1753 CPUPhysMemoryClient *client;
1754 QLIST_FOREACH(client, &memory_client_list, list) {
1755 int r = client->sync_dirty_bitmap(client, start, end);
1756 if (r < 0)
1757 return r;
1759 return 0;
1762 static int cpu_notify_migration_log(int enable)
1764 CPUPhysMemoryClient *client;
1765 QLIST_FOREACH(client, &memory_client_list, list) {
1766 int r = client->migration_log(client, enable);
1767 if (r < 0)
1768 return r;
1770 return 0;
1773 struct last_map {
1774 target_phys_addr_t start_addr;
1775 ram_addr_t size;
1776 ram_addr_t phys_offset;
1779 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1780 * address. Each intermediate table provides the next L2_BITs of guest
1781 * physical address space. The number of levels vary based on host and
1782 * guest configuration, making it efficient to build the final guest
1783 * physical address by seeding the L1 offset and shifting and adding in
1784 * each L2 offset as we recurse through them. */
1785 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1786 void **lp, target_phys_addr_t addr,
1787 struct last_map *map)
1789 int i;
1791 if (*lp == NULL) {
1792 return;
1794 if (level == 0) {
1795 PhysPageDesc *pd = *lp;
1796 addr <<= L2_BITS + TARGET_PAGE_BITS;
1797 for (i = 0; i < L2_SIZE; ++i) {
1798 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1799 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1801 if (map->size &&
1802 start_addr == map->start_addr + map->size &&
1803 pd[i].phys_offset == map->phys_offset + map->size) {
1805 map->size += TARGET_PAGE_SIZE;
1806 continue;
1807 } else if (map->size) {
1808 client->set_memory(client, map->start_addr,
1809 map->size, map->phys_offset, false);
1812 map->start_addr = start_addr;
1813 map->size = TARGET_PAGE_SIZE;
1814 map->phys_offset = pd[i].phys_offset;
1817 } else {
1818 void **pp = *lp;
1819 for (i = 0; i < L2_SIZE; ++i) {
1820 phys_page_for_each_1(client, level - 1, pp + i,
1821 (addr << L2_BITS) | i, map);
1826 static void phys_page_for_each(CPUPhysMemoryClient *client)
1828 int i;
1829 struct last_map map = { };
1831 for (i = 0; i < P_L1_SIZE; ++i) {
1832 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1833 l1_phys_map + i, i, &map);
1835 if (map.size) {
1836 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1837 false);
1841 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1843 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1844 phys_page_for_each(client);
1847 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1849 QLIST_REMOVE(client, list);
1851 #endif
/*
 * Compare the first 'n' bytes at 's1' with the NUL-terminated string 's2'.
 * Returns 1 when s2 is exactly n bytes long and those bytes equal s1[0..n),
 * 0 otherwise.  s1 does not need to be NUL-terminated.
 *
 * A negative n never matches; this is now checked explicitly rather than
 * left to the implicit int -> size_t conversion in the length comparison.
 */
static int cmp1(const char *s1, int n, const char *s2)
{
    if (n < 0 || strlen(s2) != (size_t)n) {
        return 0;
    }
    return memcmp(s1, s2, (size_t)n) == 0;
}
1860 /* takes a comma separated list of log masks. Return 0 if error. */
1861 int cpu_str_to_log_mask(const char *str)
1863 const CPULogItem *item;
1864 int mask;
1865 const char *p, *p1;
1867 p = str;
1868 mask = 0;
1869 for(;;) {
1870 p1 = strchr(p, ',');
1871 if (!p1)
1872 p1 = p + strlen(p);
1873 if(cmp1(p,p1-p,"all")) {
1874 for(item = cpu_log_items; item->mask != 0; item++) {
1875 mask |= item->mask;
1877 } else {
1878 for(item = cpu_log_items; item->mask != 0; item++) {
1879 if (cmp1(p, p1 - p, item->name))
1880 goto found;
1882 return 0;
1884 found:
1885 mask |= item->mask;
1886 if (*p1 != ',')
1887 break;
1888 p = p1 + 1;
1890 return mask;
1893 void cpu_abort(CPUState *env, const char *fmt, ...)
1895 va_list ap;
1896 va_list ap2;
1898 va_start(ap, fmt);
1899 va_copy(ap2, ap);
1900 fprintf(stderr, "qemu: fatal: ");
1901 vfprintf(stderr, fmt, ap);
1902 fprintf(stderr, "\n");
1903 #ifdef TARGET_I386
1904 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1905 #else
1906 cpu_dump_state(env, stderr, fprintf, 0);
1907 #endif
1908 if (qemu_log_enabled()) {
1909 qemu_log("qemu: fatal: ");
1910 qemu_log_vprintf(fmt, ap2);
1911 qemu_log("\n");
1912 #ifdef TARGET_I386
1913 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1914 #else
1915 log_cpu_state(env, 0);
1916 #endif
1917 qemu_log_flush();
1918 qemu_log_close();
1920 va_end(ap2);
1921 va_end(ap);
1922 #if defined(CONFIG_USER_ONLY)
1924 struct sigaction act;
1925 sigfillset(&act.sa_mask);
1926 act.sa_handler = SIG_DFL;
1927 sigaction(SIGABRT, &act, NULL);
1929 #endif
1930 abort();
1933 CPUState *cpu_copy(CPUState *env)
1935 CPUState *new_env = cpu_init(env->cpu_model_str);
1936 CPUState *next_cpu = new_env->next_cpu;
1937 int cpu_index = new_env->cpu_index;
1938 #if defined(TARGET_HAS_ICE)
1939 CPUBreakpoint *bp;
1940 CPUWatchpoint *wp;
1941 #endif
1943 memcpy(new_env, env, sizeof(CPUState));
1945 /* Preserve chaining and index. */
1946 new_env->next_cpu = next_cpu;
1947 new_env->cpu_index = cpu_index;
1949 /* Clone all break/watchpoints.
1950 Note: Once we support ptrace with hw-debug register access, make sure
1951 BP_CPU break/watchpoints are handled correctly on clone. */
1952 QTAILQ_INIT(&env->breakpoints);
1953 QTAILQ_INIT(&env->watchpoints);
1954 #if defined(TARGET_HAS_ICE)
1955 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1956 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1958 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1959 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1960 wp->flags, NULL);
1962 #endif
1964 return new_env;
1967 #if !defined(CONFIG_USER_ONLY)
1969 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1971 unsigned int i;
1973 /* Discard jump cache entries for any tb which might potentially
1974 overlap the flushed page. */
1975 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1976 memset (&env->tb_jmp_cache[i], 0,
1977 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1979 i = tb_jmp_cache_hash_page(addr);
1980 memset (&env->tb_jmp_cache[i], 0,
1981 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1984 static CPUTLBEntry s_cputlb_empty_entry = {
1985 .addr_read = -1,
1986 .addr_write = -1,
1987 .addr_code = -1,
1988 .addend = -1,
1991 /* NOTE: if flush_global is true, also flush global entries (not
1992 implemented yet) */
1993 void tlb_flush(CPUState *env, int flush_global)
1995 int i;
1997 #if defined(DEBUG_TLB)
1998 printf("tlb_flush:\n");
1999 #endif
2000 /* must reset current TB so that interrupts cannot modify the
2001 links while we are modifying them */
2002 env->current_tb = NULL;
2004 for(i = 0; i < CPU_TLB_SIZE; i++) {
2005 int mmu_idx;
2006 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2007 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2011 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2013 env->tlb_flush_addr = -1;
2014 env->tlb_flush_mask = 0;
2015 tlb_flush_count++;
2018 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2020 if (addr == (tlb_entry->addr_read &
2021 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2022 addr == (tlb_entry->addr_write &
2023 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2024 addr == (tlb_entry->addr_code &
2025 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2026 *tlb_entry = s_cputlb_empty_entry;
2030 void tlb_flush_page(CPUState *env, target_ulong addr)
2032 int i;
2033 int mmu_idx;
2035 #if defined(DEBUG_TLB)
2036 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2037 #endif
2038 /* Check if we need to flush due to large pages. */
2039 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2040 #if defined(DEBUG_TLB)
2041 printf("tlb_flush_page: forced full flush ("
2042 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2043 env->tlb_flush_addr, env->tlb_flush_mask);
2044 #endif
2045 tlb_flush(env, 1);
2046 return;
2048 /* must reset current TB so that interrupts cannot modify the
2049 links while we are modifying them */
2050 env->current_tb = NULL;
2052 addr &= TARGET_PAGE_MASK;
2053 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2054 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2055 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2057 tlb_flush_jmp_cache(env, addr);
2060 /* update the TLBs so that writes to code in the virtual page 'addr'
2061 can be detected */
2062 static void tlb_protect_code(ram_addr_t ram_addr)
2064 cpu_physical_memory_reset_dirty(ram_addr,
2065 ram_addr + TARGET_PAGE_SIZE,
2066 CODE_DIRTY_FLAG);
2069 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2070 tested for self modifying code */
2071 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2072 target_ulong vaddr)
2074 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2077 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2078 unsigned long start, unsigned long length)
2080 unsigned long addr;
2081 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2082 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2083 if ((addr - start) < length) {
2084 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2089 /* Note: start and end must be within the same ram block. */
2090 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2091 int dirty_flags)
2093 CPUState *env;
2094 unsigned long length, start1;
2095 int i;
2097 start &= TARGET_PAGE_MASK;
2098 end = TARGET_PAGE_ALIGN(end);
2100 length = end - start;
2101 if (length == 0)
2102 return;
2103 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2105 /* we modify the TLB cache so that the dirty bit will be set again
2106 when accessing the range */
2107 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2108 /* Check that we don't span multiple blocks - this breaks the
2109 address comparisons below. */
2110 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2111 != (end - 1) - start) {
2112 abort();
2115 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2116 int mmu_idx;
2117 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2118 for(i = 0; i < CPU_TLB_SIZE; i++)
2119 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2120 start1, length);
2125 int cpu_physical_memory_set_dirty_tracking(int enable)
2127 int ret = 0;
2128 in_migration = enable;
2129 ret = cpu_notify_migration_log(!!enable);
2130 return ret;
2133 int cpu_physical_memory_get_dirty_tracking(void)
2135 return in_migration;
2138 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2139 target_phys_addr_t end_addr)
2141 int ret;
2143 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2144 return ret;
2147 int cpu_physical_log_start(target_phys_addr_t start_addr,
2148 ram_addr_t size)
2150 CPUPhysMemoryClient *client;
2151 QLIST_FOREACH(client, &memory_client_list, list) {
2152 if (client->log_start) {
2153 int r = client->log_start(client, start_addr, size);
2154 if (r < 0) {
2155 return r;
2159 return 0;
2162 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2163 ram_addr_t size)
2165 CPUPhysMemoryClient *client;
2166 QLIST_FOREACH(client, &memory_client_list, list) {
2167 if (client->log_stop) {
2168 int r = client->log_stop(client, start_addr, size);
2169 if (r < 0) {
2170 return r;
2174 return 0;
2177 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2179 ram_addr_t ram_addr;
2180 void *p;
2182 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2183 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2184 + tlb_entry->addend);
2185 ram_addr = qemu_ram_addr_from_host_nofail(p);
2186 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2187 tlb_entry->addr_write |= TLB_NOTDIRTY;
2192 /* update the TLB according to the current state of the dirty bits */
2193 void cpu_tlb_update_dirty(CPUState *env)
2195 int i;
2196 int mmu_idx;
2197 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2198 for(i = 0; i < CPU_TLB_SIZE; i++)
2199 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2203 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2205 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2206 tlb_entry->addr_write = vaddr;
2209 /* update the TLB corresponding to virtual page vaddr
2210 so that it is no longer dirty */
2211 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2213 int i;
2214 int mmu_idx;
2216 vaddr &= TARGET_PAGE_MASK;
2217 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2218 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2219 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2222 /* Our TLB does not support large pages, so remember the area covered by
2223 large pages and trigger a full TLB flush if these are invalidated. */
2224 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2225 target_ulong size)
2227 target_ulong mask = ~(size - 1);
2229 if (env->tlb_flush_addr == (target_ulong)-1) {
2230 env->tlb_flush_addr = vaddr & mask;
2231 env->tlb_flush_mask = mask;
2232 return;
2234 /* Extend the existing region to include the new page.
2235 This is a compromise between unnecessary flushes and the cost
2236 of maintaining a full variable size TLB. */
2237 mask &= env->tlb_flush_mask;
2238 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2239 mask <<= 1;
2241 env->tlb_flush_addr &= mask;
2242 env->tlb_flush_mask = mask;
2245 /* Add a new TLB entry. At most one entry for a given virtual address
2246 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2247 supplied size is only used by tlb_flush_page. */
/* Parameters: vaddr/paddr are the virtual and physical addresses to map,
   prot is a mask of PAGE_READ/PAGE_WRITE/PAGE_EXEC, mmu_idx selects the
   TLB table to fill, and size must be at least TARGET_PAGE_SIZE. */
2248 void tlb_set_page(CPUState *env, target_ulong vaddr,
2249 target_phys_addr_t paddr, int prot,
2250 int mmu_idx, target_ulong size)
2252 PhysPageDesc *p;
2253 unsigned long pd;
2254 unsigned int index;
2255 target_ulong address;
2256 target_ulong code_address;
2257 unsigned long addend;
2258 CPUTLBEntry *te;
2259 CPUWatchpoint *wp;
2260 target_phys_addr_t iotlb;
/* Mappings larger than one target page are only recorded for flush
   bookkeeping through tlb_add_large_page(). */
2262 assert(size >= TARGET_PAGE_SIZE);
2263 if (size != TARGET_PAGE_SIZE) {
2264 tlb_add_large_page(env, vaddr, size);
/* Fetch the physical page descriptor; a missing descriptor is treated as
   IO_MEM_UNASSIGNED. */
2266 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2267 if (!p) {
2268 pd = IO_MEM_UNASSIGNED;
2269 } else {
2270 pd = p->phys_offset;
2272 #if defined(DEBUG_TLB)
2273 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2274 " prot=%x idx=%d pd=0x%08lx\n",
2275 vaddr, paddr, prot, mmu_idx, pd);
2276 #endif
/* 'address' starts as vaddr and collects TLB_* flag bits below. */
2278 address = vaddr;
2279 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2280 /* IO memory case (romd handled later) */
2281 address |= TLB_MMIO;
2283 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
/* Compute the iotlb value: RAM/ROM pages use the ram address plus a
   NOTDIRTY or ROM tag, other pages use the io index plus an offset. */
2284 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2285 /* Normal RAM. */
2286 iotlb = pd & TARGET_PAGE_MASK;
2287 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2288 iotlb |= IO_MEM_NOTDIRTY;
2289 else
2290 iotlb |= IO_MEM_ROM;
2291 } else {
2292 /* IO handlers are currently passed a physical address.
2293 It would be nice to pass an offset from the base address
2294 of that region. This would avoid having to special case RAM,
2295 and avoid full address decoding in every device.
2296 We can't use the high bits of pd for this because
2297 IO_MEM_ROMD uses these as a ram address. */
2298 iotlb = (pd & ~TARGET_PAGE_MASK);
2299 if (p) {
2300 iotlb += p->region_offset;
2301 } else {
2302 iotlb += paddr;
/* code_address is captured before the watchpoint scan, so a watchpoint on
   the page does not add TLB_MMIO to the code mapping. */
2306 code_address = address;
2307 /* Make accesses to pages with watchpoints go via the
2308 watchpoint trap routines. */
2309 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2310 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2311 /* Avoid trapping reads of pages with a write breakpoint. */
2312 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2313 iotlb = io_mem_watch + paddr;
2314 address |= TLB_MMIO;
2315 break;
/* Fill the slot selected by the virtual page number; iotlb and addend are
   stored relative to vaddr. */
2320 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2321 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2322 te = &env->tlb_table[mmu_idx][index];
2323 te->addend = addend - vaddr;
/* Each access kind not granted by prot gets -1 as its address. */
2324 if (prot & PAGE_READ) {
2325 te->addr_read = address;
2326 } else {
2327 te->addr_read = -1;
2330 if (prot & PAGE_EXEC) {
2331 te->addr_code = code_address;
2332 } else {
2333 te->addr_code = -1;
2335 if (prot & PAGE_WRITE) {
2336 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2337 (pd & IO_MEM_ROMD)) {
2338 /* Write access calls the I/O callback. */
2339 te->addr_write = address | TLB_MMIO;
2340 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2341 !cpu_physical_memory_is_dirty(pd)) {
2342 te->addr_write = address | TLB_NOTDIRTY;
2343 } else {
2344 te->addr_write = address;
2346 } else {
2347 te->addr_write = -1;
2351 #else
2353 void tlb_flush(CPUState *env, int flush_global)
2357 void tlb_flush_page(CPUState *env, target_ulong addr)
2362 * Walks guest process memory "regions" one by one
2363 * and calls callback function 'fn' for each region.
2366 struct walk_memory_regions_data
2368 walk_memory_regions_fn fn;
2369 void *priv;
2370 unsigned long start;
2371 int prot;
2374 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2375 abi_ulong end, int new_prot)
2377 if (data->start != -1ul) {
2378 int rc = data->fn(data->priv, data->start, end, data->prot);
2379 if (rc != 0) {
2380 return rc;
2384 data->start = (new_prot ? end : -1ul);
2385 data->prot = new_prot;
2387 return 0;
2390 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2391 abi_ulong base, int level, void **lp)
2393 abi_ulong pa;
2394 int i, rc;
2396 if (*lp == NULL) {
2397 return walk_memory_regions_end(data, base, 0);
2400 if (level == 0) {
2401 PageDesc *pd = *lp;
2402 for (i = 0; i < L2_SIZE; ++i) {
2403 int prot = pd[i].flags;
2405 pa = base | (i << TARGET_PAGE_BITS);
2406 if (prot != data->prot) {
2407 rc = walk_memory_regions_end(data, pa, prot);
2408 if (rc != 0) {
2409 return rc;
2413 } else {
2414 void **pp = *lp;
2415 for (i = 0; i < L2_SIZE; ++i) {
2416 pa = base | ((abi_ulong)i <<
2417 (TARGET_PAGE_BITS + L2_BITS * level));
2418 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2419 if (rc != 0) {
2420 return rc;
2425 return 0;
2428 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2430 struct walk_memory_regions_data data;
2431 unsigned long i;
2433 data.fn = fn;
2434 data.priv = priv;
2435 data.start = -1ul;
2436 data.prot = 0;
2438 for (i = 0; i < V_L1_SIZE; i++) {
2439 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2440 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2441 if (rc != 0) {
2442 return rc;
2446 return walk_memory_regions_end(&data, 0, 0);
2449 static int dump_region(void *priv, abi_ulong start,
2450 abi_ulong end, unsigned long prot)
2452 FILE *f = (FILE *)priv;
2454 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2455 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2456 start, end, end - start,
2457 ((prot & PAGE_READ) ? 'r' : '-'),
2458 ((prot & PAGE_WRITE) ? 'w' : '-'),
2459 ((prot & PAGE_EXEC) ? 'x' : '-'));
2461 return (0);
2464 /* dump memory mappings */
2465 void page_dump(FILE *f)
2467 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2468 "start", "end", "size", "prot");
2469 walk_memory_regions(f, dump_region);
2472 int page_get_flags(target_ulong address)
2474 PageDesc *p;
2476 p = page_find(address >> TARGET_PAGE_BITS);
2477 if (!p)
2478 return 0;
2479 return p->flags;
2482 /* Modify the flags of a page and invalidate the code if necessary.
2483 The flag PAGE_WRITE_ORG is positioned automatically depending
2484 on PAGE_WRITE. The mmap_lock should already be held. */
2485 void page_set_flags(target_ulong start, target_ulong end, int flags)
2487 target_ulong addr, len;
2489 /* This function should never be called with addresses outside the
2490 guest address space. If this assert fires, it probably indicates
2491 a missing call to h2g_valid. */
2492 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2493 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2494 #endif
2495 assert(start < end);
2497 start = start & TARGET_PAGE_MASK;
2498 end = TARGET_PAGE_ALIGN(end);
2500 if (flags & PAGE_WRITE) {
2501 flags |= PAGE_WRITE_ORG;
2504 for (addr = start, len = end - start;
2505 len != 0;
2506 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2507 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2509 /* If the write protection bit is set, then we invalidate
2510 the code inside. */
2511 if (!(p->flags & PAGE_WRITE) &&
2512 (flags & PAGE_WRITE) &&
2513 p->first_tb) {
2514 tb_invalidate_phys_page(addr, 0, NULL);
2516 p->flags = flags;
2520 int page_check_range(target_ulong start, target_ulong len, int flags)
2522 PageDesc *p;
2523 target_ulong end;
2524 target_ulong addr;
2526 /* This function should never be called with addresses outside the
2527 guest address space. If this assert fires, it probably indicates
2528 a missing call to h2g_valid. */
2529 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2530 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2531 #endif
2533 if (len == 0) {
2534 return 0;
2536 if (start + len - 1 < start) {
2537 /* We've wrapped around. */
2538 return -1;
2541 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2542 start = start & TARGET_PAGE_MASK;
2544 for (addr = start, len = end - start;
2545 len != 0;
2546 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2547 p = page_find(addr >> TARGET_PAGE_BITS);
2548 if( !p )
2549 return -1;
2550 if( !(p->flags & PAGE_VALID) )
2551 return -1;
2553 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2554 return -1;
2555 if (flags & PAGE_WRITE) {
2556 if (!(p->flags & PAGE_WRITE_ORG))
2557 return -1;
2558 /* unprotect the page if it was put read-only because it
2559 contains translated code */
2560 if (!(p->flags & PAGE_WRITE)) {
2561 if (!page_unprotect(addr, 0, NULL))
2562 return -1;
2564 return 0;
2567 return 0;
2570 /* called from signal handler: invalidate the code and unprotect the
2571 page. Return TRUE if the fault was successfully handled. */
2572 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2574 unsigned int prot;
2575 PageDesc *p;
2576 target_ulong host_start, host_end, addr;
2578 /* Technically this isn't safe inside a signal handler. However we
2579 know this only ever happens in a synchronous SEGV handler, so in
2580 practice it seems to be ok. */
2581 mmap_lock();
2583 p = page_find(address >> TARGET_PAGE_BITS);
2584 if (!p) {
2585 mmap_unlock();
2586 return 0;
2589 /* if the page was really writable, then we change its
2590 protection back to writable */
2591 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2592 host_start = address & qemu_host_page_mask;
2593 host_end = host_start + qemu_host_page_size;
2595 prot = 0;
2596 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2597 p = page_find(addr >> TARGET_PAGE_BITS);
2598 p->flags |= PAGE_WRITE;
2599 prot |= p->flags;
2601 /* and since the content will be modified, we must invalidate
2602 the corresponding translated code. */
2603 tb_invalidate_phys_page(addr, pc, puc);
2604 #ifdef DEBUG_TB_CHECK
2605 tb_invalidate_check(addr);
2606 #endif
2608 mprotect((void *)g2h(host_start), qemu_host_page_size,
2609 prot & PAGE_BITS);
2611 mmap_unlock();
2612 return 1;
2614 mmap_unlock();
2615 return 0;
2618 static inline void tlb_set_dirty(CPUState *env,
2619 unsigned long addr, target_ulong vaddr)
2622 #endif /* defined(CONFIG_USER_ONLY) */
2624 #if !defined(CONFIG_USER_ONLY)
2626 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2627 typedef struct subpage_t {
2628 target_phys_addr_t base;
2629 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2630 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2631 } subpage_t;
/* Forward declarations of the sub-page helpers used by
   cpu_register_physical_memory_log() below. */
2633 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2634 ram_addr_t memory, ram_addr_t region_offset);
2635 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2636 ram_addr_t orig_memory,
2637 ram_addr_t region_offset);
/*
 * For the page at 'addr' inside the range being registered, compute the
 * first (start_addr2) and last (end_addr2) byte offsets within that page
 * that the range covers, and set need_subpage to 1 when the range does not
 * cover the page completely.
 *
 * NOTE: the expansion reads 'orig_size' from the caller's scope (it is not
 * a macro parameter) and does not use its 'end_addr' parameter.
 */
2638 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2639 need_subpage) \
2640 do { \
2641 if (addr > start_addr) \
2642 start_addr2 = 0; \
2643 else { \
2644 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2645 if (start_addr2 > 0) \
2646 need_subpage = 1; \
2649 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2650 end_addr2 = TARGET_PAGE_SIZE - 1; \
2651 else { \
2652 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2653 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2654 need_subpage = 1; \
2656 } while (0)
/*
 * Summary of the steps below:
 *  - listeners are notified through cpu_notify_set_memory();
 *  - 'size' is rounded up to a whole number of target pages, while the
 *    original value is kept in 'orig_size' for CHECK_SUBPAGE;
 *  - each page in the range either gets phys_offset/region_offset written
 *    into its PhysPageDesc, or is routed through a subpage_t when the range
 *    covers only part of the page;
 *  - finally every CPU's TLB is flushed.
 */
2658 /* register physical memory.
2659 For RAM, 'size' must be a multiple of the target page size.
2660 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2661 io memory page. The address used when calling the IO function is
2662 the offset from the start of the region, plus region_offset. Both
2663 start_addr and region_offset are rounded down to a page boundary
2664 before calculating this offset. This should not be a problem unless
2665 the low bits of start_addr and region_offset differ. */
2666 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2667 ram_addr_t size,
2668 ram_addr_t phys_offset,
2669 ram_addr_t region_offset,
2670 bool log_dirty)
2672 target_phys_addr_t addr, end_addr;
2673 PhysPageDesc *p;
2674 CPUState *env;
/* unrounded size; read by the CHECK_SUBPAGE expansion */
2675 ram_addr_t orig_size = size;
2676 subpage_t *subpage;
2678 assert(size);
2679 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
/* unassigned ranges use the physical start address as region offset */
2681 if (phys_offset == IO_MEM_UNASSIGNED) {
2682 region_offset = start_addr;
2684 region_offset &= TARGET_PAGE_MASK;
/* round the size up to a multiple of the target page size */
2685 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2686 end_addr = start_addr + (target_phys_addr_t)size;
2688 addr = start_addr;
2689 do {
2690 p = phys_page_find(addr >> TARGET_PAGE_BITS);
/* case 1: the page already has a non-unassigned mapping */
2691 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2692 ram_addr_t orig_memory = p->phys_offset;
2693 target_phys_addr_t start_addr2, end_addr2;
2694 int need_subpage = 0;
2696 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2697 need_subpage);
2698 if (need_subpage) {
/* create a subpage for this page unless it already is one */
2699 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2700 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2701 &p->phys_offset, orig_memory,
2702 p->region_offset);
2703 } else {
/* reuse the existing subpage stored as the handler's opaque */
2704 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2705 >> IO_MEM_SHIFT];
2707 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2708 region_offset);
2709 p->region_offset = 0;
2710 } else {
/* whole page covered: overwrite the mapping */
2711 p->phys_offset = phys_offset;
/* RAM/ROM/ROMD offsets advance by one page per mapped page */
2712 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2713 (phys_offset & IO_MEM_ROMD))
2714 phys_offset += TARGET_PAGE_SIZE;
2716 } else {
/* case 2: no descriptor yet, or the page is unassigned */
2717 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2718 p->phys_offset = phys_offset;
2719 p->region_offset = region_offset;
2720 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2721 (phys_offset & IO_MEM_ROMD)) {
2722 phys_offset += TARGET_PAGE_SIZE;
2723 } else {
/* I/O memory: a partially covered page needs a subpage */
2724 target_phys_addr_t start_addr2, end_addr2;
2725 int need_subpage = 0;
2727 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2728 end_addr2, need_subpage);
2730 if (need_subpage) {
2731 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2732 &p->phys_offset, IO_MEM_UNASSIGNED,
2733 addr & TARGET_PAGE_MASK);
2734 subpage_register(subpage, start_addr2, end_addr2,
2735 phys_offset, region_offset);
2736 p->region_offset = 0;
2740 region_offset += TARGET_PAGE_SIZE;
2741 addr += TARGET_PAGE_SIZE;
/* NOTE(review): terminates on equality, so it relies on addr reaching
   end_addr exactly in page-sized steps -- confirm start_addr alignment
   expectations with callers */
2742 } while (addr != end_addr);
2744 /* since each CPU stores ram addresses in its TLB cache, we must
2745 reset the modified entries */
2746 /* XXX: slow ! */
2747 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2748 tlb_flush(env, 1);
2752 /* XXX: temporary until new memory mapping API */
2753 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2755 PhysPageDesc *p;
2757 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2758 if (!p)
2759 return IO_MEM_UNASSIGNED;
2760 return p->phys_offset;
2763 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2765 if (kvm_enabled())
2766 kvm_coalesce_mmio_region(addr, size);
2769 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2771 if (kvm_enabled())
2772 kvm_uncoalesce_mmio_region(addr, size);
/* Ask KVM to flush its coalesced MMIO buffer; no-op without KVM. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2781 #if defined(__linux__) && !defined(TARGET_S390X)
2783 #include <sys/vfs.h>
2785 #define HUGETLBFS_MAGIC 0x958458f6
2787 static long gethugepagesize(const char *path)
2789 struct statfs fs;
2790 int ret;
2792 do {
2793 ret = statfs(path, &fs);
2794 } while (ret != 0 && errno == EINTR);
2796 if (ret != 0) {
2797 perror(path);
2798 return 0;
2801 if (fs.f_type != HUGETLBFS_MAGIC)
2802 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2804 return fs.f_bsize;
2807 static void *file_ram_alloc(RAMBlock *block,
2808 ram_addr_t memory,
2809 const char *path)
2811 char *filename;
2812 void *area;
2813 int fd;
2814 #ifdef MAP_POPULATE
2815 int flags;
2816 #endif
2817 unsigned long hpagesize;
2819 hpagesize = gethugepagesize(path);
2820 if (!hpagesize) {
2821 return NULL;
2824 if (memory < hpagesize) {
2825 return NULL;
2828 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2829 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2830 return NULL;
2833 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2834 return NULL;
2837 fd = mkstemp(filename);
2838 if (fd < 0) {
2839 perror("unable to create backing store for hugepages");
2840 free(filename);
2841 return NULL;
2843 unlink(filename);
2844 free(filename);
2846 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2849 * ftruncate is not supported by hugetlbfs in older
2850 * hosts, so don't bother bailing out on errors.
2851 * If anything goes wrong with it under other filesystems,
2852 * mmap will fail.
2854 if (ftruncate(fd, memory))
2855 perror("ftruncate");
2857 #ifdef MAP_POPULATE
2858 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2859 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2860 * to sidestep this quirk.
2862 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2863 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2864 #else
2865 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2866 #endif
2867 if (area == MAP_FAILED) {
2868 perror("file_ram_alloc: can't mmap RAM pages");
2869 close(fd);
2870 return (NULL);
2872 block->fd = fd;
2873 return area;
2875 #endif
2877 static ram_addr_t find_ram_offset(ram_addr_t size)
2879 RAMBlock *block, *next_block;
2880 ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
2882 if (QLIST_EMPTY(&ram_list.blocks))
2883 return 0;
2885 QLIST_FOREACH(block, &ram_list.blocks, next) {
2886 ram_addr_t end, next = RAM_ADDR_MAX;
2888 end = block->offset + block->length;
2890 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2891 if (next_block->offset >= end) {
2892 next = MIN(next, next_block->offset);
2895 if (next - end >= size && next - end < mingap) {
2896 offset = end;
2897 mingap = next - end;
2900 return offset;
2903 static ram_addr_t last_ram_offset(void)
2905 RAMBlock *block;
2906 ram_addr_t last = 0;
2908 QLIST_FOREACH(block, &ram_list.blocks, next)
2909 last = MAX(last, block->offset + block->length);
2911 return last;
/*
 * Register a new RAM block of 'size' bytes (rounded up to a target page)
 * and return its ram_addr_t offset.
 *
 * The block's id string is the device path from the parent bus (when one is
 * available) followed by 'name'; registering a duplicate id aborts.
 * When 'host' is non-NULL it is used as backing memory and the block is
 * flagged RAM_PREALLOC_MASK; otherwise memory is obtained from -mem-path,
 * a fixed S390 KVM mapping, Xen, or qemu_vmalloc() depending on the build
 * and run-time configuration.  The dirty bitmap is grown and the new
 * block's pages are marked fully dirty (0xff).
 */
2914 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2915 ram_addr_t size, void *host)
2917 RAMBlock *new_block, *block;
2919 size = TARGET_PAGE_ALIGN(size);
2920 new_block = g_malloc0(sizeof(*new_block));
/* prefix the id with the device path when the bus can provide one */
2922 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2923 char *id = dev->parent_bus->info->get_dev_path(dev);
2924 if (id) {
2925 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2926 g_free(id);
2929 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
/* block ids must be unique */
2931 QLIST_FOREACH(block, &ram_list.blocks, next) {
2932 if (!strcmp(block->idstr, new_block->idstr)) {
2933 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2934 new_block->idstr);
2935 abort();
2939 new_block->offset = find_ram_offset(size);
2940 if (host) {
/* caller-provided memory */
2941 new_block->host = host;
2942 new_block->flags |= RAM_PREALLOC_MASK;
2943 } else {
2944 if (mem_path) {
2945 #if defined (__linux__) && !defined(TARGET_S390X)
/* file-backed RAM, falling back to qemu_vmalloc() on failure */
2946 new_block->host = file_ram_alloc(new_block, size, mem_path);
2947 if (!new_block->host) {
2948 new_block->host = qemu_vmalloc(size);
2949 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2951 #else
2952 fprintf(stderr, "-mem-path option unsupported\n");
2953 exit(1);
2954 #endif
2955 } else {
2956 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2957 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2958 an system defined value, which is at least 256GB. Larger systems
2959 have larger values. We put the guest between the end of data
2960 segment (system break) and this value. We use 32GB as a base to
2961 have enough room for the system break to grow. */
2962 new_block->host = mmap((void*)0x800000000, size,
2963 PROT_EXEC|PROT_READ|PROT_WRITE,
2964 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2965 if (new_block->host == MAP_FAILED) {
2966 fprintf(stderr, "Allocating RAM failed\n");
2967 abort();
2969 #else
/* NOTE(review): in the Xen branch new_block->host is not assigned here,
   so it stays as zero-initialised by g_malloc0 -- confirm this is the
   intended input to the qemu_madvise() call below */
2970 if (xen_enabled()) {
2971 xen_ram_alloc(new_block->offset, size);
2972 } else {
2973 new_block->host = qemu_vmalloc(size);
2975 #endif
2976 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2979 new_block->length = size;
2981 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
/* grow the dirty bitmap to cover the new block and mark its pages dirty */
2983 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2984 last_ram_offset() >> TARGET_PAGE_BITS);
2985 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2986 0xff, size >> TARGET_PAGE_BITS);
2988 if (kvm_enabled())
2989 kvm_setup_guest_memory(new_block->host, size);
2991 return new_block->offset;
2994 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
2996 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
2999 void qemu_ram_free_from_ptr(ram_addr_t addr)
3001 RAMBlock *block;
3003 QLIST_FOREACH(block, &ram_list.blocks, next) {
3004 if (addr == block->offset) {
3005 QLIST_REMOVE(block, next);
3006 g_free(block);
3007 return;
/*
 * Remove the RAM block whose offset equals 'addr' and release its host
 * memory using the method that matches how it was obtained: nothing for
 * RAM_PREALLOC_MASK blocks, munmap()+close() or qemu_vfree() under
 * -mem-path, munmap() for S390 KVM, the Xen map cache under Xen, and
 * qemu_vfree() otherwise.  Does nothing when no block matches.
 */
3012 void qemu_ram_free(ram_addr_t addr)
3014 RAMBlock *block;
3016 QLIST_FOREACH(block, &ram_list.blocks, next) {
3017 if (addr == block->offset) {
3018 QLIST_REMOVE(block, next);
/* caller-owned memory: only the descriptor is freed below */
3019 if (block->flags & RAM_PREALLOC_MASK) {
3021 } else if (mem_path) {
3022 #if defined (__linux__) && !defined(TARGET_S390X)
/* a non-zero fd is treated as "file-backed"; otherwise the block came
   from the qemu_vmalloc() fallback */
3023 if (block->fd) {
3024 munmap(block->host, block->length);
3025 close(block->fd);
3026 } else {
3027 qemu_vfree(block->host);
3029 #else
3030 abort();
3031 #endif
3032 } else {
3033 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3034 munmap(block->host, block->length);
3035 #else
3036 if (xen_enabled()) {
3037 xen_invalidate_map_cache_entry(block->host);
3038 } else {
3039 qemu_vfree(block->host);
3041 #endif
3043 g_free(block);
3044 return;
/*
 * Replace 'length' bytes of host memory backing ram address 'addr' with a
 * fresh mapping at the same host virtual address (munmap followed by
 * mmap with MAP_FIXED).  The new mapping uses the same kind of backing as
 * the block (file, shared anonymous for S390 KVM, or private anonymous).
 * RAM_PREALLOC_MASK blocks are left untouched.  Exits the process if the
 * mapping does not land at the expected address.
 */
3050 #ifndef _WIN32
3051 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3053 RAMBlock *block;
3054 ram_addr_t offset;
3055 int flags;
3056 void *area, *vaddr;
3058 QLIST_FOREACH(block, &ram_list.blocks, next) {
/* unsigned subtraction: offset < length only if addr is inside the block */
3059 offset = addr - block->offset;
3060 if (offset < block->length) {
3061 vaddr = block->host + offset;
3062 if (block->flags & RAM_PREALLOC_MASK) {
3064 } else {
3065 flags = MAP_FIXED;
3066 munmap(vaddr, length);
3067 if (mem_path) {
3068 #if defined(__linux__) && !defined(TARGET_S390X)
3069 if (block->fd) {
3070 #ifdef MAP_POPULATE
3071 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3072 MAP_PRIVATE;
3073 #else
3074 flags |= MAP_PRIVATE;
3075 #endif
/* remap the same file range that originally backed this address */
3076 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3077 flags, block->fd, offset);
3078 } else {
3079 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3080 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3081 flags, -1, 0);
3083 #else
3084 abort();
3085 #endif
3086 } else {
3087 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3088 flags |= MAP_SHARED | MAP_ANONYMOUS;
3089 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3090 flags, -1, 0);
3091 #else
3092 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3093 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3094 flags, -1, 0);
3095 #endif
/* this also catches MAP_FAILED, which differs from vaddr */
3097 if (area != vaddr) {
3098 fprintf(stderr, "Could not remap addr: "
3099 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3100 length, addr);
3101 exit(1);
3103 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3105 return;
3109 #endif /* !_WIN32 */
3111 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3112 With the exception of the softmmu code in this file, this should
3113 only be used for local memory (e.g. video ram) that the device owns,
3114 and knows it isn't going to access beyond the end of the block.
3116 It should not be used for general purpose DMA.
3117 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3119 void *qemu_get_ram_ptr(ram_addr_t addr)
3121 RAMBlock *block;
3123 QLIST_FOREACH(block, &ram_list.blocks, next) {
3124 if (addr - block->offset < block->length) {
3125 /* Move this entry to to start of the list. */
3126 if (block != QLIST_FIRST(&ram_list.blocks)) {
3127 QLIST_REMOVE(block, next);
3128 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3130 if (xen_enabled()) {
3131 /* We need to check if the requested address is in the RAM
3132 * because we don't want to map the entire memory in QEMU.
3133 * In that case just map until the end of the page.
3135 if (block->offset == 0) {
3136 return xen_map_cache(addr, 0, 0);
3137 } else if (block->host == NULL) {
3138 block->host =
3139 xen_map_cache(block->offset, block->length, 1);
3142 return block->host + (addr - block->offset);
3146 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3147 abort();
3149 return NULL;
3152 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3153 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3155 void *qemu_safe_ram_ptr(ram_addr_t addr)
3157 RAMBlock *block;
3159 QLIST_FOREACH(block, &ram_list.blocks, next) {
3160 if (addr - block->offset < block->length) {
3161 if (xen_enabled()) {
3162 /* We need to check if the requested address is in the RAM
3163 * because we don't want to map the entire memory in QEMU.
3164 * In that case just map until the end of the page.
3166 if (block->offset == 0) {
3167 return xen_map_cache(addr, 0, 0);
3168 } else if (block->host == NULL) {
3169 block->host =
3170 xen_map_cache(block->offset, block->length, 1);
3173 return block->host + (addr - block->offset);
3177 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3178 abort();
3180 return NULL;
3183 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3184 * but takes a size argument */
3185 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3187 if (*size == 0) {
3188 return NULL;
3190 if (xen_enabled()) {
3191 return xen_map_cache(addr, *size, 1);
3192 } else {
3193 RAMBlock *block;
3195 QLIST_FOREACH(block, &ram_list.blocks, next) {
3196 if (addr - block->offset < block->length) {
3197 if (addr - block->offset + *size > block->length)
3198 *size = block->length - addr + block->offset;
3199 return block->host + (addr - block->offset);
3203 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3204 abort();
/* Counterpart of the get-pointer helpers; only emits a trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3213 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3215 RAMBlock *block;
3216 uint8_t *host = ptr;
3218 if (xen_enabled()) {
3219 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3220 return 0;
3223 QLIST_FOREACH(block, &ram_list.blocks, next) {
3224 /* This case append when the block is not mapped. */
3225 if (block->host == NULL) {
3226 continue;
3228 if (host - block->host < block->length) {
3229 *ram_addr = block->offset + (host - block->host);
3230 return 0;
3234 return -1;
3237 /* Some of the softmmu routines need to translate from a host pointer
3238 (typically a TLB entry) back to a ram offset. */
3239 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3241 ram_addr_t ram_addr;
3243 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3244 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3245 abort();
3247 return ram_addr;
3250 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3252 #ifdef DEBUG_UNASSIGNED
3253 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3254 #endif
3255 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3256 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3257 #endif
3258 return 0;
3261 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3263 #ifdef DEBUG_UNASSIGNED
3264 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3265 #endif
3266 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3267 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3268 #endif
3269 return 0;
3272 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3274 #ifdef DEBUG_UNASSIGNED
3275 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3276 #endif
3277 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3278 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3279 #endif
3280 return 0;
3283 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3285 #ifdef DEBUG_UNASSIGNED
3286 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3287 #endif
3288 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3289 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3290 #endif
3293 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3295 #ifdef DEBUG_UNASSIGNED
3296 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3297 #endif
3298 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3299 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3300 #endif
3303 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3305 #ifdef DEBUG_UNASSIGNED
3306 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3307 #endif
3308 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3309 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3310 #endif
3313 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3314 unassigned_mem_readb,
3315 unassigned_mem_readw,
3316 unassigned_mem_readl,
3319 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3320 unassigned_mem_writeb,
3321 unassigned_mem_writew,
3322 unassigned_mem_writel,
3325 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3326 uint32_t val)
3328 int dirty_flags;
3329 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3330 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3331 #if !defined(CONFIG_USER_ONLY)
3332 tb_invalidate_phys_page_fast(ram_addr, 1);
3333 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3334 #endif
3336 stb_p(qemu_get_ram_ptr(ram_addr), val);
3337 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3338 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3339 /* we remove the notdirty callback only if the code has been
3340 flushed */
3341 if (dirty_flags == 0xff)
3342 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3345 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3346 uint32_t val)
3348 int dirty_flags;
3349 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3350 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3351 #if !defined(CONFIG_USER_ONLY)
3352 tb_invalidate_phys_page_fast(ram_addr, 2);
3353 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3354 #endif
3356 stw_p(qemu_get_ram_ptr(ram_addr), val);
3357 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3358 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3359 /* we remove the notdirty callback only if the code has been
3360 flushed */
3361 if (dirty_flags == 0xff)
3362 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3365 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3366 uint32_t val)
3368 int dirty_flags;
3369 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3370 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3371 #if !defined(CONFIG_USER_ONLY)
3372 tb_invalidate_phys_page_fast(ram_addr, 4);
3373 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3374 #endif
3376 stl_p(qemu_get_ram_ptr(ram_addr), val);
3377 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3378 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3379 /* we remove the notdirty callback only if the code has been
3380 flushed */
3381 if (dirty_flags == 0xff)
3382 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3385 static CPUReadMemoryFunc * const error_mem_read[3] = {
3386 NULL, /* never used */
3387 NULL, /* never used */
3388 NULL, /* never used */
3391 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3392 notdirty_mem_writeb,
3393 notdirty_mem_writew,
3394 notdirty_mem_writel,
/*
 * 'offset' is the offset of the access within its page, 'len_mask' masks
 * an address down to the access size, and 'flags' holds the BP_MEM_* bits
 * describing the access.  The virtual address is rebuilt from
 * env->mem_io_vaddr of the current CPU (cpu_single_env).
 *
 * On the first matching watchpoint the current TB is invalidated and
 * control is handed to cpu_resume_from_signal().
 * NOTE(review): that call is presumably a non-local exit, so the loop
 * would not continue after it -- confirm.
 */
3397 /* Generate a debug exception if a watchpoint has been hit. */
3398 static void check_watchpoint(int offset, int len_mask, int flags)
3400 CPUState *env = cpu_single_env;
3401 target_ulong pc, cs_base;
3402 TranslationBlock *tb;
3403 target_ulong vaddr;
3404 CPUWatchpoint *wp;
3405 int cpu_flags;
3407 if (env->watchpoint_hit) {
3408 /* We re-entered the check after replacing the TB. Now raise
3409 * the debug interrupt so that is will trigger after the
3410 * current instruction. */
3411 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3412 return;
3414 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3415 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
/* match either way round: the access address rounded to the access size
   equals the watchpoint address masked likewise, or the access falls
   inside the watchpoint's own (len_mask-aligned) range */
3416 if ((vaddr == (wp->vaddr & len_mask) ||
3417 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3418 wp->flags |= BP_WATCHPOINT_HIT;
3419 if (!env->watchpoint_hit) {
3420 env->watchpoint_hit = wp;
3421 tb = tb_find_pc(env->mem_io_pc);
3422 if (!tb) {
3423 cpu_abort(env, "check_watchpoint: could not find TB for "
3424 "pc=%p", (void *)env->mem_io_pc);
/* restore the CPU state for the faulting instruction, then drop the TB */
3426 cpu_restore_state(tb, env, env->mem_io_pc);
3427 tb_phys_invalidate(tb, -1);
3428 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3429 env->exception_index = EXCP_DEBUG;
3430 } else {
/* regenerate code for the current pc; the last argument (1) is passed to
   tb_gen_code() -- NOTE(review): presumably a one-instruction limit, confirm */
3431 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3432 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3434 cpu_resume_from_signal(env, NULL);
3436 } else {
/* non-matching watchpoints have their hit flag cleared */
3437 wp->flags &= ~BP_WATCHPOINT_HIT;
3442 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3443 so these check for a hit then pass through to the normal out-of-line
3444 phys routines. */
3445 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3447 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3448 return ldub_phys(addr);
3451 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3453 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3454 return lduw_phys(addr);
3457 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3459 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3460 return ldl_phys(addr);
3463 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3464 uint32_t val)
3466 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3467 stb_phys(addr, val);
3470 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3471 uint32_t val)
3473 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3474 stw_phys(addr, val);
3477 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3478 uint32_t val)
3480 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3481 stl_phys(addr, val);
3484 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3485 watch_mem_readb,
3486 watch_mem_readw,
3487 watch_mem_readl,
3490 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3491 watch_mem_writeb,
3492 watch_mem_writew,
3493 watch_mem_writel,
3496 static inline uint32_t subpage_readlen (subpage_t *mmio,
3497 target_phys_addr_t addr,
3498 unsigned int len)
3500 unsigned int idx = SUBPAGE_IDX(addr);
3501 #if defined(DEBUG_SUBPAGE)
3502 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3503 mmio, len, addr, idx);
3504 #endif
3506 addr += mmio->region_offset[idx];
3507 idx = mmio->sub_io_index[idx];
3508 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3511 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3512 uint32_t value, unsigned int len)
3514 unsigned int idx = SUBPAGE_IDX(addr);
3515 #if defined(DEBUG_SUBPAGE)
3516 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3517 __func__, mmio, len, addr, idx, value);
3518 #endif
3520 addr += mmio->region_offset[idx];
3521 idx = mmio->sub_io_index[idx];
3522 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3525 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3527 return subpage_readlen(opaque, addr, 0);
3530 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3531 uint32_t value)
3533 subpage_writelen(opaque, addr, value, 0);
3536 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3538 return subpage_readlen(opaque, addr, 1);
3541 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3542 uint32_t value)
3544 subpage_writelen(opaque, addr, value, 1);
3547 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3549 return subpage_readlen(opaque, addr, 2);
3552 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3553 uint32_t value)
3555 subpage_writelen(opaque, addr, value, 2);
3558 static CPUReadMemoryFunc * const subpage_read[] = {
3559 &subpage_readb,
3560 &subpage_readw,
3561 &subpage_readl,
3564 static CPUWriteMemoryFunc * const subpage_write[] = {
3565 &subpage_writeb,
3566 &subpage_writew,
3567 &subpage_writel,
3570 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3571 ram_addr_t memory, ram_addr_t region_offset)
3573 int idx, eidx;
3575 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3576 return -1;
3577 idx = SUBPAGE_IDX(start);
3578 eidx = SUBPAGE_IDX(end);
3579 #if defined(DEBUG_SUBPAGE)
3580 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3581 mmio, start, end, idx, eidx, memory);
3582 #endif
3583 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3584 memory = IO_MEM_UNASSIGNED;
3585 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3586 for (; idx <= eidx; idx++) {
3587 mmio->sub_io_index[idx] = memory;
3588 mmio->region_offset[idx] = region_offset;
3591 return 0;
3594 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3595 ram_addr_t orig_memory,
3596 ram_addr_t region_offset)
3598 subpage_t *mmio;
3599 int subpage_memory;
3601 mmio = g_malloc0(sizeof(subpage_t));
3603 mmio->base = base;
3604 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3605 DEVICE_NATIVE_ENDIAN);
3606 #if defined(DEBUG_SUBPAGE)
3607 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3608 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3609 #endif
3610 *phys = subpage_memory | IO_MEM_SUBPAGE;
3611 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3613 return mmio;
3616 static int get_free_io_mem_idx(void)
3618 int i;
3620 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3621 if (!io_mem_used[i]) {
3622 io_mem_used[i] = 1;
3623 return i;
3625 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3626 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3642 typedef struct SwapEndianContainer {
3643 CPUReadMemoryFunc *read[3];
3644 CPUWriteMemoryFunc *write[3];
3645 void *opaque;
3646 } SwapEndianContainer;
3648 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3650 uint32_t val;
3651 SwapEndianContainer *c = opaque;
3652 val = c->read[0](c->opaque, addr);
3653 return val;
3656 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3658 uint32_t val;
3659 SwapEndianContainer *c = opaque;
3660 val = bswap16(c->read[1](c->opaque, addr));
3661 return val;
3664 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3666 uint32_t val;
3667 SwapEndianContainer *c = opaque;
3668 val = bswap32(c->read[2](c->opaque, addr));
3669 return val;
3672 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3673 swapendian_mem_readb,
3674 swapendian_mem_readw,
3675 swapendian_mem_readl
3678 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3679 uint32_t val)
3681 SwapEndianContainer *c = opaque;
3682 c->write[0](c->opaque, addr, val);
3685 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3686 uint32_t val)
3688 SwapEndianContainer *c = opaque;
3689 c->write[1](c->opaque, addr, bswap16(val));
3692 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3693 uint32_t val)
3695 SwapEndianContainer *c = opaque;
3696 c->write[2](c->opaque, addr, bswap32(val));
3699 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3700 swapendian_mem_writeb,
3701 swapendian_mem_writew,
3702 swapendian_mem_writel
3705 static void swapendian_init(int io_index)
3707 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3708 int i;
3710 /* Swap mmio for big endian targets */
3711 c->opaque = io_mem_opaque[io_index];
3712 for (i = 0; i < 3; i++) {
3713 c->read[i] = io_mem_read[io_index][i];
3714 c->write[i] = io_mem_write[io_index][i];
3716 io_mem_read[io_index][i] = swapendian_readfn[i];
3717 io_mem_write[io_index][i] = swapendian_writefn[i];
3719 io_mem_opaque[io_index] = c;
3722 static void swapendian_del(int io_index)
3724 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3725 g_free(io_mem_opaque[io_index]);
3729 /* mem_read and mem_write are arrays of functions containing the
3730 function to access byte (index 0), word (index 1) and dword (index
3731 2). Functions can be omitted with a NULL function pointer.
3732 If io_index is non zero, the corresponding io zone is
3733 modified. If it is zero, a new io zone is allocated. The return
3734 value can be used with cpu_register_physical_memory(). (-1) is
3735 returned if error. */
3736 static int cpu_register_io_memory_fixed(int io_index,
3737 CPUReadMemoryFunc * const *mem_read,
3738 CPUWriteMemoryFunc * const *mem_write,
3739 void *opaque, enum device_endian endian)
3741 int i;
3743 if (io_index <= 0) {
3744 io_index = get_free_io_mem_idx();
3745 if (io_index == -1)
3746 return io_index;
3747 } else {
3748 io_index >>= IO_MEM_SHIFT;
3749 if (io_index >= IO_MEM_NB_ENTRIES)
3750 return -1;
3753 for (i = 0; i < 3; ++i) {
3754 io_mem_read[io_index][i]
3755 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3757 for (i = 0; i < 3; ++i) {
3758 io_mem_write[io_index][i]
3759 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3761 io_mem_opaque[io_index] = opaque;
3763 switch (endian) {
3764 case DEVICE_BIG_ENDIAN:
3765 #ifndef TARGET_WORDS_BIGENDIAN
3766 swapendian_init(io_index);
3767 #endif
3768 break;
3769 case DEVICE_LITTLE_ENDIAN:
3770 #ifdef TARGET_WORDS_BIGENDIAN
3771 swapendian_init(io_index);
3772 #endif
3773 break;
3774 case DEVICE_NATIVE_ENDIAN:
3775 default:
3776 break;
3779 return (io_index << IO_MEM_SHIFT);
3782 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3783 CPUWriteMemoryFunc * const *mem_write,
3784 void *opaque, enum device_endian endian)
3786 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3789 void cpu_unregister_io_memory(int io_table_address)
3791 int i;
3792 int io_index = io_table_address >> IO_MEM_SHIFT;
3794 swapendian_del(io_index);
3796 for (i=0;i < 3; i++) {
3797 io_mem_read[io_index][i] = unassigned_mem_read[i];
3798 io_mem_write[io_index][i] = unassigned_mem_write[i];
3800 io_mem_opaque[io_index] = NULL;
3801 io_mem_used[io_index] = 0;
3804 static void io_mem_init(void)
3806 int i;
3808 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3809 unassigned_mem_write, NULL,
3810 DEVICE_NATIVE_ENDIAN);
3811 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3812 unassigned_mem_write, NULL,
3813 DEVICE_NATIVE_ENDIAN);
3814 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3815 notdirty_mem_write, NULL,
3816 DEVICE_NATIVE_ENDIAN);
3817 for (i=0; i<5; i++)
3818 io_mem_used[i] = 1;
3820 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3821 watch_mem_write, NULL,
3822 DEVICE_NATIVE_ENDIAN);
3825 static void memory_map_init(void)
3827 system_memory = g_malloc(sizeof(*system_memory));
3828 memory_region_init(system_memory, "system", INT64_MAX);
3829 set_system_memory_map(system_memory);
3831 system_io = g_malloc(sizeof(*system_io));
3832 memory_region_init(system_io, "io", 65536);
3833 set_system_io_map(system_io);
3836 MemoryRegion *get_system_memory(void)
3838 return system_memory;
3841 MemoryRegion *get_system_io(void)
3843 return system_io;
3846 #endif /* !defined(CONFIG_USER_ONLY) */
3848 /* physical memory access (slow version, mainly for debug) */
3849 #if defined(CONFIG_USER_ONLY)
3850 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3851 uint8_t *buf, int len, int is_write)
3853 int l, flags;
3854 target_ulong page;
3855 void * p;
3857 while (len > 0) {
3858 page = addr & TARGET_PAGE_MASK;
3859 l = (page + TARGET_PAGE_SIZE) - addr;
3860 if (l > len)
3861 l = len;
3862 flags = page_get_flags(page);
3863 if (!(flags & PAGE_VALID))
3864 return -1;
3865 if (is_write) {
3866 if (!(flags & PAGE_WRITE))
3867 return -1;
3868 /* XXX: this code should not depend on lock_user */
3869 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3870 return -1;
3871 memcpy(p, buf, l);
3872 unlock_user(p, addr, l);
3873 } else {
3874 if (!(flags & PAGE_READ))
3875 return -1;
3876 /* XXX: this code should not depend on lock_user */
3877 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3878 return -1;
3879 memcpy(buf, p, l);
3880 unlock_user(p, addr, 0);
3882 len -= l;
3883 buf += l;
3884 addr += l;
3886 return 0;
3889 #else
3890 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3891 int len, int is_write)
3893 int l, io_index;
3894 uint8_t *ptr;
3895 uint32_t val;
3896 target_phys_addr_t page;
3897 ram_addr_t pd;
3898 PhysPageDesc *p;
3900 while (len > 0) {
3901 page = addr & TARGET_PAGE_MASK;
3902 l = (page + TARGET_PAGE_SIZE) - addr;
3903 if (l > len)
3904 l = len;
3905 p = phys_page_find(page >> TARGET_PAGE_BITS);
3906 if (!p) {
3907 pd = IO_MEM_UNASSIGNED;
3908 } else {
3909 pd = p->phys_offset;
3912 if (is_write) {
3913 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3914 target_phys_addr_t addr1 = addr;
3915 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3916 if (p)
3917 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3918 /* XXX: could force cpu_single_env to NULL to avoid
3919 potential bugs */
3920 if (l >= 4 && ((addr1 & 3) == 0)) {
3921 /* 32 bit write access */
3922 val = ldl_p(buf);
3923 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3924 l = 4;
3925 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3926 /* 16 bit write access */
3927 val = lduw_p(buf);
3928 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3929 l = 2;
3930 } else {
3931 /* 8 bit write access */
3932 val = ldub_p(buf);
3933 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3934 l = 1;
3936 } else {
3937 ram_addr_t addr1;
3938 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3939 /* RAM case */
3940 ptr = qemu_get_ram_ptr(addr1);
3941 memcpy(ptr, buf, l);
3942 if (!cpu_physical_memory_is_dirty(addr1)) {
3943 /* invalidate code */
3944 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3945 /* set dirty bit */
3946 cpu_physical_memory_set_dirty_flags(
3947 addr1, (0xff & ~CODE_DIRTY_FLAG));
3949 qemu_put_ram_ptr(ptr);
3951 } else {
3952 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3953 !(pd & IO_MEM_ROMD)) {
3954 target_phys_addr_t addr1 = addr;
3955 /* I/O case */
3956 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3957 if (p)
3958 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3959 if (l >= 4 && ((addr1 & 3) == 0)) {
3960 /* 32 bit read access */
3961 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3962 stl_p(buf, val);
3963 l = 4;
3964 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3965 /* 16 bit read access */
3966 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3967 stw_p(buf, val);
3968 l = 2;
3969 } else {
3970 /* 8 bit read access */
3971 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3972 stb_p(buf, val);
3973 l = 1;
3975 } else {
3976 /* RAM case */
3977 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3978 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3979 qemu_put_ram_ptr(ptr);
3982 len -= l;
3983 buf += l;
3984 addr += l;
3988 /* used for ROM loading : can write in RAM and ROM */
3989 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3990 const uint8_t *buf, int len)
3992 int l;
3993 uint8_t *ptr;
3994 target_phys_addr_t page;
3995 unsigned long pd;
3996 PhysPageDesc *p;
3998 while (len > 0) {
3999 page = addr & TARGET_PAGE_MASK;
4000 l = (page + TARGET_PAGE_SIZE) - addr;
4001 if (l > len)
4002 l = len;
4003 p = phys_page_find(page >> TARGET_PAGE_BITS);
4004 if (!p) {
4005 pd = IO_MEM_UNASSIGNED;
4006 } else {
4007 pd = p->phys_offset;
4010 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4011 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4012 !(pd & IO_MEM_ROMD)) {
4013 /* do nothing */
4014 } else {
4015 unsigned long addr1;
4016 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4017 /* ROM/RAM case */
4018 ptr = qemu_get_ram_ptr(addr1);
4019 memcpy(ptr, buf, l);
4020 qemu_put_ram_ptr(ptr);
4022 len -= l;
4023 buf += l;
4024 addr += l;
4028 typedef struct {
4029 void *buffer;
4030 target_phys_addr_t addr;
4031 target_phys_addr_t len;
4032 } BounceBuffer;
4034 static BounceBuffer bounce;
4036 typedef struct MapClient {
4037 void *opaque;
4038 void (*callback)(void *opaque);
4039 QLIST_ENTRY(MapClient) link;
4040 } MapClient;
4042 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4043 = QLIST_HEAD_INITIALIZER(map_client_list);
4045 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4047 MapClient *client = g_malloc(sizeof(*client));
4049 client->opaque = opaque;
4050 client->callback = callback;
4051 QLIST_INSERT_HEAD(&map_client_list, client, link);
4052 return client;
4055 void cpu_unregister_map_client(void *_client)
4057 MapClient *client = (MapClient *)_client;
4059 QLIST_REMOVE(client, link);
4060 g_free(client);
4063 static void cpu_notify_map_clients(void)
4065 MapClient *client;
4067 while (!QLIST_EMPTY(&map_client_list)) {
4068 client = QLIST_FIRST(&map_client_list);
4069 client->callback(client->opaque);
4070 cpu_unregister_map_client(client);
4074 /* Map a physical memory region into a host virtual address.
4075 * May map a subset of the requested range, given by and returned in *plen.
4076 * May return NULL if resources needed to perform the mapping are exhausted.
4077 * Use only for reads OR writes - not for read-modify-write operations.
4078 * Use cpu_register_map_client() to know when retrying the map operation is
4079 * likely to succeed.
4081 void *cpu_physical_memory_map(target_phys_addr_t addr,
4082 target_phys_addr_t *plen,
4083 int is_write)
4085 target_phys_addr_t len = *plen;
4086 target_phys_addr_t todo = 0;
4087 int l;
4088 target_phys_addr_t page;
4089 unsigned long pd;
4090 PhysPageDesc *p;
4091 ram_addr_t raddr = RAM_ADDR_MAX;
4092 ram_addr_t rlen;
4093 void *ret;
4095 while (len > 0) {
4096 page = addr & TARGET_PAGE_MASK;
4097 l = (page + TARGET_PAGE_SIZE) - addr;
4098 if (l > len)
4099 l = len;
4100 p = phys_page_find(page >> TARGET_PAGE_BITS);
4101 if (!p) {
4102 pd = IO_MEM_UNASSIGNED;
4103 } else {
4104 pd = p->phys_offset;
4107 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4108 if (todo || bounce.buffer) {
4109 break;
4111 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4112 bounce.addr = addr;
4113 bounce.len = l;
4114 if (!is_write) {
4115 cpu_physical_memory_read(addr, bounce.buffer, l);
4118 *plen = l;
4119 return bounce.buffer;
4121 if (!todo) {
4122 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4125 len -= l;
4126 addr += l;
4127 todo += l;
4129 rlen = todo;
4130 ret = qemu_ram_ptr_length(raddr, &rlen);
4131 *plen = rlen;
4132 return ret;
4135 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4136 * Will also mark the memory as dirty if is_write == 1. access_len gives
4137 * the amount of memory that was actually read or written by the caller.
4139 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4140 int is_write, target_phys_addr_t access_len)
4142 if (buffer != bounce.buffer) {
4143 if (is_write) {
4144 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4145 while (access_len) {
4146 unsigned l;
4147 l = TARGET_PAGE_SIZE;
4148 if (l > access_len)
4149 l = access_len;
4150 if (!cpu_physical_memory_is_dirty(addr1)) {
4151 /* invalidate code */
4152 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4153 /* set dirty bit */
4154 cpu_physical_memory_set_dirty_flags(
4155 addr1, (0xff & ~CODE_DIRTY_FLAG));
4157 addr1 += l;
4158 access_len -= l;
4161 if (xen_enabled()) {
4162 xen_invalidate_map_cache_entry(buffer);
4164 return;
4166 if (is_write) {
4167 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4169 qemu_vfree(bounce.buffer);
4170 bounce.buffer = NULL;
4171 cpu_notify_map_clients();
4174 /* warning: addr must be aligned */
4175 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4176 enum device_endian endian)
4178 int io_index;
4179 uint8_t *ptr;
4180 uint32_t val;
4181 unsigned long pd;
4182 PhysPageDesc *p;
4184 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4185 if (!p) {
4186 pd = IO_MEM_UNASSIGNED;
4187 } else {
4188 pd = p->phys_offset;
4191 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4192 !(pd & IO_MEM_ROMD)) {
4193 /* I/O case */
4194 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4195 if (p)
4196 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4197 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4198 #if defined(TARGET_WORDS_BIGENDIAN)
4199 if (endian == DEVICE_LITTLE_ENDIAN) {
4200 val = bswap32(val);
4202 #else
4203 if (endian == DEVICE_BIG_ENDIAN) {
4204 val = bswap32(val);
4206 #endif
4207 } else {
4208 /* RAM case */
4209 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4210 (addr & ~TARGET_PAGE_MASK);
4211 switch (endian) {
4212 case DEVICE_LITTLE_ENDIAN:
4213 val = ldl_le_p(ptr);
4214 break;
4215 case DEVICE_BIG_ENDIAN:
4216 val = ldl_be_p(ptr);
4217 break;
4218 default:
4219 val = ldl_p(ptr);
4220 break;
4223 return val;
4226 uint32_t ldl_phys(target_phys_addr_t addr)
4228 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4231 uint32_t ldl_le_phys(target_phys_addr_t addr)
4233 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4236 uint32_t ldl_be_phys(target_phys_addr_t addr)
4238 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4241 /* warning: addr must be aligned */
4242 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4243 enum device_endian endian)
4245 int io_index;
4246 uint8_t *ptr;
4247 uint64_t val;
4248 unsigned long pd;
4249 PhysPageDesc *p;
4251 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4252 if (!p) {
4253 pd = IO_MEM_UNASSIGNED;
4254 } else {
4255 pd = p->phys_offset;
4258 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4259 !(pd & IO_MEM_ROMD)) {
4260 /* I/O case */
4261 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4262 if (p)
4263 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4265 /* XXX This is broken when device endian != cpu endian.
4266 Fix and add "endian" variable check */
4267 #ifdef TARGET_WORDS_BIGENDIAN
4268 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4269 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4270 #else
4271 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4272 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4273 #endif
4274 } else {
4275 /* RAM case */
4276 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4277 (addr & ~TARGET_PAGE_MASK);
4278 switch (endian) {
4279 case DEVICE_LITTLE_ENDIAN:
4280 val = ldq_le_p(ptr);
4281 break;
4282 case DEVICE_BIG_ENDIAN:
4283 val = ldq_be_p(ptr);
4284 break;
4285 default:
4286 val = ldq_p(ptr);
4287 break;
4290 return val;
4293 uint64_t ldq_phys(target_phys_addr_t addr)
4295 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4298 uint64_t ldq_le_phys(target_phys_addr_t addr)
4300 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4303 uint64_t ldq_be_phys(target_phys_addr_t addr)
4305 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4308 /* XXX: optimize */
4309 uint32_t ldub_phys(target_phys_addr_t addr)
4311 uint8_t val;
4312 cpu_physical_memory_read(addr, &val, 1);
4313 return val;
4316 /* warning: addr must be aligned */
4317 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4318 enum device_endian endian)
4320 int io_index;
4321 uint8_t *ptr;
4322 uint64_t val;
4323 unsigned long pd;
4324 PhysPageDesc *p;
4326 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4327 if (!p) {
4328 pd = IO_MEM_UNASSIGNED;
4329 } else {
4330 pd = p->phys_offset;
4333 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4334 !(pd & IO_MEM_ROMD)) {
4335 /* I/O case */
4336 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4337 if (p)
4338 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4339 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4340 #if defined(TARGET_WORDS_BIGENDIAN)
4341 if (endian == DEVICE_LITTLE_ENDIAN) {
4342 val = bswap16(val);
4344 #else
4345 if (endian == DEVICE_BIG_ENDIAN) {
4346 val = bswap16(val);
4348 #endif
4349 } else {
4350 /* RAM case */
4351 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4352 (addr & ~TARGET_PAGE_MASK);
4353 switch (endian) {
4354 case DEVICE_LITTLE_ENDIAN:
4355 val = lduw_le_p(ptr);
4356 break;
4357 case DEVICE_BIG_ENDIAN:
4358 val = lduw_be_p(ptr);
4359 break;
4360 default:
4361 val = lduw_p(ptr);
4362 break;
4365 return val;
4368 uint32_t lduw_phys(target_phys_addr_t addr)
4370 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4373 uint32_t lduw_le_phys(target_phys_addr_t addr)
4375 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4378 uint32_t lduw_be_phys(target_phys_addr_t addr)
4380 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4383 /* warning: addr must be aligned. The ram page is not masked as dirty
4384 and the code inside is not invalidated. It is useful if the dirty
4385 bits are used to track modified PTEs */
4386 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4388 int io_index;
4389 uint8_t *ptr;
4390 unsigned long pd;
4391 PhysPageDesc *p;
4393 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4394 if (!p) {
4395 pd = IO_MEM_UNASSIGNED;
4396 } else {
4397 pd = p->phys_offset;
4400 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4401 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4402 if (p)
4403 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4404 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4405 } else {
4406 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4407 ptr = qemu_get_ram_ptr(addr1);
4408 stl_p(ptr, val);
4410 if (unlikely(in_migration)) {
4411 if (!cpu_physical_memory_is_dirty(addr1)) {
4412 /* invalidate code */
4413 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4414 /* set dirty bit */
4415 cpu_physical_memory_set_dirty_flags(
4416 addr1, (0xff & ~CODE_DIRTY_FLAG));
4422 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4424 int io_index;
4425 uint8_t *ptr;
4426 unsigned long pd;
4427 PhysPageDesc *p;
4429 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4430 if (!p) {
4431 pd = IO_MEM_UNASSIGNED;
4432 } else {
4433 pd = p->phys_offset;
4436 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4437 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4438 if (p)
4439 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4440 #ifdef TARGET_WORDS_BIGENDIAN
4441 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4442 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4443 #else
4444 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4445 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4446 #endif
4447 } else {
4448 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4449 (addr & ~TARGET_PAGE_MASK);
4450 stq_p(ptr, val);
4454 /* warning: addr must be aligned */
4455 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4456 enum device_endian endian)
4458 int io_index;
4459 uint8_t *ptr;
4460 unsigned long pd;
4461 PhysPageDesc *p;
4463 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4464 if (!p) {
4465 pd = IO_MEM_UNASSIGNED;
4466 } else {
4467 pd = p->phys_offset;
4470 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4471 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4472 if (p)
4473 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4474 #if defined(TARGET_WORDS_BIGENDIAN)
4475 if (endian == DEVICE_LITTLE_ENDIAN) {
4476 val = bswap32(val);
4478 #else
4479 if (endian == DEVICE_BIG_ENDIAN) {
4480 val = bswap32(val);
4482 #endif
4483 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4484 } else {
4485 unsigned long addr1;
4486 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4487 /* RAM case */
4488 ptr = qemu_get_ram_ptr(addr1);
4489 switch (endian) {
4490 case DEVICE_LITTLE_ENDIAN:
4491 stl_le_p(ptr, val);
4492 break;
4493 case DEVICE_BIG_ENDIAN:
4494 stl_be_p(ptr, val);
4495 break;
4496 default:
4497 stl_p(ptr, val);
4498 break;
4500 if (!cpu_physical_memory_is_dirty(addr1)) {
4501 /* invalidate code */
4502 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4503 /* set dirty bit */
4504 cpu_physical_memory_set_dirty_flags(addr1,
4505 (0xff & ~CODE_DIRTY_FLAG));
4510 void stl_phys(target_phys_addr_t addr, uint32_t val)
4512 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4515 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4517 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4520 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4522 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4525 /* XXX: optimize */
4526 void stb_phys(target_phys_addr_t addr, uint32_t val)
4528 uint8_t v = val;
4529 cpu_physical_memory_write(addr, &v, 1);
4532 /* warning: addr must be aligned */
4533 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4534 enum device_endian endian)
4536 int io_index;
4537 uint8_t *ptr;
4538 unsigned long pd;
4539 PhysPageDesc *p;
4541 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4542 if (!p) {
4543 pd = IO_MEM_UNASSIGNED;
4544 } else {
4545 pd = p->phys_offset;
4548 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4549 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4550 if (p)
4551 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4552 #if defined(TARGET_WORDS_BIGENDIAN)
4553 if (endian == DEVICE_LITTLE_ENDIAN) {
4554 val = bswap16(val);
4556 #else
4557 if (endian == DEVICE_BIG_ENDIAN) {
4558 val = bswap16(val);
4560 #endif
4561 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4562 } else {
4563 unsigned long addr1;
4564 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4565 /* RAM case */
4566 ptr = qemu_get_ram_ptr(addr1);
4567 switch (endian) {
4568 case DEVICE_LITTLE_ENDIAN:
4569 stw_le_p(ptr, val);
4570 break;
4571 case DEVICE_BIG_ENDIAN:
4572 stw_be_p(ptr, val);
4573 break;
4574 default:
4575 stw_p(ptr, val);
4576 break;
4578 if (!cpu_physical_memory_is_dirty(addr1)) {
4579 /* invalidate code */
4580 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4581 /* set dirty bit */
4582 cpu_physical_memory_set_dirty_flags(addr1,
4583 (0xff & ~CODE_DIRTY_FLAG));
4588 void stw_phys(target_phys_addr_t addr, uint32_t val)
4590 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4593 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4595 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4598 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4600 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4603 /* XXX: optimize */
4604 void stq_phys(target_phys_addr_t addr, uint64_t val)
4606 val = tswap64(val);
4607 cpu_physical_memory_write(addr, &val, 8);
4610 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4612 val = cpu_to_le64(val);
4613 cpu_physical_memory_write(addr, &val, 8);
4616 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4618 val = cpu_to_be64(val);
4619 cpu_physical_memory_write(addr, &val, 8);
4622 /* virtual memory access for debug (includes writing to ROM) */
4623 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4624 uint8_t *buf, int len, int is_write)
4626 int l;
4627 target_phys_addr_t phys_addr;
4628 target_ulong page;
4630 while (len > 0) {
4631 page = addr & TARGET_PAGE_MASK;
4632 phys_addr = cpu_get_phys_page_debug(env, page);
4633 /* if no physical page mapped, return an error */
4634 if (phys_addr == -1)
4635 return -1;
4636 l = (page + TARGET_PAGE_SIZE) - addr;
4637 if (l > len)
4638 l = len;
4639 phys_addr += (addr & ~TARGET_PAGE_MASK);
4640 if (is_write)
4641 cpu_physical_memory_write_rom(phys_addr, buf, l);
4642 else
4643 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4644 len -= l;
4645 buf += l;
4646 addr += l;
4648 return 0;
4650 #endif
4652 /* in deterministic execution mode, instructions doing device I/Os
4653 must be at the end of the TB */
4654 void cpu_io_recompile(CPUState *env, void *retaddr)
4656 TranslationBlock *tb;
4657 uint32_t n, cflags;
4658 target_ulong pc, cs_base;
4659 uint64_t flags;
4661 tb = tb_find_pc((unsigned long)retaddr);
4662 if (!tb) {
4663 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4664 retaddr);
4666 n = env->icount_decr.u16.low + tb->icount;
4667 cpu_restore_state(tb, env, (unsigned long)retaddr);
4668 /* Calculate how many instructions had been executed before the fault
4669 occurred. */
4670 n = n - env->icount_decr.u16.low;
4671 /* Generate a new TB ending on the I/O insn. */
4672 n++;
4673 /* On MIPS and SH, delay slot instructions can only be restarted if
4674 they were already the first instruction in the TB. If this is not
4675 the first instruction in a TB then re-execute the preceding
4676 branch. */
4677 #if defined(TARGET_MIPS)
4678 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4679 env->active_tc.PC -= 4;
4680 env->icount_decr.u16.low++;
4681 env->hflags &= ~MIPS_HFLAG_BMASK;
4683 #elif defined(TARGET_SH4)
4684 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4685 && n > 1) {
4686 env->pc -= 2;
4687 env->icount_decr.u16.low++;
4688 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4690 #endif
4691 /* This should never happen. */
4692 if (n > CF_COUNT_MASK)
4693 cpu_abort(env, "TB too big during recompile");
4695 cflags = n | CF_LAST_IO;
4696 pc = tb->pc;
4697 cs_base = tb->cs_base;
4698 flags = tb->flags;
4699 tb_phys_invalidate(tb, -1);
4700 /* FIXME: In theory this could raise an exception. In practice
4701 we have already translated the block once so it's probably ok. */
4702 tb_gen_code(env, pc, cs_base, flags, cflags);
4703 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4704 the first in the TB) then we end up generating a whole new TB and
4705 repeating the fault, which is horribly inefficient.
4706 Better would be to execute just this insn uncached, or generate a
4707 second new TB. */
4708 cpu_resume_from_signal(env, NULL);
4711 #if !defined(CONFIG_USER_ONLY)
4713 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4715 int i, target_code_size, max_target_code_size;
4716 int direct_jmp_count, direct_jmp2_count, cross_page;
4717 TranslationBlock *tb;
4719 target_code_size = 0;
4720 max_target_code_size = 0;
4721 cross_page = 0;
4722 direct_jmp_count = 0;
4723 direct_jmp2_count = 0;
4724 for(i = 0; i < nb_tbs; i++) {
4725 tb = &tbs[i];
4726 target_code_size += tb->size;
4727 if (tb->size > max_target_code_size)
4728 max_target_code_size = tb->size;
4729 if (tb->page_addr[1] != -1)
4730 cross_page++;
4731 if (tb->tb_next_offset[0] != 0xffff) {
4732 direct_jmp_count++;
4733 if (tb->tb_next_offset[1] != 0xffff) {
4734 direct_jmp2_count++;
4738 /* XXX: avoid using doubles ? */
4739 cpu_fprintf(f, "Translation buffer state:\n");
4740 cpu_fprintf(f, "gen code size %td/%ld\n",
4741 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4742 cpu_fprintf(f, "TB count %d/%d\n",
4743 nb_tbs, code_gen_max_blocks);
4744 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4745 nb_tbs ? target_code_size / nb_tbs : 0,
4746 max_target_code_size);
4747 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4748 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4749 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4750 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4751 cross_page,
4752 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4753 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4754 direct_jmp_count,
4755 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4756 direct_jmp2_count,
4757 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4758 cpu_fprintf(f, "\nStatistics:\n");
4759 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4760 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4761 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4762 tcg_dump_info(f, cpu_fprintf);
4765 #define MMUSUFFIX _cmmu
4766 #undef GETPC
4767 #define GETPC() NULL
4768 #define env cpu_single_env
4769 #define SOFTMMU_CODE_ACCESS
4771 #define SHIFT 0
4772 #include "softmmu_template.h"
4774 #define SHIFT 1
4775 #include "softmmu_template.h"
4777 #define SHIFT 2
4778 #include "softmmu_template.h"
4780 #define SHIFT 3
4781 #include "softmmu_template.h"
4783 #undef env
4785 #endif