hw/9pfs: Add support to use named socket for proxy FS
[qemu/v9fs.git] / exec.c
blobd8b21801708f8069d42edaf449db17f8fdf75b60
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 //#define DEBUG_TB_INVALIDATE
61 //#define DEBUG_FLUSH
62 //#define DEBUG_TLB
63 //#define DEBUG_UNASSIGNED
65 /* make various TB consistency checks */
66 //#define DEBUG_TB_CHECK
67 //#define DEBUG_TLB_CHECK
69 //#define DEBUG_IOPORT
70 //#define DEBUG_SUBPAGE
72 #if !defined(CONFIG_USER_ONLY)
73 /* TB consistency checks only implemented for usermode emulation. */
74 #undef DEBUG_TB_CHECK
75 #endif
77 #define SMC_BITMAP_USE_THRESHOLD 10
79 static TranslationBlock *tbs;
80 static int code_gen_max_blocks;
81 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
82 static int nb_tbs;
83 /* any access to the tbs or the page table must use this lock */
84 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
86 #if defined(__arm__) || defined(__sparc_v9__)
87 /* The prologue must be reachable with a direct jump. ARM and Sparc64
88 have limited branch ranges (possibly also PPC) so place it in a
89 section close to code segment. */
90 #define code_gen_section \
91 __attribute__((__section__(".gen_code"))) \
92 __attribute__((aligned (32)))
93 #elif defined(_WIN32)
94 /* Maximum alignment for Win32 is 16. */
95 #define code_gen_section \
96 __attribute__((aligned (16)))
97 #else
98 #define code_gen_section \
99 __attribute__((aligned (32)))
100 #endif
102 uint8_t code_gen_prologue[1024] code_gen_section;
103 static uint8_t *code_gen_buffer;
104 static unsigned long code_gen_buffer_size;
105 /* threshold to flush the translated code buffer */
106 static unsigned long code_gen_buffer_max_size;
107 static uint8_t *code_gen_ptr;
109 #if !defined(CONFIG_USER_ONLY)
110 int phys_ram_fd;
111 static int in_migration;
113 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
115 static MemoryRegion *system_memory;
116 static MemoryRegion *system_io;
118 #endif
120 CPUState *first_cpu;
121 /* current CPU in the current thread. It is only valid inside
122 cpu_exec() */
123 DEFINE_TLS(CPUState *,cpu_single_env);
124 /* 0 = Do not count executed instructions.
125 1 = Precise instruction counting.
126 2 = Adaptive rate instruction counting. */
127 int use_icount = 0;
/* Per-page bookkeeping for translated code.  Arrays of PageDesc form the
   bottom level of the multi-level l1_map (see page_find_alloc()). */
129 typedef struct PageDesc {
130 /* list of TBs intersecting this ram page */
/* The two low bits of each link carry a tag (which page_next[] slot of the
   TB continues the list); see tb_page_remove()/build_page_bitmap(). */
131 TranslationBlock *first_tb;
132 /* in order to optimize self modifying code, we count the number
133 of lookups we do to a given page to use a bitmap */
134 unsigned int code_write_count;
/* One bit per byte of the page (TARGET_PAGE_SIZE / 8 bytes), allocated by
   build_page_bitmap() and released by invalidate_page_bitmap(). */
135 uint8_t *code_bitmap;
136 #if defined(CONFIG_USER_ONLY)
/* PAGE_* flags for this page; only present in user-mode builds. */
137 unsigned long flags;
138 #endif
139 } PageDesc;
141 /* In system mode we want L1_MAP to be based on ram offsets,
142 while in user mode we want it to be based on virtual addresses. */
143 #if !defined(CONFIG_USER_ONLY)
144 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
145 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
146 #else
147 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
148 #endif
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
151 #endif
153 /* Size of the L2 (and L3, etc) page tables. */
154 #define L2_BITS 10
155 #define L2_SIZE (1 << L2_BITS)
157 /* The bits remaining after N lower levels of page tables. */
158 #define P_L1_BITS_REM \
159 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
160 #define V_L1_BITS_REM \
161 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 /* Size of the L1 page table. Avoid silly small sizes. */
164 #if P_L1_BITS_REM < 4
165 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
166 #else
167 #define P_L1_BITS P_L1_BITS_REM
168 #endif
170 #if V_L1_BITS_REM < 4
171 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
172 #else
173 #define V_L1_BITS V_L1_BITS_REM
174 #endif
176 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
177 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
179 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
180 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
182 unsigned long qemu_real_host_page_size;
183 unsigned long qemu_host_page_size;
184 unsigned long qemu_host_page_mask;
186 /* This is a multi-level map on the virtual address space.
187 The bottom level has pointers to PageDesc. */
188 static void *l1_map[V_L1_SIZE];
190 #if !defined(CONFIG_USER_ONLY)
/* Per-physical-page descriptor; arrays of these form the bottom level of
   l1_phys_map.  phys_page_find_alloc() initialises fresh entries to
   IO_MEM_UNASSIGNED. */
191 typedef struct PhysPageDesc {
192 /* offset in host memory of the page + io_index in the low bits */
193 ram_addr_t phys_offset;
/* Set by phys_page_find_alloc() from the page index shifted left by
   TARGET_PAGE_BITS when the entry is first created. */
194 ram_addr_t region_offset;
195 } PhysPageDesc;
197 /* This is a multi-level map on the physical address space.
198 The bottom level has pointers to PhysPageDesc. */
199 static void *l1_phys_map[P_L1_SIZE];
201 static void io_mem_init(void);
202 static void memory_map_init(void);
204 /* io memory support */
205 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
206 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
207 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
208 static char io_mem_used[IO_MEM_NB_ENTRIES];
209 static int io_mem_watch;
210 #endif
212 /* log support */
213 #ifdef WIN32
214 static const char *logfilename = "qemu.log";
215 #else
216 static const char *logfilename = "/tmp/qemu.log";
217 #endif
218 FILE *logfile;
219 int loglevel;
220 static int log_append = 0;
222 /* statistics */
223 #if !defined(CONFIG_USER_ONLY)
224 static int tlb_flush_count;
225 #endif
226 static int tb_flush_count;
227 static int tb_phys_invalidate_count;
229 #ifdef _WIN32
230 static void map_exec(void *addr, long size)
232 DWORD old_protect;
233 VirtualProtect(addr, size,
234 PAGE_EXECUTE_READWRITE, &old_protect);
237 #else
/* POSIX variant: request read/write/execute protection for every host page
   overlapping [addr, addr + size).  The range is widened to page boundaries
   first; the result of mprotect() is not checked. */
static void map_exec(void *addr, long size)
{
    unsigned long page_size = getpagesize();
    unsigned long page_mask = ~(page_size - 1);
    unsigned long first = (unsigned long)addr & page_mask;
    unsigned long last = ((unsigned long)addr + size + page_size - 1)
                         & page_mask;

    mprotect((void *)first, last - first,
             PROT_READ | PROT_WRITE | PROT_EXEC);
}
253 #endif
/* Initialise qemu_real_host_page_size, qemu_host_page_size and
   qemu_host_page_mask.  On BSD user-mode builds, additionally walk the
   host process mappings and mark the corresponding guest ranges as
   PAGE_RESERVED via page_set_flags(). */
255 static void page_init(void)
257 /* NOTE: we can always suppose that qemu_host_page_size >=
258 TARGET_PAGE_SIZE */
259 #ifdef _WIN32
261 SYSTEM_INFO system_info;
263 GetSystemInfo(&system_info);
264 qemu_real_host_page_size = system_info.dwPageSize;
266 #else
267 qemu_real_host_page_size = getpagesize();
268 #endif
/* Keep a preset qemu_host_page_size if non-zero, but never let it drop
   below TARGET_PAGE_SIZE. */
269 if (qemu_host_page_size == 0)
270 qemu_host_page_size = qemu_real_host_page_size;
271 if (qemu_host_page_size < TARGET_PAGE_SIZE)
272 qemu_host_page_size = TARGET_PAGE_SIZE;
273 qemu_host_page_mask = ~(qemu_host_page_size - 1);
275 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
277 #ifdef HAVE_KINFO_GETVMMAP
/* Preferred path: enumerate the mappings with kinfo_getvmmap(). */
278 struct kinfo_vmentry *freep;
279 int i, cnt;
281 freep = kinfo_getvmmap(getpid(), &cnt);
282 if (freep) {
283 mmap_lock();
284 for (i = 0; i < cnt; i++) {
285 unsigned long startaddr, endaddr;
287 startaddr = freep[i].kve_start;
288 endaddr = freep[i].kve_end;
/* Entries whose start has no valid guest address are skipped. */
289 if (h2g_valid(startaddr)) {
290 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
292 if (h2g_valid(endaddr)) {
293 endaddr = h2g(endaddr);
294 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
295 } else {
/* End is outside the guest range: reserve up to ~0ul, but only when the
   preprocessor condition below holds. */
296 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
297 endaddr = ~0ul;
298 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
299 #endif
303 free(freep);
304 mmap_unlock();
306 #else
/* Fallback path: parse the Linux-compat maps file line by line. */
307 FILE *f;
309 last_brk = (unsigned long)sbrk(0);
311 f = fopen("/compat/linux/proc/self/maps", "r");
312 if (f) {
313 mmap_lock();
315 do {
316 unsigned long startaddr, endaddr;
317 int n;
/* NOTE(review): if a line does not begin with a hex number, fscanf()
   presumably consumes nothing and feof() stays false, so this loop may not
   terminate -- confirm against the maps file format. */
319 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
321 if (n == 2 && h2g_valid(startaddr)) {
322 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
324 if (h2g_valid(endaddr)) {
325 endaddr = h2g(endaddr);
326 } else {
327 endaddr = ~0ul;
329 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
331 } while (!feof(f));
333 fclose(f);
334 mmap_unlock();
336 #endif
338 #endif
341 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
343 PageDesc *pd;
344 void **lp;
345 int i;
347 #if defined(CONFIG_USER_ONLY)
348 /* We can't use g_malloc because it may recurse into a locked mutex. */
349 # define ALLOC(P, SIZE) \
350 do { \
351 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
352 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
353 } while (0)
354 #else
355 # define ALLOC(P, SIZE) \
356 do { P = g_malloc0(SIZE); } while (0)
357 #endif
359 /* Level 1. Always allocated. */
360 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
362 /* Level 2..N-1. */
363 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
364 void **p = *lp;
366 if (p == NULL) {
367 if (!alloc) {
368 return NULL;
370 ALLOC(p, sizeof(void *) * L2_SIZE);
371 *lp = p;
374 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
377 pd = *lp;
378 if (pd == NULL) {
379 if (!alloc) {
380 return NULL;
382 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
383 *lp = pd;
386 #undef ALLOC
388 return pd + (index & (L2_SIZE - 1));
391 static inline PageDesc *page_find(tb_page_addr_t index)
393 return page_find_alloc(index, 0);
396 #if !defined(CONFIG_USER_ONLY)
397 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
399 PhysPageDesc *pd;
400 void **lp;
401 int i;
403 /* Level 1. Always allocated. */
404 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
406 /* Level 2..N-1. */
407 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
408 void **p = *lp;
409 if (p == NULL) {
410 if (!alloc) {
411 return NULL;
413 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
415 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
418 pd = *lp;
419 if (pd == NULL) {
420 int i;
422 if (!alloc) {
423 return NULL;
426 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
428 for (i = 0; i < L2_SIZE; i++) {
429 pd[i].phys_offset = IO_MEM_UNASSIGNED;
430 pd[i].region_offset = (index + i) << TARGET_PAGE_BITS;
434 return pd + (index & (L2_SIZE - 1));
437 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
439 return phys_page_find_alloc(index, 0);
442 static void tlb_protect_code(ram_addr_t ram_addr);
443 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
444 target_ulong vaddr);
445 #define mmap_lock() do { } while(0)
446 #define mmap_unlock() do { } while(0)
447 #endif
449 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
451 #if defined(CONFIG_USER_ONLY)
452 /* Currently it is not recommended to allocate big chunks of data in
453 user mode. It will change when a dedicated libc will be used */
454 #define USE_STATIC_CODE_GEN_BUFFER
455 #endif
457 #ifdef USE_STATIC_CODE_GEN_BUFFER
458 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
459 __attribute__((aligned (CODE_GEN_ALIGN)));
460 #endif
/* Set up the translated-code buffer and the TranslationBlock array.
   tb_size: requested buffer size in bytes; 0 selects a default.  Ignored
            when USE_STATIC_CODE_GEN_BUFFER is defined.
   On return code_gen_buffer, code_gen_buffer_size, code_gen_buffer_max_size,
   code_gen_max_blocks and tbs are initialised.  Exits the process if the
   buffer cannot be mmap()ed. */
462 static void code_gen_alloc(unsigned long tb_size)
464 #ifdef USE_STATIC_CODE_GEN_BUFFER
465 code_gen_buffer = static_code_gen_buffer;
466 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
467 map_exec(code_gen_buffer, code_gen_buffer_size);
468 #else
469 code_gen_buffer_size = tb_size;
470 if (code_gen_buffer_size == 0) {
471 #if defined(CONFIG_USER_ONLY)
472 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
473 #else
474 /* XXX: needs adjustments */
475 code_gen_buffer_size = (unsigned long)(ram_size / 4);
476 #endif
478 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
479 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
480 /* The code gen buffer location may have constraints depending on
481 the host cpu and OS */
482 #if defined(__linux__)
/* Linux: anonymous private mapping; each host CPU branch below adjusts the
   flags, the address hint and the maximum size. */
484 int flags;
485 void *start = NULL;
487 flags = MAP_PRIVATE | MAP_ANONYMOUS;
488 #if defined(__x86_64__)
489 flags |= MAP_32BIT;
490 /* Cannot map more than that */
491 if (code_gen_buffer_size > (800 * 1024 * 1024))
492 code_gen_buffer_size = (800 * 1024 * 1024);
493 #elif defined(__sparc_v9__)
494 // Map the buffer below 2G, so we can use direct calls and branches
495 flags |= MAP_FIXED;
496 start = (void *) 0x60000000UL;
497 if (code_gen_buffer_size > (512 * 1024 * 1024))
498 code_gen_buffer_size = (512 * 1024 * 1024);
499 #elif defined(__arm__)
500 /* Map the buffer below 32M, so we can use direct calls and branches */
501 flags |= MAP_FIXED;
502 start = (void *) 0x01000000UL;
503 if (code_gen_buffer_size > 16 * 1024 * 1024)
504 code_gen_buffer_size = 16 * 1024 * 1024;
505 #elif defined(__s390x__)
506 /* Map the buffer so that we can use direct calls and branches. */
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
509 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
/* No MAP_FIXED here: the address below is only passed as a hint. */
511 start = (void *)0x90000000UL;
512 #endif
513 code_gen_buffer = mmap(start, code_gen_buffer_size,
514 PROT_WRITE | PROT_READ | PROT_EXEC,
515 flags, -1, 0);
516 if (code_gen_buffer == MAP_FAILED) {
517 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
518 exit(1);
521 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
522 || defined(__DragonFly__) || defined(__OpenBSD__) \
523 || defined(__NetBSD__)
/* BSD family: same scheme as the Linux branch, with fixed addresses where
   the host needs the buffer within direct-branch range. */
525 int flags;
526 void *addr = NULL;
527 flags = MAP_PRIVATE | MAP_ANONYMOUS;
528 #if defined(__x86_64__)
529 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
530 * 0x40000000 is free */
531 flags |= MAP_FIXED;
532 addr = (void *)0x40000000;
533 /* Cannot map more than that */
534 if (code_gen_buffer_size > (800 * 1024 * 1024))
535 code_gen_buffer_size = (800 * 1024 * 1024);
536 #elif defined(__sparc_v9__)
537 // Map the buffer below 2G, so we can use direct calls and branches
538 flags |= MAP_FIXED;
539 addr = (void *) 0x60000000UL;
540 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
541 code_gen_buffer_size = (512 * 1024 * 1024);
543 #endif
544 code_gen_buffer = mmap(addr, code_gen_buffer_size,
545 PROT_WRITE | PROT_READ | PROT_EXEC,
546 flags, -1, 0);
547 if (code_gen_buffer == MAP_FAILED) {
548 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
549 exit(1);
552 #else
/* Any other host: heap allocation made executable afterwards. */
553 code_gen_buffer = g_malloc(code_gen_buffer_size);
554 map_exec(code_gen_buffer, code_gen_buffer_size);
555 #endif
556 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
557 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
/* Flush threshold: the buffer size minus TCG_MAX_OP_SIZE * OPC_BUF_SIZE;
   tb_alloc() refuses new blocks once the write pointer passes it. */
558 code_gen_buffer_max_size = code_gen_buffer_size -
559 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
560 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
561 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
564 /* Must be called before using the QEMU cpus. 'tb_size' is the size
565 (in bytes) allocated to the translation buffer. Zero means default
566 size. */
567 void tcg_exec_init(unsigned long tb_size)
569 cpu_gen_init();
570 code_gen_alloc(tb_size);
571 code_gen_ptr = code_gen_buffer;
572 page_init();
573 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
574 /* There's no guest base to take into account, so go ahead and
575 initialize the prologue now. */
576 tcg_prologue_init(&tcg_ctx);
577 #endif
580 bool tcg_enabled(void)
582 return code_gen_buffer != NULL;
/* One-time global initialisation: in system-emulation builds set up the
   memory map and then the I/O memory tables; a no-op in user-mode builds. */
void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    memory_map_init();
    io_mem_init();
#endif
}
593 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
595 static int cpu_common_post_load(void *opaque, int version_id)
597 CPUState *env = opaque;
599 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
600 version_id is increased. */
601 env->interrupt_request &= ~0x01;
602 tlb_flush(env, 1);
604 return 0;
/* Description of the target-independent CPUState fields ("halted" and
   "interrupt_request") registered per CPU by cpu_exec_init();
   cpu_common_post_load() runs after the fields are loaded. */
607 static const VMStateDescription vmstate_cpu_common = {
608 .name = "cpu_common",
609 .version_id = 1,
610 .minimum_version_id = 1,
611 .minimum_version_id_old = 1,
612 .post_load = cpu_common_post_load,
613 .fields = (VMStateField []) {
614 VMSTATE_UINT32(halted, CPUState),
615 VMSTATE_UINT32(interrupt_request, CPUState),
616 VMSTATE_END_OF_LIST()
619 #endif
621 CPUState *qemu_get_cpu(int cpu)
623 CPUState *env = first_cpu;
625 while (env) {
626 if (env->cpu_index == cpu)
627 break;
628 env = env->next_cpu;
631 return env;
634 void cpu_exec_init(CPUState *env)
636 CPUState **penv;
637 int cpu_index;
639 #if defined(CONFIG_USER_ONLY)
640 cpu_list_lock();
641 #endif
642 env->next_cpu = NULL;
643 penv = &first_cpu;
644 cpu_index = 0;
645 while (*penv != NULL) {
646 penv = &(*penv)->next_cpu;
647 cpu_index++;
649 env->cpu_index = cpu_index;
650 env->numa_node = 0;
651 QTAILQ_INIT(&env->breakpoints);
652 QTAILQ_INIT(&env->watchpoints);
653 #ifndef CONFIG_USER_ONLY
654 env->thread_id = qemu_get_thread_id();
655 #endif
656 *penv = env;
657 #if defined(CONFIG_USER_ONLY)
658 cpu_list_unlock();
659 #endif
660 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
661 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
662 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
663 cpu_save, cpu_load, env);
664 #endif
667 /* Allocate a new translation block. Flush the translation buffer if
668 too many translation blocks or too much generated code. */
669 static TranslationBlock *tb_alloc(target_ulong pc)
671 TranslationBlock *tb;
673 if (nb_tbs >= code_gen_max_blocks ||
674 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
675 return NULL;
676 tb = &tbs[nb_tbs++];
677 tb->pc = pc;
678 tb->cflags = 0;
679 return tb;
682 void tb_free(TranslationBlock *tb)
684 /* In practice this is mostly used for single use temporary TB
685 Ignore the hard cases and just back up if this TB happens to
686 be the last one generated. */
687 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
688 code_gen_ptr = tb->tc_ptr;
689 nb_tbs--;
693 static inline void invalidate_page_bitmap(PageDesc *p)
695 if (p->code_bitmap) {
696 g_free(p->code_bitmap);
697 p->code_bitmap = NULL;
699 p->code_write_count = 0;
702 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
704 static void page_flush_tb_1 (int level, void **lp)
706 int i;
708 if (*lp == NULL) {
709 return;
711 if (level == 0) {
712 PageDesc *pd = *lp;
713 for (i = 0; i < L2_SIZE; ++i) {
714 pd[i].first_tb = NULL;
715 invalidate_page_bitmap(pd + i);
717 } else {
718 void **pp = *lp;
719 for (i = 0; i < L2_SIZE; ++i) {
720 page_flush_tb_1 (level - 1, pp + i);
725 static void page_flush_tb(void)
727 int i;
728 for (i = 0; i < V_L1_SIZE; i++) {
729 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
733 /* flush all the translation blocks */
734 /* XXX: tb_flush is currently not thread safe */
735 void tb_flush(CPUState *env1)
737 CPUState *env;
738 #if defined(DEBUG_FLUSH)
739 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
740 (unsigned long)(code_gen_ptr - code_gen_buffer),
741 nb_tbs, nb_tbs > 0 ?
742 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
743 #endif
744 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
745 cpu_abort(env1, "Internal error: code buffer overflow\n");
747 nb_tbs = 0;
749 for(env = first_cpu; env != NULL; env = env->next_cpu) {
750 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
753 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
754 page_flush_tb();
756 code_gen_ptr = code_gen_buffer;
757 /* XXX: flush processor icache at this point if cache flush is
758 expensive */
759 tb_flush_count++;
762 #ifdef DEBUG_TB_CHECK
764 static void tb_invalidate_check(target_ulong address)
766 TranslationBlock *tb;
767 int i;
768 address &= TARGET_PAGE_MASK;
769 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
770 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
771 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
772 address >= tb->pc + tb->size)) {
773 printf("ERROR invalidate: address=" TARGET_FMT_lx
774 " PC=%08lx size=%04x\n",
775 address, (long)tb->pc, tb->size);
781 /* verify that all the pages have correct rights for code */
782 static void tb_page_check(void)
784 TranslationBlock *tb;
785 int i, flags1, flags2;
787 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
788 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
789 flags1 = page_get_flags(tb->pc);
790 flags2 = page_get_flags(tb->pc + tb->size - 1);
791 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
792 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
793 (long)tb->pc, tb->size, flags1, flags2);
799 #endif
801 /* invalidate one TB */
802 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
803 int next_offset)
805 TranslationBlock *tb1;
806 for(;;) {
807 tb1 = *ptb;
808 if (tb1 == tb) {
809 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
810 break;
812 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
816 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
818 TranslationBlock *tb1;
819 unsigned int n1;
821 for(;;) {
822 tb1 = *ptb;
823 n1 = (long)tb1 & 3;
824 tb1 = (TranslationBlock *)((long)tb1 & ~3);
825 if (tb1 == tb) {
826 *ptb = tb1->page_next[n1];
827 break;
829 ptb = &tb1->page_next[n1];
833 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
835 TranslationBlock *tb1, **ptb;
836 unsigned int n1;
838 ptb = &tb->jmp_next[n];
839 tb1 = *ptb;
840 if (tb1) {
841 /* find tb(n) in circular list */
842 for(;;) {
843 tb1 = *ptb;
844 n1 = (long)tb1 & 3;
845 tb1 = (TranslationBlock *)((long)tb1 & ~3);
846 if (n1 == n && tb1 == tb)
847 break;
848 if (n1 == 2) {
849 ptb = &tb1->jmp_first;
850 } else {
851 ptb = &tb1->jmp_next[n1];
854 /* now we can suppress tb(n) from the list */
855 *ptb = tb->jmp_next[n];
857 tb->jmp_next[n] = NULL;
861 /* reset the jump entry 'n' of a TB so that it is not chained to
862 another TB */
863 static inline void tb_reset_jump(TranslationBlock *tb, int n)
865 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
868 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
870 CPUState *env;
871 PageDesc *p;
872 unsigned int h, n1;
873 tb_page_addr_t phys_pc;
874 TranslationBlock *tb1, *tb2;
876 /* remove the TB from the hash list */
877 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
878 h = tb_phys_hash_func(phys_pc);
879 tb_remove(&tb_phys_hash[h], tb,
880 offsetof(TranslationBlock, phys_hash_next));
882 /* remove the TB from the page list */
883 if (tb->page_addr[0] != page_addr) {
884 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
885 tb_page_remove(&p->first_tb, tb);
886 invalidate_page_bitmap(p);
888 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
889 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
890 tb_page_remove(&p->first_tb, tb);
891 invalidate_page_bitmap(p);
894 tb_invalidated_flag = 1;
896 /* remove the TB from the hash list */
897 h = tb_jmp_cache_hash_func(tb->pc);
898 for(env = first_cpu; env != NULL; env = env->next_cpu) {
899 if (env->tb_jmp_cache[h] == tb)
900 env->tb_jmp_cache[h] = NULL;
903 /* suppress this TB from the two jump lists */
904 tb_jmp_remove(tb, 0);
905 tb_jmp_remove(tb, 1);
907 /* suppress any remaining jumps to this TB */
908 tb1 = tb->jmp_first;
909 for(;;) {
910 n1 = (long)tb1 & 3;
911 if (n1 == 2)
912 break;
913 tb1 = (TranslationBlock *)((long)tb1 & ~3);
914 tb2 = tb1->jmp_next[n1];
915 tb_reset_jump(tb1, n1);
916 tb1->jmp_next[n1] = NULL;
917 tb1 = tb2;
919 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
921 tb_phys_invalidate_count++;
/* Set 'len' consecutive bits of the bitmap 'tab', starting at bit index
   'start' (bit i lives in tab[i >> 3], at position i & 7).
   tab:   bitmap; must cover bits [start, start + len).
   start: index of the first bit to set (non-negative).
   len:   number of bits; zero or negative sets nothing.
   Only bytes containing at least one bit of the range are written. */
static inline void set_bits(uint8_t *tab, int start, int len)
{
    int end, end1;
    unsigned int mask;

    if (len <= 0) {
        /* Nothing to set; also guards against a negative length walking
           the byte loop in the wrong direction. */
        return;
    }

    end = start + len;
    tab += start >> 3;
    mask = 0xffu << (start & 7);

    if ((start & ~7) == (end & ~7)) {
        /* Range begins and ends inside the same byte. */
        mask &= ~(0xffu << (end & 7));
        *tab |= (uint8_t)mask;
        return;
    }

    /* Leading partial byte. */
    *tab++ |= (uint8_t)mask;
    start = (start + 8) & ~7;

    /* Whole bytes in the middle. */
    end1 = end & ~7;
    while (start < end1) {
        *tab++ = 0xff;
        start += 8;
    }

    /* Trailing partial byte, only when the range ends inside it. */
    if (start < end) {
        mask = ~(0xffu << (end & 7));
        *tab |= (uint8_t)mask;
    }
}
951 static void build_page_bitmap(PageDesc *p)
953 int n, tb_start, tb_end;
954 TranslationBlock *tb;
956 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
958 tb = p->first_tb;
959 while (tb != NULL) {
960 n = (long)tb & 3;
961 tb = (TranslationBlock *)((long)tb & ~3);
962 /* NOTE: this is subtle as a TB may span two physical pages */
963 if (n == 0) {
964 /* NOTE: tb_end may be after the end of the page, but
965 it is not a problem */
966 tb_start = tb->pc & ~TARGET_PAGE_MASK;
967 tb_end = tb_start + tb->size;
968 if (tb_end > TARGET_PAGE_SIZE)
969 tb_end = TARGET_PAGE_SIZE;
970 } else {
971 tb_start = 0;
972 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
974 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
975 tb = tb->page_next[n];
979 TranslationBlock *tb_gen_code(CPUState *env,
980 target_ulong pc, target_ulong cs_base,
981 int flags, int cflags)
983 TranslationBlock *tb;
984 uint8_t *tc_ptr;
985 tb_page_addr_t phys_pc, phys_page2;
986 target_ulong virt_page2;
987 int code_gen_size;
989 phys_pc = get_page_addr_code(env, pc);
990 tb = tb_alloc(pc);
991 if (!tb) {
992 /* flush must be done */
993 tb_flush(env);
994 /* cannot fail at this point */
995 tb = tb_alloc(pc);
996 /* Don't forget to invalidate previous TB info. */
997 tb_invalidated_flag = 1;
999 tc_ptr = code_gen_ptr;
1000 tb->tc_ptr = tc_ptr;
1001 tb->cs_base = cs_base;
1002 tb->flags = flags;
1003 tb->cflags = cflags;
1004 cpu_gen_code(env, tb, &code_gen_size);
1005 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1007 /* check next page if needed */
1008 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1009 phys_page2 = -1;
1010 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1011 phys_page2 = get_page_addr_code(env, virt_page2);
1013 tb_link_page(tb, phys_pc, phys_page2);
1014 return tb;
1017 /* invalidate all TBs which intersect with the target physical page
1018 starting in range [start;end[. NOTE: start and end must refer to
1019 the same physical page. 'is_cpu_write_access' should be true if called
1020 from a real cpu write access: the virtual CPU will exit the current
1021 TB if code is modified inside this TB. */
1022 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1023 int is_cpu_write_access)
1025 TranslationBlock *tb, *tb_next, *saved_tb;
1026 CPUState *env = cpu_single_env;
1027 tb_page_addr_t tb_start, tb_end;
1028 PageDesc *p;
1029 int n;
1030 #ifdef TARGET_HAS_PRECISE_SMC
1031 int current_tb_not_found = is_cpu_write_access;
1032 TranslationBlock *current_tb = NULL;
1033 int current_tb_modified = 0;
1034 target_ulong current_pc = 0;
1035 target_ulong current_cs_base = 0;
1036 int current_flags = 0;
1037 #endif /* TARGET_HAS_PRECISE_SMC */
1039 p = page_find(start >> TARGET_PAGE_BITS);
1040 if (!p)
1041 return;
1042 if (!p->code_bitmap &&
1043 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1044 is_cpu_write_access) {
1045 /* build code bitmap */
1046 build_page_bitmap(p);
1049 /* we remove all the TBs in the range [start, end[ */
1050 /* XXX: see if in some cases it could be faster to invalidate all the code */
1051 tb = p->first_tb;
1052 while (tb != NULL) {
1053 n = (long)tb & 3;
1054 tb = (TranslationBlock *)((long)tb & ~3);
1055 tb_next = tb->page_next[n];
1056 /* NOTE: this is subtle as a TB may span two physical pages */
1057 if (n == 0) {
1058 /* NOTE: tb_end may be after the end of the page, but
1059 it is not a problem */
1060 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1061 tb_end = tb_start + tb->size;
1062 } else {
1063 tb_start = tb->page_addr[1];
1064 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1066 if (!(tb_end <= start || tb_start >= end)) {
1067 #ifdef TARGET_HAS_PRECISE_SMC
1068 if (current_tb_not_found) {
1069 current_tb_not_found = 0;
1070 current_tb = NULL;
1071 if (env->mem_io_pc) {
1072 /* now we have a real cpu fault */
1073 current_tb = tb_find_pc(env->mem_io_pc);
1076 if (current_tb == tb &&
1077 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1078 /* If we are modifying the current TB, we must stop
1079 its execution. We could be more precise by checking
1080 that the modification is after the current PC, but it
1081 would require a specialized function to partially
1082 restore the CPU state */
1084 current_tb_modified = 1;
1085 cpu_restore_state(current_tb, env, env->mem_io_pc);
1086 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1087 &current_flags);
1089 #endif /* TARGET_HAS_PRECISE_SMC */
1090 /* we need to do that to handle the case where a signal
1091 occurs while doing tb_phys_invalidate() */
1092 saved_tb = NULL;
1093 if (env) {
1094 saved_tb = env->current_tb;
1095 env->current_tb = NULL;
1097 tb_phys_invalidate(tb, -1);
1098 if (env) {
1099 env->current_tb = saved_tb;
1100 if (env->interrupt_request && env->current_tb)
1101 cpu_interrupt(env, env->interrupt_request);
1104 tb = tb_next;
1106 #if !defined(CONFIG_USER_ONLY)
1107 /* if no code remaining, no need to continue to use slow writes */
1108 if (!p->first_tb) {
1109 invalidate_page_bitmap(p);
1110 if (is_cpu_write_access) {
1111 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1114 #endif
1115 #ifdef TARGET_HAS_PRECISE_SMC
1116 if (current_tb_modified) {
1117 /* we generate a block containing just the instruction
1118 modifying the memory. It will ensure that it cannot modify
1119 itself */
1120 env->current_tb = NULL;
1121 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1122 cpu_resume_from_signal(env, NULL);
1124 #endif
1127 /* len must be <= 8 and start must be a multiple of len */
1128 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1130 PageDesc *p;
1131 int offset, b;
1132 #if 0
1133 if (1) {
1134 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1135 cpu_single_env->mem_io_vaddr, len,
1136 cpu_single_env->eip,
1137 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1139 #endif
1140 p = page_find(start >> TARGET_PAGE_BITS);
1141 if (!p)
1142 return;
1143 if (p->code_bitmap) {
1144 offset = start & ~TARGET_PAGE_MASK;
1145 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1146 if (b & ((1 << len) - 1))
1147 goto do_invalidate;
1148 } else {
1149 do_invalidate:
1150 tb_invalidate_phys_page_range(start, start + len, 1);
1154 #if !defined(CONFIG_SOFTMMU)
/* Invalidate every TB registered on the page containing 'addr'.
   'pc' is looked up with tb_find_pc() when non-zero (only with
   TARGET_HAS_PRECISE_SMC); 'puc' is passed through unchanged to
   cpu_resume_from_signal().  Does nothing if the page has no PageDesc. */
1155 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1156 unsigned long pc, void *puc)
1158 TranslationBlock *tb;
1159 PageDesc *p;
1160 int n;
1161 #ifdef TARGET_HAS_PRECISE_SMC
1162 TranslationBlock *current_tb = NULL;
1163 CPUState *env = cpu_single_env;
1164 int current_tb_modified = 0;
1165 target_ulong current_pc = 0;
1166 target_ulong current_cs_base = 0;
1167 int current_flags = 0;
1168 #endif
/* work on the page base address from here on */
1170 addr &= TARGET_PAGE_MASK;
1171 p = page_find(addr >> TARGET_PAGE_BITS);
1172 if (!p)
1173 return;
1174 tb = p->first_tb;
1175 #ifdef TARGET_HAS_PRECISE_SMC
1176 if (tb && pc != 0) {
1177 current_tb = tb_find_pc(pc);
1179 #endif
/* walk the page's TB list; the low two bits of each link hold the
   index 'n' used to pick the next link in page_next[] */
1180 while (tb != NULL) {
1181 n = (long)tb & 3;
1182 tb = (TranslationBlock *)((long)tb & ~3);
1183 #ifdef TARGET_HAS_PRECISE_SMC
1184 if (current_tb == tb &&
1185 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1186 /* If we are modifying the current TB, we must stop
1187 its execution. We could be more precise by checking
1188 that the modification is after the current PC, but it
1189 would require a specialized function to partially
1190 restore the CPU state */
1192 current_tb_modified = 1;
1193 cpu_restore_state(current_tb, env, pc);
1194 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1195 &current_flags);
1197 #endif /* TARGET_HAS_PRECISE_SMC */
1198 tb_phys_invalidate(tb, addr);
1199 tb = tb->page_next[n];
/* every TB of the page has been invalidated: empty the list */
1201 p->first_tb = NULL;
1202 #ifdef TARGET_HAS_PRECISE_SMC
1203 if (current_tb_modified) {
1204 /* we generate a block containing just the instruction
1205 modifying the memory. It will ensure that it cannot modify
1206 itself */
1207 env->current_tb = NULL;
1208 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1209 cpu_resume_from_signal(env, puc);
1211 #endif
1213 #endif
1215 /* add the tb in the target page and protect it if necessary */
1216 static inline void tb_alloc_page(TranslationBlock *tb,
1217 unsigned int n, tb_page_addr_t page_addr)
1219 PageDesc *p;
1220 #ifndef CONFIG_USER_ONLY
1221 bool page_already_protected;
1222 #endif
1224 tb->page_addr[n] = page_addr;
1225 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1226 tb->page_next[n] = p->first_tb;
1227 #ifndef CONFIG_USER_ONLY
1228 page_already_protected = p->first_tb != NULL;
1229 #endif
1230 p->first_tb = (TranslationBlock *)((long)tb | n);
1231 invalidate_page_bitmap(p);
1233 #if defined(TARGET_HAS_SMC) || 1
1235 #if defined(CONFIG_USER_ONLY)
1236 if (p->flags & PAGE_WRITE) {
1237 target_ulong addr;
1238 PageDesc *p2;
1239 int prot;
1241 /* force the host page as non writable (writes will have a
1242 page fault + mprotect overhead) */
1243 page_addr &= qemu_host_page_mask;
1244 prot = 0;
1245 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1246 addr += TARGET_PAGE_SIZE) {
1248 p2 = page_find (addr >> TARGET_PAGE_BITS);
1249 if (!p2)
1250 continue;
1251 prot |= p2->flags;
1252 p2->flags &= ~PAGE_WRITE;
1254 mprotect(g2h(page_addr), qemu_host_page_size,
1255 (prot & PAGE_BITS) & ~PAGE_WRITE);
1256 #ifdef DEBUG_TB_INVALIDATE
1257 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1258 page_addr);
1259 #endif
1261 #else
1262 /* if some code is already present, then the pages are already
1263 protected. So we handle the case where only the first TB is
1264 allocated in a physical page */
1265 if (!page_already_protected) {
1266 tlb_protect_code(page_addr);
1268 #endif
1270 #endif /* TARGET_HAS_SMC */
1273 /* add a new TB and link it to the physical page tables. phys_page2 is
1274 (-1) to indicate that only one page contains the TB. */
1275 void tb_link_page(TranslationBlock *tb,
1276 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1278 unsigned int h;
1279 TranslationBlock **ptb;
1281 /* Grab the mmap lock to stop another thread invalidating this TB
1282 before we are done. */
1283 mmap_lock();
1284 /* add in the physical hash table */
1285 h = tb_phys_hash_func(phys_pc);
1286 ptb = &tb_phys_hash[h];
1287 tb->phys_hash_next = *ptb;
1288 *ptb = tb;
1290 /* add in the page list */
1291 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1292 if (phys_page2 != -1)
1293 tb_alloc_page(tb, 1, phys_page2);
1294 else
1295 tb->page_addr[1] = -1;
1297 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1298 tb->jmp_next[0] = NULL;
1299 tb->jmp_next[1] = NULL;
1301 /* init original jump addresses */
1302 if (tb->tb_next_offset[0] != 0xffff)
1303 tb_reset_jump(tb, 0);
1304 if (tb->tb_next_offset[1] != 0xffff)
1305 tb_reset_jump(tb, 1);
1307 #ifdef DEBUG_TB_CHECK
1308 tb_page_check();
1309 #endif
1310 mmap_unlock();
1313 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1314 tb[1].tc_ptr. Return NULL if not found */
1315 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1317 int m_min, m_max, m;
1318 unsigned long v;
1319 TranslationBlock *tb;
1321 if (nb_tbs <= 0)
1322 return NULL;
1323 if (tc_ptr < (unsigned long)code_gen_buffer ||
1324 tc_ptr >= (unsigned long)code_gen_ptr)
1325 return NULL;
1326 /* binary search (cf Knuth) */
1327 m_min = 0;
1328 m_max = nb_tbs - 1;
1329 while (m_min <= m_max) {
1330 m = (m_min + m_max) >> 1;
1331 tb = &tbs[m];
1332 v = (unsigned long)tb->tc_ptr;
1333 if (v == tc_ptr)
1334 return tb;
1335 else if (tc_ptr < v) {
1336 m_max = m - 1;
1337 } else {
1338 m_min = m + 1;
1341 return &tbs[m_max];
1344 static void tb_reset_jump_recursive(TranslationBlock *tb);
/* Undo the direct jump taken through slot 'n' of 'tb', if any: unlink
   'tb' from the incoming-jump list of its destination, reset the jump
   in the generated code, then recurse into the destination TB.
   List links carry a tag in their low two bits; tag 2 marks the TB
   that owns the list (see the n1 == 2 test below). */
1346 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1348 TranslationBlock *tb1, *tb_next, **ptb;
1349 unsigned int n1;
1351 tb1 = tb->jmp_next[n];
1352 if (tb1 != NULL) {
1353 /* find head of list */
1354 for(;;) {
1355 n1 = (long)tb1 & 3;
1356 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1357 if (n1 == 2)
1358 break;
1359 tb1 = tb1->jmp_next[n1];
1361 /* we are now sure that tb jumps to tb1 */
1362 tb_next = tb1;
1364 /* remove tb from the jmp_first list */
1365 ptb = &tb_next->jmp_first;
1366 for(;;) {
1367 tb1 = *ptb;
1368 n1 = (long)tb1 & 3;
1369 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1370 if (n1 == n && tb1 == tb)
1371 break;
1372 ptb = &tb1->jmp_next[n1];
/* splice 'tb' out of the list and clear its own link */
1374 *ptb = tb->jmp_next[n];
1375 tb->jmp_next[n] = NULL;
1377 /* suppress the jump to next tb in generated code */
1378 tb_reset_jump(tb, n);
1380 /* suppress jumps in the tb on which we could have jumped */
1381 tb_reset_jump_recursive(tb_next);
1385 static void tb_reset_jump_recursive(TranslationBlock *tb)
1387 tb_reset_jump_recursive2(tb, 0);
1388 tb_reset_jump_recursive2(tb, 1);
1391 #if defined(TARGET_HAS_ICE)
1392 #if defined(CONFIG_USER_ONLY)
1393 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1395 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1397 #else
1398 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1400 target_phys_addr_t addr;
1401 target_ulong pd;
1402 ram_addr_t ram_addr;
1403 PhysPageDesc *p;
1405 addr = cpu_get_phys_page_debug(env, pc);
1406 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1407 if (!p) {
1408 pd = IO_MEM_UNASSIGNED;
1409 } else {
1410 pd = p->phys_offset;
1412 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1413 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1415 #endif
1416 #endif /* TARGET_HAS_ICE */
1418 #if defined(CONFIG_USER_ONLY)
1419 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1424 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1425 int flags, CPUWatchpoint **watchpoint)
1427 return -ENOSYS;
1429 #else
1430 /* Add a watchpoint. */
1431 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1432 int flags, CPUWatchpoint **watchpoint)
1434 target_ulong len_mask = ~(len - 1);
1435 CPUWatchpoint *wp;
1437 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1438 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1439 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1440 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1441 return -EINVAL;
1443 wp = g_malloc(sizeof(*wp));
1445 wp->vaddr = addr;
1446 wp->len_mask = len_mask;
1447 wp->flags = flags;
1449 /* keep all GDB-injected watchpoints in front */
1450 if (flags & BP_GDB)
1451 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1452 else
1453 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1455 tlb_flush_page(env, addr);
1457 if (watchpoint)
1458 *watchpoint = wp;
1459 return 0;
1462 /* Remove a specific watchpoint. */
1463 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1464 int flags)
1466 target_ulong len_mask = ~(len - 1);
1467 CPUWatchpoint *wp;
1469 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1470 if (addr == wp->vaddr && len_mask == wp->len_mask
1471 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1472 cpu_watchpoint_remove_by_ref(env, wp);
1473 return 0;
1476 return -ENOENT;
1479 /* Remove a specific watchpoint by reference. */
1480 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1482 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1484 tlb_flush_page(env, watchpoint->vaddr);
1486 g_free(watchpoint);
1489 /* Remove all matching watchpoints. */
1490 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1492 CPUWatchpoint *wp, *next;
1494 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1495 if (wp->flags & mask)
1496 cpu_watchpoint_remove_by_ref(env, wp);
1499 #endif
1501 /* Add a breakpoint. */
1502 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1503 CPUBreakpoint **breakpoint)
1505 #if defined(TARGET_HAS_ICE)
1506 CPUBreakpoint *bp;
1508 bp = g_malloc(sizeof(*bp));
1510 bp->pc = pc;
1511 bp->flags = flags;
1513 /* keep all GDB-injected breakpoints in front */
1514 if (flags & BP_GDB)
1515 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1516 else
1517 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1519 breakpoint_invalidate(env, pc);
1521 if (breakpoint)
1522 *breakpoint = bp;
1523 return 0;
1524 #else
1525 return -ENOSYS;
1526 #endif
1529 /* Remove a specific breakpoint. */
1530 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1532 #if defined(TARGET_HAS_ICE)
1533 CPUBreakpoint *bp;
1535 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1536 if (bp->pc == pc && bp->flags == flags) {
1537 cpu_breakpoint_remove_by_ref(env, bp);
1538 return 0;
1541 return -ENOENT;
1542 #else
1543 return -ENOSYS;
1544 #endif
1547 /* Remove a specific breakpoint by reference. */
1548 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1550 #if defined(TARGET_HAS_ICE)
1551 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1553 breakpoint_invalidate(env, breakpoint->pc);
1555 g_free(breakpoint);
1556 #endif
1559 /* Remove all matching breakpoints. */
1560 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1562 #if defined(TARGET_HAS_ICE)
1563 CPUBreakpoint *bp, *next;
1565 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1566 if (bp->flags & mask)
1567 cpu_breakpoint_remove_by_ref(env, bp);
1569 #endif
1572 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1573 CPU loop after each instruction */
1574 void cpu_single_step(CPUState *env, int enabled)
1576 #if defined(TARGET_HAS_ICE)
1577 if (env->singlestep_enabled != enabled) {
1578 env->singlestep_enabled = enabled;
1579 if (kvm_enabled())
1580 kvm_update_guest_debug(env, 0);
1581 else {
1582 /* must flush all the translated code to avoid inconsistencies */
1583 /* XXX: only flush what is necessary */
1584 tb_flush(env);
1587 #endif
1590 /* enable or disable low levels log */
1591 void cpu_set_log(int log_flags)
1593 loglevel = log_flags;
1594 if (loglevel && !logfile) {
1595 logfile = fopen(logfilename, log_append ? "a" : "w");
1596 if (!logfile) {
1597 perror(logfilename);
1598 _exit(1);
1600 #if !defined(CONFIG_SOFTMMU)
1601 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
1603 static char logfile_buf[4096];
1604 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1606 #elif defined(_WIN32)
1607 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1608 setvbuf(logfile, NULL, _IONBF, 0);
1609 #else
1610 setvbuf(logfile, NULL, _IOLBF, 0);
1611 #endif
1612 log_append = 1;
1614 if (!loglevel && logfile) {
1615 fclose(logfile);
1616 logfile = NULL;
1620 void cpu_set_log_filename(const char *filename)
1622 logfilename = strdup(filename);
1623 if (logfile) {
1624 fclose(logfile);
1625 logfile = NULL;
1627 cpu_set_log(loglevel);
1630 static void cpu_unlink_tb(CPUState *env)
1632 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1633 problem and hope the cpu will stop of its own accord. For userspace
1634 emulation this often isn't actually as bad as it sounds. Often
1635 signals are used primarily to interrupt blocking syscalls. */
1636 TranslationBlock *tb;
1637 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1639 spin_lock(&interrupt_lock);
1640 tb = env->current_tb;
1641 /* if the cpu is currently executing code, we must unlink it and
1642 all the potentially executing TB */
1643 if (tb) {
1644 env->current_tb = NULL;
1645 tb_reset_jump_recursive(tb);
1647 spin_unlock(&interrupt_lock);
1650 #ifndef CONFIG_USER_ONLY
1651 /* mask must never be zero, except for A20 change call */
1652 static void tcg_handle_interrupt(CPUState *env, int mask)
1654 int old_mask;
1656 old_mask = env->interrupt_request;
1657 env->interrupt_request |= mask;
1660 * If called from iothread context, wake the target cpu in
1661 * case its halted.
1663 if (!qemu_cpu_is_self(env)) {
1664 qemu_cpu_kick(env);
1665 return;
1668 if (use_icount) {
1669 env->icount_decr.u16.high = 0xffff;
1670 if (!can_do_io(env)
1671 && (mask & ~old_mask) != 0) {
1672 cpu_abort(env, "Raised interrupt while not in I/O function");
1674 } else {
1675 cpu_unlink_tb(env);
1679 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1681 #else /* CONFIG_USER_ONLY */
1683 void cpu_interrupt(CPUState *env, int mask)
1685 env->interrupt_request |= mask;
1686 cpu_unlink_tb(env);
1688 #endif /* CONFIG_USER_ONLY */
1690 void cpu_reset_interrupt(CPUState *env, int mask)
1692 env->interrupt_request &= ~mask;
1695 void cpu_exit(CPUState *env)
1697 env->exit_request = 1;
1698 cpu_unlink_tb(env);
1701 const CPULogItem cpu_log_items[] = {
1702 { CPU_LOG_TB_OUT_ASM, "out_asm",
1703 "show generated host assembly code for each compiled TB" },
1704 { CPU_LOG_TB_IN_ASM, "in_asm",
1705 "show target assembly code for each compiled TB" },
1706 { CPU_LOG_TB_OP, "op",
1707 "show micro ops for each compiled TB" },
1708 { CPU_LOG_TB_OP_OPT, "op_opt",
1709 "show micro ops "
1710 #ifdef TARGET_I386
1711 "before eflags optimization and "
1712 #endif
1713 "after liveness analysis" },
1714 { CPU_LOG_INT, "int",
1715 "show interrupts/exceptions in short format" },
1716 { CPU_LOG_EXEC, "exec",
1717 "show trace before each executed TB (lots of logs)" },
1718 { CPU_LOG_TB_CPU, "cpu",
1719 "show CPU state before block translation" },
1720 #ifdef TARGET_I386
1721 { CPU_LOG_PCALL, "pcall",
1722 "show protected mode far calls/returns/exceptions" },
1723 { CPU_LOG_RESET, "cpu_reset",
1724 "show CPU state before CPU resets" },
1725 #endif
1726 #ifdef DEBUG_IOPORT
1727 { CPU_LOG_IOPORT, "ioport",
1728 "show all i/o ports accesses" },
1729 #endif
1730 { 0, NULL, NULL },
1733 #ifndef CONFIG_USER_ONLY
1734 static QLIST_HEAD(memory_client_list, CPUPhysMemoryClient) memory_client_list
1735 = QLIST_HEAD_INITIALIZER(memory_client_list);
1737 static void cpu_notify_set_memory(target_phys_addr_t start_addr,
1738 ram_addr_t size,
1739 ram_addr_t phys_offset,
1740 bool log_dirty)
1742 CPUPhysMemoryClient *client;
1743 QLIST_FOREACH(client, &memory_client_list, list) {
1744 client->set_memory(client, start_addr, size, phys_offset, log_dirty);
1748 static int cpu_notify_sync_dirty_bitmap(target_phys_addr_t start,
1749 target_phys_addr_t end)
1751 CPUPhysMemoryClient *client;
1752 QLIST_FOREACH(client, &memory_client_list, list) {
1753 int r = client->sync_dirty_bitmap(client, start, end);
1754 if (r < 0)
1755 return r;
1757 return 0;
1760 static int cpu_notify_migration_log(int enable)
1762 CPUPhysMemoryClient *client;
1763 QLIST_FOREACH(client, &memory_client_list, list) {
1764 int r = client->migration_log(client, enable);
1765 if (r < 0)
1766 return r;
1768 return 0;
1771 struct last_map {
1772 target_phys_addr_t start_addr;
1773 ram_addr_t size;
1774 ram_addr_t phys_offset;
1777 /* The l1_phys_map provides the upper P_L1_BITs of the guest physical
1778 * address. Each intermediate table provides the next L2_BITs of guest
1779 * physical address space. The number of levels vary based on host and
1780 * guest configuration, making it efficient to build the final guest
1781 * physical address by seeding the L1 offset and shifting and adding in
1782 * each L2 offset as we recurse through them. */
1783 static void phys_page_for_each_1(CPUPhysMemoryClient *client, int level,
1784 void **lp, target_phys_addr_t addr,
1785 struct last_map *map)
1787 int i;
1789 if (*lp == NULL) {
1790 return;
1792 if (level == 0) {
1793 PhysPageDesc *pd = *lp;
1794 addr <<= L2_BITS + TARGET_PAGE_BITS;
1795 for (i = 0; i < L2_SIZE; ++i) {
1796 if (pd[i].phys_offset != IO_MEM_UNASSIGNED) {
1797 target_phys_addr_t start_addr = addr | i << TARGET_PAGE_BITS;
1799 if (map->size &&
1800 start_addr == map->start_addr + map->size &&
1801 pd[i].phys_offset == map->phys_offset + map->size) {
1803 map->size += TARGET_PAGE_SIZE;
1804 continue;
1805 } else if (map->size) {
1806 client->set_memory(client, map->start_addr,
1807 map->size, map->phys_offset, false);
1810 map->start_addr = start_addr;
1811 map->size = TARGET_PAGE_SIZE;
1812 map->phys_offset = pd[i].phys_offset;
1815 } else {
1816 void **pp = *lp;
1817 for (i = 0; i < L2_SIZE; ++i) {
1818 phys_page_for_each_1(client, level - 1, pp + i,
1819 (addr << L2_BITS) | i, map);
1824 static void phys_page_for_each(CPUPhysMemoryClient *client)
1826 int i;
1827 struct last_map map = { };
1829 for (i = 0; i < P_L1_SIZE; ++i) {
1830 phys_page_for_each_1(client, P_L1_SHIFT / L2_BITS - 1,
1831 l1_phys_map + i, i, &map);
1833 if (map.size) {
1834 client->set_memory(client, map.start_addr, map.size, map.phys_offset,
1835 false);
1839 void cpu_register_phys_memory_client(CPUPhysMemoryClient *client)
1841 QLIST_INSERT_HEAD(&memory_client_list, client, list);
1842 phys_page_for_each(client);
1845 void cpu_unregister_phys_memory_client(CPUPhysMemoryClient *client)
1847 QLIST_REMOVE(client, list);
1849 #endif
/* Return non-zero iff the first n bytes of s1 are exactly the string s2
   (i.e. s2 has length n and the bytes match). */
static int cmp1(const char *s1, int n, const char *s2)
{
    int same_length = !(strlen(s2) != n);

    return same_length ? (memcmp(s1, s2, n) == 0) : 0;
}
1858 /* takes a comma separated list of log masks. Return 0 if error. */
1859 int cpu_str_to_log_mask(const char *str)
1861 const CPULogItem *item;
1862 int mask;
1863 const char *p, *p1;
1865 p = str;
1866 mask = 0;
1867 for(;;) {
1868 p1 = strchr(p, ',');
1869 if (!p1)
1870 p1 = p + strlen(p);
1871 if(cmp1(p,p1-p,"all")) {
1872 for(item = cpu_log_items; item->mask != 0; item++) {
1873 mask |= item->mask;
1875 } else {
1876 for(item = cpu_log_items; item->mask != 0; item++) {
1877 if (cmp1(p, p1 - p, item->name))
1878 goto found;
1880 return 0;
1882 found:
1883 mask |= item->mask;
1884 if (*p1 != ',')
1885 break;
1886 p = p1 + 1;
1888 return mask;
1891 void cpu_abort(CPUState *env, const char *fmt, ...)
1893 va_list ap;
1894 va_list ap2;
1896 va_start(ap, fmt);
1897 va_copy(ap2, ap);
1898 fprintf(stderr, "qemu: fatal: ");
1899 vfprintf(stderr, fmt, ap);
1900 fprintf(stderr, "\n");
1901 #ifdef TARGET_I386
1902 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1903 #else
1904 cpu_dump_state(env, stderr, fprintf, 0);
1905 #endif
1906 if (qemu_log_enabled()) {
1907 qemu_log("qemu: fatal: ");
1908 qemu_log_vprintf(fmt, ap2);
1909 qemu_log("\n");
1910 #ifdef TARGET_I386
1911 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1912 #else
1913 log_cpu_state(env, 0);
1914 #endif
1915 qemu_log_flush();
1916 qemu_log_close();
1918 va_end(ap2);
1919 va_end(ap);
1920 #if defined(CONFIG_USER_ONLY)
1922 struct sigaction act;
1923 sigfillset(&act.sa_mask);
1924 act.sa_handler = SIG_DFL;
1925 sigaction(SIGABRT, &act, NULL);
1927 #endif
1928 abort();
1931 CPUState *cpu_copy(CPUState *env)
1933 CPUState *new_env = cpu_init(env->cpu_model_str);
1934 CPUState *next_cpu = new_env->next_cpu;
1935 int cpu_index = new_env->cpu_index;
1936 #if defined(TARGET_HAS_ICE)
1937 CPUBreakpoint *bp;
1938 CPUWatchpoint *wp;
1939 #endif
1941 memcpy(new_env, env, sizeof(CPUState));
1943 /* Preserve chaining and index. */
1944 new_env->next_cpu = next_cpu;
1945 new_env->cpu_index = cpu_index;
1947 /* Clone all break/watchpoints.
1948 Note: Once we support ptrace with hw-debug register access, make sure
1949 BP_CPU break/watchpoints are handled correctly on clone. */
1950 QTAILQ_INIT(&env->breakpoints);
1951 QTAILQ_INIT(&env->watchpoints);
1952 #if defined(TARGET_HAS_ICE)
1953 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1954 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1956 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1957 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1958 wp->flags, NULL);
1960 #endif
1962 return new_env;
1965 #if !defined(CONFIG_USER_ONLY)
1967 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1969 unsigned int i;
1971 /* Discard jump cache entries for any tb which might potentially
1972 overlap the flushed page. */
1973 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1974 memset (&env->tb_jmp_cache[i], 0,
1975 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1977 i = tb_jmp_cache_hash_page(addr);
1978 memset (&env->tb_jmp_cache[i], 0,
1979 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1982 static CPUTLBEntry s_cputlb_empty_entry = {
1983 .addr_read = -1,
1984 .addr_write = -1,
1985 .addr_code = -1,
1986 .addend = -1,
1989 /* NOTE: if flush_global is true, also flush global entries (not
1990 implemented yet) */
1991 void tlb_flush(CPUState *env, int flush_global)
1993 int i;
1995 #if defined(DEBUG_TLB)
1996 printf("tlb_flush:\n");
1997 #endif
1998 /* must reset current TB so that interrupts cannot modify the
1999 links while we are modifying them */
2000 env->current_tb = NULL;
2002 for(i = 0; i < CPU_TLB_SIZE; i++) {
2003 int mmu_idx;
2004 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2005 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
2009 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
2011 env->tlb_flush_addr = -1;
2012 env->tlb_flush_mask = 0;
2013 tlb_flush_count++;
2016 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
2018 if (addr == (tlb_entry->addr_read &
2019 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2020 addr == (tlb_entry->addr_write &
2021 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
2022 addr == (tlb_entry->addr_code &
2023 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
2024 *tlb_entry = s_cputlb_empty_entry;
2028 void tlb_flush_page(CPUState *env, target_ulong addr)
2030 int i;
2031 int mmu_idx;
2033 #if defined(DEBUG_TLB)
2034 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
2035 #endif
2036 /* Check if we need to flush due to large pages. */
2037 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
2038 #if defined(DEBUG_TLB)
2039 printf("tlb_flush_page: forced full flush ("
2040 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
2041 env->tlb_flush_addr, env->tlb_flush_mask);
2042 #endif
2043 tlb_flush(env, 1);
2044 return;
2046 /* must reset current TB so that interrupts cannot modify the
2047 links while we are modifying them */
2048 env->current_tb = NULL;
2050 addr &= TARGET_PAGE_MASK;
2051 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2052 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2053 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
2055 tlb_flush_jmp_cache(env, addr);
2058 /* update the TLBs so that writes to code in the virtual page 'addr'
2059 can be detected */
2060 static void tlb_protect_code(ram_addr_t ram_addr)
2062 cpu_physical_memory_reset_dirty(ram_addr,
2063 ram_addr + TARGET_PAGE_SIZE,
2064 CODE_DIRTY_FLAG);
2067 /* update the TLB so that writes in physical page 'phys_addr' are no longer
2068 tested for self modifying code */
2069 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
2070 target_ulong vaddr)
2072 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
2075 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
2076 unsigned long start, unsigned long length)
2078 unsigned long addr;
2079 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2080 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
2081 if ((addr - start) < length) {
2082 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
2087 /* Note: start and end must be within the same ram block. */
2088 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
2089 int dirty_flags)
2091 CPUState *env;
2092 unsigned long length, start1;
2093 int i;
2095 start &= TARGET_PAGE_MASK;
2096 end = TARGET_PAGE_ALIGN(end);
2098 length = end - start;
2099 if (length == 0)
2100 return;
2101 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
2103 /* we modify the TLB cache so that the dirty bit will be set again
2104 when accessing the range */
2105 start1 = (unsigned long)qemu_safe_ram_ptr(start);
2106 /* Check that we don't span multiple blocks - this breaks the
2107 address comparisons below. */
2108 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
2109 != (end - 1) - start) {
2110 abort();
2113 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2114 int mmu_idx;
2115 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2116 for(i = 0; i < CPU_TLB_SIZE; i++)
2117 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2118 start1, length);
2123 int cpu_physical_memory_set_dirty_tracking(int enable)
2125 int ret = 0;
2126 in_migration = enable;
2127 ret = cpu_notify_migration_log(!!enable);
2128 return ret;
2131 int cpu_physical_memory_get_dirty_tracking(void)
2133 return in_migration;
2136 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
2137 target_phys_addr_t end_addr)
2139 int ret;
2141 ret = cpu_notify_sync_dirty_bitmap(start_addr, end_addr);
2142 return ret;
2145 int cpu_physical_log_start(target_phys_addr_t start_addr,
2146 ram_addr_t size)
2148 CPUPhysMemoryClient *client;
2149 QLIST_FOREACH(client, &memory_client_list, list) {
2150 if (client->log_start) {
2151 int r = client->log_start(client, start_addr, size);
2152 if (r < 0) {
2153 return r;
2157 return 0;
2160 int cpu_physical_log_stop(target_phys_addr_t start_addr,
2161 ram_addr_t size)
2163 CPUPhysMemoryClient *client;
2164 QLIST_FOREACH(client, &memory_client_list, list) {
2165 if (client->log_stop) {
2166 int r = client->log_stop(client, start_addr, size);
2167 if (r < 0) {
2168 return r;
2172 return 0;
2175 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2177 ram_addr_t ram_addr;
2178 void *p;
2180 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2181 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2182 + tlb_entry->addend);
2183 ram_addr = qemu_ram_addr_from_host_nofail(p);
2184 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2185 tlb_entry->addr_write |= TLB_NOTDIRTY;
2190 /* update the TLB according to the current state of the dirty bits */
2191 void cpu_tlb_update_dirty(CPUState *env)
2193 int i;
2194 int mmu_idx;
2195 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2196 for(i = 0; i < CPU_TLB_SIZE; i++)
2197 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2201 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2203 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2204 tlb_entry->addr_write = vaddr;
2207 /* update the TLB corresponding to virtual page vaddr
2208 so that it is no longer dirty */
2209 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2211 int i;
2212 int mmu_idx;
2214 vaddr &= TARGET_PAGE_MASK;
2215 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2216 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2217 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2220 /* Our TLB does not support large pages, so remember the area covered by
2221 large pages and trigger a full TLB flush if these are invalidated. */
2222 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2223 target_ulong size)
2225 target_ulong mask = ~(size - 1);
2227 if (env->tlb_flush_addr == (target_ulong)-1) {
2228 env->tlb_flush_addr = vaddr & mask;
2229 env->tlb_flush_mask = mask;
2230 return;
2232 /* Extend the existing region to include the new page.
2233 This is a compromise between unnecessary flushes and the cost
2234 of maintaining a full variable size TLB. */
2235 mask &= env->tlb_flush_mask;
2236 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2237 mask <<= 1;
2239 env->tlb_flush_addr &= mask;
2240 env->tlb_flush_mask = mask;
2243 /* Add a new TLB entry. At most one entry for a given virtual address
2244 is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
2245 supplied size is only used by tlb_flush_page. */
/* Parameters: 'vaddr'/'paddr' are the virtual and physical page
   addresses, 'prot' the PAGE_READ/PAGE_WRITE/PAGE_EXEC permissions,
   'mmu_idx' the MMU mode whose table is filled and 'size' the mapping
   size (asserted to be at least TARGET_PAGE_SIZE). */
2246 void tlb_set_page(CPUState *env, target_ulong vaddr,
2247 target_phys_addr_t paddr, int prot,
2248 int mmu_idx, target_ulong size)
2250 PhysPageDesc *p;
2251 unsigned long pd;
2252 unsigned int index;
2253 target_ulong address;
2254 target_ulong code_address;
2255 unsigned long addend;
2256 CPUTLBEntry *te;
2257 CPUWatchpoint *wp;
2258 target_phys_addr_t iotlb;
2260 assert(size >= TARGET_PAGE_SIZE);
/* anything larger than one page is recorded as a large-page region */
2261 if (size != TARGET_PAGE_SIZE) {
2262 tlb_add_large_page(env, vaddr, size);
/* pd: phys_offset of the page, or IO_MEM_UNASSIGNED without descriptor */
2264 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2265 if (!p) {
2266 pd = IO_MEM_UNASSIGNED;
2267 } else {
2268 pd = p->phys_offset;
2270 #if defined(DEBUG_TLB)
2271 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2272 " prot=%x idx=%d pd=0x%08lx\n",
2273 vaddr, paddr, prot, mmu_idx, pd);
2274 #endif
2276 address = vaddr;
2277 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2278 /* IO memory case (romd handled later) */
2279 address |= TLB_MMIO;
2281 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2282 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2283 /* Normal RAM. */
2284 iotlb = pd & TARGET_PAGE_MASK;
2285 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2286 iotlb |= IO_MEM_NOTDIRTY;
2287 else
2288 iotlb |= IO_MEM_ROM;
2289 } else {
2290 /* IO handlers are currently passed a physical address.
2291 It would be nice to pass an offset from the base address
2292 of that region. This would avoid having to special case RAM,
2293 and avoid full address decoding in every device.
2294 We can't use the high bits of pd for this because
2295 IO_MEM_ROMD uses these as a ram address. */
2296 iotlb = (pd & ~TARGET_PAGE_MASK);
2297 if (p) {
2298 iotlb += p->region_offset;
2299 } else {
2300 iotlb += paddr;
/* code fetches keep the address computed before the watchpoint check */
2304 code_address = address;
2305 /* Make accesses to pages with watchpoints go via the
2306 watchpoint trap routines. */
2307 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2308 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2309 /* Avoid trapping reads of pages with a write breakpoint. */
2310 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2311 iotlb = io_mem_watch + paddr;
2312 address |= TLB_MMIO;
2313 break;
/* fill the slot selected by the virtual page number; iotlb and addend
   are stored relative to vaddr */
2318 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2319 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2320 te = &env->tlb_table[mmu_idx][index];
2321 te->addend = addend - vaddr;
/* an access kind that is not permitted gets the address -1 */
2322 if (prot & PAGE_READ) {
2323 te->addr_read = address;
2324 } else {
2325 te->addr_read = -1;
2328 if (prot & PAGE_EXEC) {
2329 te->addr_code = code_address;
2330 } else {
2331 te->addr_code = -1;
2333 if (prot & PAGE_WRITE) {
2334 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2335 (pd & IO_MEM_ROMD)) {
2336 /* Write access calls the I/O callback. */
2337 te->addr_write = address | TLB_MMIO;
2338 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2339 !cpu_physical_memory_is_dirty(pd)) {
2340 te->addr_write = address | TLB_NOTDIRTY;
2341 } else {
2342 te->addr_write = address;
2344 } else {
2345 te->addr_write = -1;
2349 #else
2351 void tlb_flush(CPUState *env, int flush_global)
2355 void tlb_flush_page(CPUState *env, target_ulong addr)
2360 * Walks guest process memory "regions" one by one
2361 * and calls callback function 'fn' for each region.
2364 struct walk_memory_regions_data
2366 walk_memory_regions_fn fn;
2367 void *priv;
2368 unsigned long start;
2369 int prot;
2372 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2373 abi_ulong end, int new_prot)
2375 if (data->start != -1ul) {
2376 int rc = data->fn(data->priv, data->start, end, data->prot);
2377 if (rc != 0) {
2378 return rc;
2382 data->start = (new_prot ? end : -1ul);
2383 data->prot = new_prot;
2385 return 0;
2388 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2389 abi_ulong base, int level, void **lp)
2391 abi_ulong pa;
2392 int i, rc;
2394 if (*lp == NULL) {
2395 return walk_memory_regions_end(data, base, 0);
2398 if (level == 0) {
2399 PageDesc *pd = *lp;
2400 for (i = 0; i < L2_SIZE; ++i) {
2401 int prot = pd[i].flags;
2403 pa = base | (i << TARGET_PAGE_BITS);
2404 if (prot != data->prot) {
2405 rc = walk_memory_regions_end(data, pa, prot);
2406 if (rc != 0) {
2407 return rc;
2411 } else {
2412 void **pp = *lp;
2413 for (i = 0; i < L2_SIZE; ++i) {
2414 pa = base | ((abi_ulong)i <<
2415 (TARGET_PAGE_BITS + L2_BITS * level));
2416 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2417 if (rc != 0) {
2418 return rc;
2423 return 0;
2426 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2428 struct walk_memory_regions_data data;
2429 unsigned long i;
2431 data.fn = fn;
2432 data.priv = priv;
2433 data.start = -1ul;
2434 data.prot = 0;
2436 for (i = 0; i < V_L1_SIZE; i++) {
2437 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2438 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2439 if (rc != 0) {
2440 return rc;
2444 return walk_memory_regions_end(&data, 0, 0);
2447 static int dump_region(void *priv, abi_ulong start,
2448 abi_ulong end, unsigned long prot)
2450 FILE *f = (FILE *)priv;
2452 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2453 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2454 start, end, end - start,
2455 ((prot & PAGE_READ) ? 'r' : '-'),
2456 ((prot & PAGE_WRITE) ? 'w' : '-'),
2457 ((prot & PAGE_EXEC) ? 'x' : '-'));
2459 return (0);
2462 /* dump memory mappings */
2463 void page_dump(FILE *f)
2465 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2466 "start", "end", "size", "prot");
2467 walk_memory_regions(f, dump_region);
2470 int page_get_flags(target_ulong address)
2472 PageDesc *p;
2474 p = page_find(address >> TARGET_PAGE_BITS);
2475 if (!p)
2476 return 0;
2477 return p->flags;
2480 /* Modify the flags of a page and invalidate the code if necessary.
2481 The flag PAGE_WRITE_ORG is positioned automatically depending
2482 on PAGE_WRITE. The mmap_lock should already be held. */
2483 void page_set_flags(target_ulong start, target_ulong end, int flags)
2485 target_ulong addr, len;
2487 /* This function should never be called with addresses outside the
2488 guest address space. If this assert fires, it probably indicates
2489 a missing call to h2g_valid. */
2490 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2491 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2492 #endif
2493 assert(start < end);
2495 start = start & TARGET_PAGE_MASK;
2496 end = TARGET_PAGE_ALIGN(end);
2498 if (flags & PAGE_WRITE) {
2499 flags |= PAGE_WRITE_ORG;
2502 for (addr = start, len = end - start;
2503 len != 0;
2504 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2505 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2507 /* If the write protection bit is set, then we invalidate
2508 the code inside. */
2509 if (!(p->flags & PAGE_WRITE) &&
2510 (flags & PAGE_WRITE) &&
2511 p->first_tb) {
2512 tb_invalidate_phys_page(addr, 0, NULL);
2514 p->flags = flags;
2518 int page_check_range(target_ulong start, target_ulong len, int flags)
2520 PageDesc *p;
2521 target_ulong end;
2522 target_ulong addr;
2524 /* This function should never be called with addresses outside the
2525 guest address space. If this assert fires, it probably indicates
2526 a missing call to h2g_valid. */
2527 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2528 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2529 #endif
2531 if (len == 0) {
2532 return 0;
2534 if (start + len - 1 < start) {
2535 /* We've wrapped around. */
2536 return -1;
2539 end = TARGET_PAGE_ALIGN(start+len); /* must do before we loose bits in the next step */
2540 start = start & TARGET_PAGE_MASK;
2542 for (addr = start, len = end - start;
2543 len != 0;
2544 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2545 p = page_find(addr >> TARGET_PAGE_BITS);
2546 if( !p )
2547 return -1;
2548 if( !(p->flags & PAGE_VALID) )
2549 return -1;
2551 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2552 return -1;
2553 if (flags & PAGE_WRITE) {
2554 if (!(p->flags & PAGE_WRITE_ORG))
2555 return -1;
2556 /* unprotect the page if it was put read-only because it
2557 contains translated code */
2558 if (!(p->flags & PAGE_WRITE)) {
2559 if (!page_unprotect(addr, 0, NULL))
2560 return -1;
2562 return 0;
2565 return 0;
2568 /* called from signal handler: invalidate the code and unprotect the
2569 page. Return TRUE if the fault was successfully handled. */
2570 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2572 unsigned int prot;
2573 PageDesc *p;
2574 target_ulong host_start, host_end, addr;
2576 /* Technically this isn't safe inside a signal handler. However we
2577 know this only ever happens in a synchronous SEGV handler, so in
2578 practice it seems to be ok. */
2579 mmap_lock();
2581 p = page_find(address >> TARGET_PAGE_BITS);
2582 if (!p) {
2583 mmap_unlock();
2584 return 0;
2587 /* if the page was really writable, then we change its
2588 protection back to writable */
2589 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2590 host_start = address & qemu_host_page_mask;
2591 host_end = host_start + qemu_host_page_size;
2593 prot = 0;
2594 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2595 p = page_find(addr >> TARGET_PAGE_BITS);
2596 p->flags |= PAGE_WRITE;
2597 prot |= p->flags;
2599 /* and since the content will be modified, we must invalidate
2600 the corresponding translated code. */
2601 tb_invalidate_phys_page(addr, pc, puc);
2602 #ifdef DEBUG_TB_CHECK
2603 tb_invalidate_check(addr);
2604 #endif
2606 mprotect((void *)g2h(host_start), qemu_host_page_size,
2607 prot & PAGE_BITS);
2609 mmap_unlock();
2610 return 1;
2612 mmap_unlock();
2613 return 0;
2616 static inline void tlb_set_dirty(CPUState *env,
2617 unsigned long addr, target_ulong vaddr)
2620 #endif /* defined(CONFIG_USER_ONLY) */
2622 #if !defined(CONFIG_USER_ONLY)
2624 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2625 typedef struct subpage_t {
2626 target_phys_addr_t base;
2627 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2628 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2629 } subpage_t;
2631 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2632 ram_addr_t memory, ram_addr_t region_offset);
2633 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2634 ram_addr_t orig_memory,
2635 ram_addr_t region_offset);
/*
 * Work out which part of the page at 'addr' is covered by a region that
 * starts at 'start_addr' and is 'orig_size' bytes long.
 *
 * Outputs (all must be lvalues):
 *   start_addr2  - first covered byte offset within the page
 *   end_addr2    - last covered byte offset within the page
 *   need_subpage - set to 1 when the page is only partially covered;
 *                  never cleared, so the caller must initialise it to 0
 *
 * NOTE: 'orig_size' is not a parameter; it is taken from the scope of
 * the caller.  'end_addr' is accepted for compatibility but unused.
 * Every argument is parenthesised so expressions can be passed safely.
 */
#define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
                      need_subpage)                                     \
    do {                                                                \
        if ((addr) > (start_addr)) {                                    \
            (start_addr2) = 0;                                          \
        } else {                                                        \
            (start_addr2) = (start_addr) & ~TARGET_PAGE_MASK;           \
            if ((start_addr2) > 0) {                                    \
                (need_subpage) = 1;                                     \
            }                                                           \
        }                                                               \
                                                                        \
        if (((start_addr) + orig_size) - (addr) >= TARGET_PAGE_SIZE) {  \
            (end_addr2) = TARGET_PAGE_SIZE - 1;                         \
        } else {                                                        \
            (end_addr2) = ((start_addr) + orig_size - 1) &              \
                          ~TARGET_PAGE_MASK;                            \
            if ((end_addr2) < TARGET_PAGE_SIZE - 1) {                   \
                (need_subpage) = 1;                                     \
            }                                                           \
        }                                                               \
    } while (0)
2656 /* register physical memory.
2657 For RAM, 'size' must be a multiple of the target page size.
2658 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2659 io memory page. The address used when calling the IO function is
2660 the offset from the start of the region, plus region_offset. Both
2661 start_addr and region_offset are rounded down to a page boundary
2662 before calculating this offset. This should not be a problem unless
2663 the low bits of start_addr and region_offset differ. */
2664 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2665 ram_addr_t size,
2666 ram_addr_t phys_offset,
2667 ram_addr_t region_offset,
2668 bool log_dirty)
2670 target_phys_addr_t addr, end_addr;
2671 PhysPageDesc *p;
2672 CPUState *env;
2673 ram_addr_t orig_size = size;
2674 subpage_t *subpage;
2676 assert(size);
2677 cpu_notify_set_memory(start_addr, size, phys_offset, log_dirty);
2679 if (phys_offset == IO_MEM_UNASSIGNED) {
2680 region_offset = start_addr;
2682 region_offset &= TARGET_PAGE_MASK;
2683 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2684 end_addr = start_addr + (target_phys_addr_t)size;
2686 addr = start_addr;
2687 do {
2688 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2689 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2690 ram_addr_t orig_memory = p->phys_offset;
2691 target_phys_addr_t start_addr2, end_addr2;
2692 int need_subpage = 0;
2694 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2695 need_subpage);
2696 if (need_subpage) {
2697 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2698 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2699 &p->phys_offset, orig_memory,
2700 p->region_offset);
2701 } else {
2702 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2703 >> IO_MEM_SHIFT];
2705 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2706 region_offset);
2707 p->region_offset = 0;
2708 } else {
2709 p->phys_offset = phys_offset;
2710 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2711 (phys_offset & IO_MEM_ROMD))
2712 phys_offset += TARGET_PAGE_SIZE;
2714 } else {
2715 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2716 p->phys_offset = phys_offset;
2717 p->region_offset = region_offset;
2718 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2719 (phys_offset & IO_MEM_ROMD)) {
2720 phys_offset += TARGET_PAGE_SIZE;
2721 } else {
2722 target_phys_addr_t start_addr2, end_addr2;
2723 int need_subpage = 0;
2725 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2726 end_addr2, need_subpage);
2728 if (need_subpage) {
2729 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2730 &p->phys_offset, IO_MEM_UNASSIGNED,
2731 addr & TARGET_PAGE_MASK);
2732 subpage_register(subpage, start_addr2, end_addr2,
2733 phys_offset, region_offset);
2734 p->region_offset = 0;
2738 region_offset += TARGET_PAGE_SIZE;
2739 addr += TARGET_PAGE_SIZE;
2740 } while (addr != end_addr);
2742 /* since each CPU stores ram addresses in its TLB cache, we must
2743 reset the modified entries */
2744 /* XXX: slow ! */
2745 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2746 tlb_flush(env, 1);
2750 /* XXX: temporary until new memory mapping API */
2751 ram_addr_t cpu_get_physical_page_desc(target_phys_addr_t addr)
2753 PhysPageDesc *p;
2755 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2756 if (!p)
2757 return IO_MEM_UNASSIGNED;
2758 return p->phys_offset;
2761 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2763 if (kvm_enabled())
2764 kvm_coalesce_mmio_region(addr, size);
2767 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2769 if (kvm_enabled())
2770 kvm_uncoalesce_mmio_region(addr, size);
/* Forward the request to KVM; does nothing when KVM is not in use. */
void qemu_flush_coalesced_mmio_buffer(void)
{
    if (!kvm_enabled()) {
        return;
    }
    kvm_flush_coalesced_mmio_buffer();
}
2779 #if defined(__linux__) && !defined(TARGET_S390X)
2781 #include <sys/vfs.h>
#define HUGETLBFS_MAGIC       0x958458f6

/*
 * Return the block size (f_bsize) of the filesystem containing 'path'.
 *
 * A warning is printed, but the size is still returned, when the
 * filesystem type is not HUGETLBFS_MAGIC.  Returns 0 after reporting
 * the error with perror() if statfs() fails.
 */
static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int rc;

    /* Retry for as long as the call is interrupted by a signal. */
    for (;;) {
        rc = statfs(path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC) {
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
    }

    return fs.f_bsize;
}
2805 static void *file_ram_alloc(RAMBlock *block,
2806 ram_addr_t memory,
2807 const char *path)
2809 char *filename;
2810 void *area;
2811 int fd;
2812 #ifdef MAP_POPULATE
2813 int flags;
2814 #endif
2815 unsigned long hpagesize;
2817 hpagesize = gethugepagesize(path);
2818 if (!hpagesize) {
2819 return NULL;
2822 if (memory < hpagesize) {
2823 return NULL;
2826 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2827 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2828 return NULL;
2831 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2832 return NULL;
2835 fd = mkstemp(filename);
2836 if (fd < 0) {
2837 perror("unable to create backing store for hugepages");
2838 free(filename);
2839 return NULL;
2841 unlink(filename);
2842 free(filename);
2844 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2847 * ftruncate is not supported by hugetlbfs in older
2848 * hosts, so don't bother bailing out on errors.
2849 * If anything goes wrong with it under other filesystems,
2850 * mmap will fail.
2852 if (ftruncate(fd, memory))
2853 perror("ftruncate");
2855 #ifdef MAP_POPULATE
2856 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2857 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2858 * to sidestep this quirk.
2860 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2861 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2862 #else
2863 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2864 #endif
2865 if (area == MAP_FAILED) {
2866 perror("file_ram_alloc: can't mmap RAM pages");
2867 close(fd);
2868 return (NULL);
2870 block->fd = fd;
2871 return area;
2873 #endif
2875 static ram_addr_t find_ram_offset(ram_addr_t size)
2877 RAMBlock *block, *next_block;
2878 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2880 if (QLIST_EMPTY(&ram_list.blocks))
2881 return 0;
2883 QLIST_FOREACH(block, &ram_list.blocks, next) {
2884 ram_addr_t end, next = RAM_ADDR_MAX;
2886 end = block->offset + block->length;
2888 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2889 if (next_block->offset >= end) {
2890 next = MIN(next, next_block->offset);
2893 if (next - end >= size && next - end < mingap) {
2894 offset = end;
2895 mingap = next - end;
2899 if (offset == RAM_ADDR_MAX) {
2900 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2901 (uint64_t)size);
2902 abort();
2905 return offset;
2908 static ram_addr_t last_ram_offset(void)
2910 RAMBlock *block;
2911 ram_addr_t last = 0;
2913 QLIST_FOREACH(block, &ram_list.blocks, next)
2914 last = MAX(last, block->offset + block->length);
2916 return last;
2919 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
2920 ram_addr_t size, void *host)
2922 RAMBlock *new_block, *block;
2924 size = TARGET_PAGE_ALIGN(size);
2925 new_block = g_malloc0(sizeof(*new_block));
2927 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2928 char *id = dev->parent_bus->info->get_dev_path(dev);
2929 if (id) {
2930 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2931 g_free(id);
2934 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2936 QLIST_FOREACH(block, &ram_list.blocks, next) {
2937 if (!strcmp(block->idstr, new_block->idstr)) {
2938 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2939 new_block->idstr);
2940 abort();
2944 new_block->offset = find_ram_offset(size);
2945 if (host) {
2946 new_block->host = host;
2947 new_block->flags |= RAM_PREALLOC_MASK;
2948 } else {
2949 if (mem_path) {
2950 #if defined (__linux__) && !defined(TARGET_S390X)
2951 new_block->host = file_ram_alloc(new_block, size, mem_path);
2952 if (!new_block->host) {
2953 new_block->host = qemu_vmalloc(size);
2954 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2956 #else
2957 fprintf(stderr, "-mem-path option unsupported\n");
2958 exit(1);
2959 #endif
2960 } else {
2961 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2962 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2963 an system defined value, which is at least 256GB. Larger systems
2964 have larger values. We put the guest between the end of data
2965 segment (system break) and this value. We use 32GB as a base to
2966 have enough room for the system break to grow. */
2967 new_block->host = mmap((void*)0x800000000, size,
2968 PROT_EXEC|PROT_READ|PROT_WRITE,
2969 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2970 if (new_block->host == MAP_FAILED) {
2971 fprintf(stderr, "Allocating RAM failed\n");
2972 abort();
2974 #else
2975 if (xen_enabled()) {
2976 xen_ram_alloc(new_block->offset, size);
2977 } else {
2978 new_block->host = qemu_vmalloc(size);
2980 #endif
2981 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2984 new_block->length = size;
2986 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2988 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2989 last_ram_offset() >> TARGET_PAGE_BITS);
2990 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2991 0xff, size >> TARGET_PAGE_BITS);
2993 if (kvm_enabled())
2994 kvm_setup_guest_memory(new_block->host, size);
2996 return new_block->offset;
2999 ram_addr_t qemu_ram_alloc(DeviceState *dev, const char *name, ram_addr_t size)
3001 return qemu_ram_alloc_from_ptr(dev, name, size, NULL);
3004 void qemu_ram_free_from_ptr(ram_addr_t addr)
3006 RAMBlock *block;
3008 QLIST_FOREACH(block, &ram_list.blocks, next) {
3009 if (addr == block->offset) {
3010 QLIST_REMOVE(block, next);
3011 g_free(block);
3012 return;
3017 void qemu_ram_free(ram_addr_t addr)
3019 RAMBlock *block;
3021 QLIST_FOREACH(block, &ram_list.blocks, next) {
3022 if (addr == block->offset) {
3023 QLIST_REMOVE(block, next);
3024 if (block->flags & RAM_PREALLOC_MASK) {
3026 } else if (mem_path) {
3027 #if defined (__linux__) && !defined(TARGET_S390X)
3028 if (block->fd) {
3029 munmap(block->host, block->length);
3030 close(block->fd);
3031 } else {
3032 qemu_vfree(block->host);
3034 #else
3035 abort();
3036 #endif
3037 } else {
3038 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3039 munmap(block->host, block->length);
3040 #else
3041 if (xen_enabled()) {
3042 xen_invalidate_map_cache_entry(block->host);
3043 } else {
3044 qemu_vfree(block->host);
3046 #endif
3048 g_free(block);
3049 return;
3055 #ifndef _WIN32
3056 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
3058 RAMBlock *block;
3059 ram_addr_t offset;
3060 int flags;
3061 void *area, *vaddr;
3063 QLIST_FOREACH(block, &ram_list.blocks, next) {
3064 offset = addr - block->offset;
3065 if (offset < block->length) {
3066 vaddr = block->host + offset;
3067 if (block->flags & RAM_PREALLOC_MASK) {
3069 } else {
3070 flags = MAP_FIXED;
3071 munmap(vaddr, length);
3072 if (mem_path) {
3073 #if defined(__linux__) && !defined(TARGET_S390X)
3074 if (block->fd) {
3075 #ifdef MAP_POPULATE
3076 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
3077 MAP_PRIVATE;
3078 #else
3079 flags |= MAP_PRIVATE;
3080 #endif
3081 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3082 flags, block->fd, offset);
3083 } else {
3084 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3085 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3086 flags, -1, 0);
3088 #else
3089 abort();
3090 #endif
3091 } else {
3092 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
3093 flags |= MAP_SHARED | MAP_ANONYMOUS;
3094 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
3095 flags, -1, 0);
3096 #else
3097 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
3098 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
3099 flags, -1, 0);
3100 #endif
3102 if (area != vaddr) {
3103 fprintf(stderr, "Could not remap addr: "
3104 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
3105 length, addr);
3106 exit(1);
3108 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
3110 return;
3114 #endif /* !_WIN32 */
3116 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3117 With the exception of the softmmu code in this file, this should
3118 only be used for local memory (e.g. video ram) that the device owns,
3119 and knows it isn't going to access beyond the end of the block.
3121 It should not be used for general purpose DMA.
3122 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
3124 void *qemu_get_ram_ptr(ram_addr_t addr)
3126 RAMBlock *block;
3128 QLIST_FOREACH(block, &ram_list.blocks, next) {
3129 if (addr - block->offset < block->length) {
3130 /* Move this entry to to start of the list. */
3131 if (block != QLIST_FIRST(&ram_list.blocks)) {
3132 QLIST_REMOVE(block, next);
3133 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
3135 if (xen_enabled()) {
3136 /* We need to check if the requested address is in the RAM
3137 * because we don't want to map the entire memory in QEMU.
3138 * In that case just map until the end of the page.
3140 if (block->offset == 0) {
3141 return xen_map_cache(addr, 0, 0);
3142 } else if (block->host == NULL) {
3143 block->host =
3144 xen_map_cache(block->offset, block->length, 1);
3147 return block->host + (addr - block->offset);
3151 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3152 abort();
3154 return NULL;
3157 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3158 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3160 void *qemu_safe_ram_ptr(ram_addr_t addr)
3162 RAMBlock *block;
3164 QLIST_FOREACH(block, &ram_list.blocks, next) {
3165 if (addr - block->offset < block->length) {
3166 if (xen_enabled()) {
3167 /* We need to check if the requested address is in the RAM
3168 * because we don't want to map the entire memory in QEMU.
3169 * In that case just map until the end of the page.
3171 if (block->offset == 0) {
3172 return xen_map_cache(addr, 0, 0);
3173 } else if (block->host == NULL) {
3174 block->host =
3175 xen_map_cache(block->offset, block->length, 1);
3178 return block->host + (addr - block->offset);
3182 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3183 abort();
3185 return NULL;
3188 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3189 * but takes a size argument */
3190 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3192 if (*size == 0) {
3193 return NULL;
3195 if (xen_enabled()) {
3196 return xen_map_cache(addr, *size, 1);
3197 } else {
3198 RAMBlock *block;
3200 QLIST_FOREACH(block, &ram_list.blocks, next) {
3201 if (addr - block->offset < block->length) {
3202 if (addr - block->offset + *size > block->length)
3203 *size = block->length - addr + block->offset;
3204 return block->host + (addr - block->offset);
3208 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3209 abort();
/* Counterpart of qemu_get_ram_ptr(); the only action here is emitting
   the trace event. */
void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}
3218 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3220 RAMBlock *block;
3221 uint8_t *host = ptr;
3223 if (xen_enabled()) {
3224 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3225 return 0;
3228 QLIST_FOREACH(block, &ram_list.blocks, next) {
3229 /* This case append when the block is not mapped. */
3230 if (block->host == NULL) {
3231 continue;
3233 if (host - block->host < block->length) {
3234 *ram_addr = block->offset + (host - block->host);
3235 return 0;
3239 return -1;
3242 /* Some of the softmmu routines need to translate from a host pointer
3243 (typically a TLB entry) back to a ram offset. */
3244 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3246 ram_addr_t ram_addr;
3248 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3249 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3250 abort();
3252 return ram_addr;
3255 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3257 #ifdef DEBUG_UNASSIGNED
3258 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3259 #endif
3260 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3261 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3262 #endif
3263 return 0;
3266 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3268 #ifdef DEBUG_UNASSIGNED
3269 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3270 #endif
3271 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3272 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3273 #endif
3274 return 0;
3277 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3279 #ifdef DEBUG_UNASSIGNED
3280 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3281 #endif
3282 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3283 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3284 #endif
3285 return 0;
3288 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3290 #ifdef DEBUG_UNASSIGNED
3291 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3292 #endif
3293 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3294 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3295 #endif
3298 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3300 #ifdef DEBUG_UNASSIGNED
3301 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3302 #endif
3303 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3304 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3305 #endif
3308 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3310 #ifdef DEBUG_UNASSIGNED
3311 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3312 #endif
3313 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3314 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3315 #endif
3318 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3319 unassigned_mem_readb,
3320 unassigned_mem_readw,
3321 unassigned_mem_readl,
3324 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3325 unassigned_mem_writeb,
3326 unassigned_mem_writew,
3327 unassigned_mem_writel,
3330 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3331 uint32_t val)
3333 int dirty_flags;
3334 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3335 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3336 #if !defined(CONFIG_USER_ONLY)
3337 tb_invalidate_phys_page_fast(ram_addr, 1);
3338 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3339 #endif
3341 stb_p(qemu_get_ram_ptr(ram_addr), val);
3342 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3343 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3344 /* we remove the notdirty callback only if the code has been
3345 flushed */
3346 if (dirty_flags == 0xff)
3347 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3350 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3351 uint32_t val)
3353 int dirty_flags;
3354 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3355 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3356 #if !defined(CONFIG_USER_ONLY)
3357 tb_invalidate_phys_page_fast(ram_addr, 2);
3358 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3359 #endif
3361 stw_p(qemu_get_ram_ptr(ram_addr), val);
3362 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3363 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3364 /* we remove the notdirty callback only if the code has been
3365 flushed */
3366 if (dirty_flags == 0xff)
3367 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3370 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3371 uint32_t val)
3373 int dirty_flags;
3374 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3375 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3376 #if !defined(CONFIG_USER_ONLY)
3377 tb_invalidate_phys_page_fast(ram_addr, 4);
3378 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3379 #endif
3381 stl_p(qemu_get_ram_ptr(ram_addr), val);
3382 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3383 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3384 /* we remove the notdirty callback only if the code has been
3385 flushed */
3386 if (dirty_flags == 0xff)
3387 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3390 static CPUReadMemoryFunc * const error_mem_read[3] = {
3391 NULL, /* never used */
3392 NULL, /* never used */
3393 NULL, /* never used */
3396 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3397 notdirty_mem_writeb,
3398 notdirty_mem_writew,
3399 notdirty_mem_writel,
3402 /* Generate a debug exception if a watchpoint has been hit. */
3403 static void check_watchpoint(int offset, int len_mask, int flags)
3405 CPUState *env = cpu_single_env;
3406 target_ulong pc, cs_base;
3407 TranslationBlock *tb;
3408 target_ulong vaddr;
3409 CPUWatchpoint *wp;
3410 int cpu_flags;
3412 if (env->watchpoint_hit) {
3413 /* We re-entered the check after replacing the TB. Now raise
3414 * the debug interrupt so that is will trigger after the
3415 * current instruction. */
3416 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3417 return;
3419 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3420 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3421 if ((vaddr == (wp->vaddr & len_mask) ||
3422 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3423 wp->flags |= BP_WATCHPOINT_HIT;
3424 if (!env->watchpoint_hit) {
3425 env->watchpoint_hit = wp;
3426 tb = tb_find_pc(env->mem_io_pc);
3427 if (!tb) {
3428 cpu_abort(env, "check_watchpoint: could not find TB for "
3429 "pc=%p", (void *)env->mem_io_pc);
3431 cpu_restore_state(tb, env, env->mem_io_pc);
3432 tb_phys_invalidate(tb, -1);
3433 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3434 env->exception_index = EXCP_DEBUG;
3435 } else {
3436 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3437 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3439 cpu_resume_from_signal(env, NULL);
3441 } else {
3442 wp->flags &= ~BP_WATCHPOINT_HIT;
3447 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3448 so these check for a hit then pass through to the normal out-of-line
3449 phys routines. */
3450 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3452 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3453 return ldub_phys(addr);
3456 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3458 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3459 return lduw_phys(addr);
3462 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3464 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3465 return ldl_phys(addr);
3468 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3469 uint32_t val)
3471 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3472 stb_phys(addr, val);
3475 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3476 uint32_t val)
3478 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3479 stw_phys(addr, val);
3482 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3483 uint32_t val)
3485 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3486 stl_phys(addr, val);
3489 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3490 watch_mem_readb,
3491 watch_mem_readw,
3492 watch_mem_readl,
3495 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3496 watch_mem_writeb,
3497 watch_mem_writew,
3498 watch_mem_writel,
3501 static inline uint32_t subpage_readlen (subpage_t *mmio,
3502 target_phys_addr_t addr,
3503 unsigned int len)
3505 unsigned int idx = SUBPAGE_IDX(addr);
3506 #if defined(DEBUG_SUBPAGE)
3507 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3508 mmio, len, addr, idx);
3509 #endif
3511 addr += mmio->region_offset[idx];
3512 idx = mmio->sub_io_index[idx];
3513 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3516 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3517 uint32_t value, unsigned int len)
3519 unsigned int idx = SUBPAGE_IDX(addr);
3520 #if defined(DEBUG_SUBPAGE)
3521 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3522 __func__, mmio, len, addr, idx, value);
3523 #endif
3525 addr += mmio->region_offset[idx];
3526 idx = mmio->sub_io_index[idx];
3527 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3530 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3532 return subpage_readlen(opaque, addr, 0);
3535 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3536 uint32_t value)
3538 subpage_writelen(opaque, addr, value, 0);
3541 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3543 return subpage_readlen(opaque, addr, 1);
3546 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3547 uint32_t value)
3549 subpage_writelen(opaque, addr, value, 1);
3552 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3554 return subpage_readlen(opaque, addr, 2);
3557 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3558 uint32_t value)
3560 subpage_writelen(opaque, addr, value, 2);
3563 static CPUReadMemoryFunc * const subpage_read[] = {
3564 &subpage_readb,
3565 &subpage_readw,
3566 &subpage_readl,
3569 static CPUWriteMemoryFunc * const subpage_write[] = {
3570 &subpage_writeb,
3571 &subpage_writew,
3572 &subpage_writel,
3575 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3576 ram_addr_t memory, ram_addr_t region_offset)
3578 int idx, eidx;
3580 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3581 return -1;
3582 idx = SUBPAGE_IDX(start);
3583 eidx = SUBPAGE_IDX(end);
3584 #if defined(DEBUG_SUBPAGE)
3585 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3586 mmio, start, end, idx, eidx, memory);
3587 #endif
3588 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
3589 memory = IO_MEM_UNASSIGNED;
3590 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3591 for (; idx <= eidx; idx++) {
3592 mmio->sub_io_index[idx] = memory;
3593 mmio->region_offset[idx] = region_offset;
3596 return 0;
3599 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3600 ram_addr_t orig_memory,
3601 ram_addr_t region_offset)
3603 subpage_t *mmio;
3604 int subpage_memory;
3606 mmio = g_malloc0(sizeof(subpage_t));
3608 mmio->base = base;
3609 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3610 DEVICE_NATIVE_ENDIAN);
3611 #if defined(DEBUG_SUBPAGE)
3612 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3613 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3614 #endif
3615 *phys = subpage_memory | IO_MEM_SUBPAGE;
3616 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3618 return mmio;
3621 static int get_free_io_mem_idx(void)
3623 int i;
3625 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3626 if (!io_mem_used[i]) {
3627 io_mem_used[i] = 1;
3628 return i;
3630 fprintf(stderr, "RAN out out io_mem_idx, max %d !\n", IO_MEM_NB_ENTRIES);
3631 return -1;
/*
 * Usually, devices operate in little endian mode. There are devices out
 * there that operate in big endian too. Each device gets byte swapped
 * mmio if plugged onto a CPU that does the other endianness.
 *
 * CPU          Device           swap?
 *
 * little       little           no
 * little       big              yes
 * big          little           yes
 * big          big              no
 */
3647 typedef struct SwapEndianContainer {
3648 CPUReadMemoryFunc *read[3];
3649 CPUWriteMemoryFunc *write[3];
3650 void *opaque;
3651 } SwapEndianContainer;
3653 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3655 uint32_t val;
3656 SwapEndianContainer *c = opaque;
3657 val = c->read[0](c->opaque, addr);
3658 return val;
3661 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3663 uint32_t val;
3664 SwapEndianContainer *c = opaque;
3665 val = bswap16(c->read[1](c->opaque, addr));
3666 return val;
3669 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3671 uint32_t val;
3672 SwapEndianContainer *c = opaque;
3673 val = bswap32(c->read[2](c->opaque, addr));
3674 return val;
3677 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3678 swapendian_mem_readb,
3679 swapendian_mem_readw,
3680 swapendian_mem_readl
3683 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3684 uint32_t val)
3686 SwapEndianContainer *c = opaque;
3687 c->write[0](c->opaque, addr, val);
3690 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3691 uint32_t val)
3693 SwapEndianContainer *c = opaque;
3694 c->write[1](c->opaque, addr, bswap16(val));
3697 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3698 uint32_t val)
3700 SwapEndianContainer *c = opaque;
3701 c->write[2](c->opaque, addr, bswap32(val));
3704 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3705 swapendian_mem_writeb,
3706 swapendian_mem_writew,
3707 swapendian_mem_writel
3710 static void swapendian_init(int io_index)
3712 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3713 int i;
3715 /* Swap mmio for big endian targets */
3716 c->opaque = io_mem_opaque[io_index];
3717 for (i = 0; i < 3; i++) {
3718 c->read[i] = io_mem_read[io_index][i];
3719 c->write[i] = io_mem_write[io_index][i];
3721 io_mem_read[io_index][i] = swapendian_readfn[i];
3722 io_mem_write[io_index][i] = swapendian_writefn[i];
3724 io_mem_opaque[io_index] = c;
3727 static void swapendian_del(int io_index)
3729 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3730 g_free(io_mem_opaque[io_index]);
3734 /* mem_read and mem_write are arrays of functions containing the
3735 function to access byte (index 0), word (index 1) and dword (index
3736 2). Functions can be omitted with a NULL function pointer.
3737 If io_index is non zero, the corresponding io zone is
3738 modified. If it is zero, a new io zone is allocated. The return
3739 value can be used with cpu_register_physical_memory(). (-1) is
3740 returned if error. */
3741 static int cpu_register_io_memory_fixed(int io_index,
3742 CPUReadMemoryFunc * const *mem_read,
3743 CPUWriteMemoryFunc * const *mem_write,
3744 void *opaque, enum device_endian endian)
3746 int i;
3748 if (io_index <= 0) {
3749 io_index = get_free_io_mem_idx();
3750 if (io_index == -1)
3751 return io_index;
3752 } else {
3753 io_index >>= IO_MEM_SHIFT;
3754 if (io_index >= IO_MEM_NB_ENTRIES)
3755 return -1;
3758 for (i = 0; i < 3; ++i) {
3759 io_mem_read[io_index][i]
3760 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3762 for (i = 0; i < 3; ++i) {
3763 io_mem_write[io_index][i]
3764 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3766 io_mem_opaque[io_index] = opaque;
3768 switch (endian) {
3769 case DEVICE_BIG_ENDIAN:
3770 #ifndef TARGET_WORDS_BIGENDIAN
3771 swapendian_init(io_index);
3772 #endif
3773 break;
3774 case DEVICE_LITTLE_ENDIAN:
3775 #ifdef TARGET_WORDS_BIGENDIAN
3776 swapendian_init(io_index);
3777 #endif
3778 break;
3779 case DEVICE_NATIVE_ENDIAN:
3780 default:
3781 break;
3784 return (io_index << IO_MEM_SHIFT);
3787 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3788 CPUWriteMemoryFunc * const *mem_write,
3789 void *opaque, enum device_endian endian)
3791 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
3794 void cpu_unregister_io_memory(int io_table_address)
3796 int i;
3797 int io_index = io_table_address >> IO_MEM_SHIFT;
3799 swapendian_del(io_index);
3801 for (i=0;i < 3; i++) {
3802 io_mem_read[io_index][i] = unassigned_mem_read[i];
3803 io_mem_write[io_index][i] = unassigned_mem_write[i];
3805 io_mem_opaque[io_index] = NULL;
3806 io_mem_used[io_index] = 0;
3809 static void io_mem_init(void)
3811 int i;
3813 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3814 unassigned_mem_write, NULL,
3815 DEVICE_NATIVE_ENDIAN);
3816 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3817 unassigned_mem_write, NULL,
3818 DEVICE_NATIVE_ENDIAN);
3819 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3820 notdirty_mem_write, NULL,
3821 DEVICE_NATIVE_ENDIAN);
3822 for (i=0; i<5; i++)
3823 io_mem_used[i] = 1;
3825 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3826 watch_mem_write, NULL,
3827 DEVICE_NATIVE_ENDIAN);
3830 static void memory_map_init(void)
3832 system_memory = g_malloc(sizeof(*system_memory));
3833 memory_region_init(system_memory, "system", INT64_MAX);
3834 set_system_memory_map(system_memory);
3836 system_io = g_malloc(sizeof(*system_io));
3837 memory_region_init(system_io, "io", 65536);
3838 set_system_io_map(system_io);
3841 MemoryRegion *get_system_memory(void)
3843 return system_memory;
3846 MemoryRegion *get_system_io(void)
3848 return system_io;
3851 #endif /* !defined(CONFIG_USER_ONLY) */
3853 /* physical memory access (slow version, mainly for debug) */
3854 #if defined(CONFIG_USER_ONLY)
3855 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3856 uint8_t *buf, int len, int is_write)
3858 int l, flags;
3859 target_ulong page;
3860 void * p;
3862 while (len > 0) {
3863 page = addr & TARGET_PAGE_MASK;
3864 l = (page + TARGET_PAGE_SIZE) - addr;
3865 if (l > len)
3866 l = len;
3867 flags = page_get_flags(page);
3868 if (!(flags & PAGE_VALID))
3869 return -1;
3870 if (is_write) {
3871 if (!(flags & PAGE_WRITE))
3872 return -1;
3873 /* XXX: this code should not depend on lock_user */
3874 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3875 return -1;
3876 memcpy(p, buf, l);
3877 unlock_user(p, addr, l);
3878 } else {
3879 if (!(flags & PAGE_READ))
3880 return -1;
3881 /* XXX: this code should not depend on lock_user */
3882 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3883 return -1;
3884 memcpy(buf, p, l);
3885 unlock_user(p, addr, 0);
3887 len -= l;
3888 buf += l;
3889 addr += l;
3891 return 0;
3894 #else
3895 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3896 int len, int is_write)
3898 int l, io_index;
3899 uint8_t *ptr;
3900 uint32_t val;
3901 target_phys_addr_t page;
3902 ram_addr_t pd;
3903 PhysPageDesc *p;
3905 while (len > 0) {
3906 page = addr & TARGET_PAGE_MASK;
3907 l = (page + TARGET_PAGE_SIZE) - addr;
3908 if (l > len)
3909 l = len;
3910 p = phys_page_find(page >> TARGET_PAGE_BITS);
3911 if (!p) {
3912 pd = IO_MEM_UNASSIGNED;
3913 } else {
3914 pd = p->phys_offset;
3917 if (is_write) {
3918 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3919 target_phys_addr_t addr1 = addr;
3920 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3921 if (p)
3922 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3923 /* XXX: could force cpu_single_env to NULL to avoid
3924 potential bugs */
3925 if (l >= 4 && ((addr1 & 3) == 0)) {
3926 /* 32 bit write access */
3927 val = ldl_p(buf);
3928 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3929 l = 4;
3930 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3931 /* 16 bit write access */
3932 val = lduw_p(buf);
3933 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3934 l = 2;
3935 } else {
3936 /* 8 bit write access */
3937 val = ldub_p(buf);
3938 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3939 l = 1;
3941 } else {
3942 ram_addr_t addr1;
3943 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3944 /* RAM case */
3945 ptr = qemu_get_ram_ptr(addr1);
3946 memcpy(ptr, buf, l);
3947 if (!cpu_physical_memory_is_dirty(addr1)) {
3948 /* invalidate code */
3949 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3950 /* set dirty bit */
3951 cpu_physical_memory_set_dirty_flags(
3952 addr1, (0xff & ~CODE_DIRTY_FLAG));
3954 qemu_put_ram_ptr(ptr);
3956 } else {
3957 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3958 !(pd & IO_MEM_ROMD)) {
3959 target_phys_addr_t addr1 = addr;
3960 /* I/O case */
3961 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3962 if (p)
3963 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3964 if (l >= 4 && ((addr1 & 3) == 0)) {
3965 /* 32 bit read access */
3966 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3967 stl_p(buf, val);
3968 l = 4;
3969 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3970 /* 16 bit read access */
3971 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3972 stw_p(buf, val);
3973 l = 2;
3974 } else {
3975 /* 8 bit read access */
3976 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3977 stb_p(buf, val);
3978 l = 1;
3980 } else {
3981 /* RAM case */
3982 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3983 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3984 qemu_put_ram_ptr(ptr);
3987 len -= l;
3988 buf += l;
3989 addr += l;
3993 /* used for ROM loading : can write in RAM and ROM */
3994 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3995 const uint8_t *buf, int len)
3997 int l;
3998 uint8_t *ptr;
3999 target_phys_addr_t page;
4000 unsigned long pd;
4001 PhysPageDesc *p;
4003 while (len > 0) {
4004 page = addr & TARGET_PAGE_MASK;
4005 l = (page + TARGET_PAGE_SIZE) - addr;
4006 if (l > len)
4007 l = len;
4008 p = phys_page_find(page >> TARGET_PAGE_BITS);
4009 if (!p) {
4010 pd = IO_MEM_UNASSIGNED;
4011 } else {
4012 pd = p->phys_offset;
4015 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
4016 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
4017 !(pd & IO_MEM_ROMD)) {
4018 /* do nothing */
4019 } else {
4020 unsigned long addr1;
4021 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4022 /* ROM/RAM case */
4023 ptr = qemu_get_ram_ptr(addr1);
4024 memcpy(ptr, buf, l);
4025 qemu_put_ram_ptr(ptr);
4027 len -= l;
4028 buf += l;
4029 addr += l;
4033 typedef struct {
4034 void *buffer;
4035 target_phys_addr_t addr;
4036 target_phys_addr_t len;
4037 } BounceBuffer;
4039 static BounceBuffer bounce;
4041 typedef struct MapClient {
4042 void *opaque;
4043 void (*callback)(void *opaque);
4044 QLIST_ENTRY(MapClient) link;
4045 } MapClient;
4047 static QLIST_HEAD(map_client_list, MapClient) map_client_list
4048 = QLIST_HEAD_INITIALIZER(map_client_list);
4050 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
4052 MapClient *client = g_malloc(sizeof(*client));
4054 client->opaque = opaque;
4055 client->callback = callback;
4056 QLIST_INSERT_HEAD(&map_client_list, client, link);
4057 return client;
4060 void cpu_unregister_map_client(void *_client)
4062 MapClient *client = (MapClient *)_client;
4064 QLIST_REMOVE(client, link);
4065 g_free(client);
4068 static void cpu_notify_map_clients(void)
4070 MapClient *client;
4072 while (!QLIST_EMPTY(&map_client_list)) {
4073 client = QLIST_FIRST(&map_client_list);
4074 client->callback(client->opaque);
4075 cpu_unregister_map_client(client);
4079 /* Map a physical memory region into a host virtual address.
4080 * May map a subset of the requested range, given by and returned in *plen.
4081 * May return NULL if resources needed to perform the mapping are exhausted.
4082 * Use only for reads OR writes - not for read-modify-write operations.
4083 * Use cpu_register_map_client() to know when retrying the map operation is
4084 * likely to succeed.
4086 void *cpu_physical_memory_map(target_phys_addr_t addr,
4087 target_phys_addr_t *plen,
4088 int is_write)
4090 target_phys_addr_t len = *plen;
4091 target_phys_addr_t todo = 0;
4092 int l;
4093 target_phys_addr_t page;
4094 unsigned long pd;
4095 PhysPageDesc *p;
4096 ram_addr_t raddr = RAM_ADDR_MAX;
4097 ram_addr_t rlen;
4098 void *ret;
4100 while (len > 0) {
4101 page = addr & TARGET_PAGE_MASK;
4102 l = (page + TARGET_PAGE_SIZE) - addr;
4103 if (l > len)
4104 l = len;
4105 p = phys_page_find(page >> TARGET_PAGE_BITS);
4106 if (!p) {
4107 pd = IO_MEM_UNASSIGNED;
4108 } else {
4109 pd = p->phys_offset;
4112 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4113 if (todo || bounce.buffer) {
4114 break;
4116 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4117 bounce.addr = addr;
4118 bounce.len = l;
4119 if (!is_write) {
4120 cpu_physical_memory_read(addr, bounce.buffer, l);
4123 *plen = l;
4124 return bounce.buffer;
4126 if (!todo) {
4127 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4130 len -= l;
4131 addr += l;
4132 todo += l;
4134 rlen = todo;
4135 ret = qemu_ram_ptr_length(raddr, &rlen);
4136 *plen = rlen;
4137 return ret;
4140 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4141 * Will also mark the memory as dirty if is_write == 1. access_len gives
4142 * the amount of memory that was actually read or written by the caller.
4144 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4145 int is_write, target_phys_addr_t access_len)
4147 if (buffer != bounce.buffer) {
4148 if (is_write) {
4149 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4150 while (access_len) {
4151 unsigned l;
4152 l = TARGET_PAGE_SIZE;
4153 if (l > access_len)
4154 l = access_len;
4155 if (!cpu_physical_memory_is_dirty(addr1)) {
4156 /* invalidate code */
4157 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4158 /* set dirty bit */
4159 cpu_physical_memory_set_dirty_flags(
4160 addr1, (0xff & ~CODE_DIRTY_FLAG));
4162 addr1 += l;
4163 access_len -= l;
4166 if (xen_enabled()) {
4167 xen_invalidate_map_cache_entry(buffer);
4169 return;
4171 if (is_write) {
4172 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4174 qemu_vfree(bounce.buffer);
4175 bounce.buffer = NULL;
4176 cpu_notify_map_clients();
4179 /* warning: addr must be aligned */
4180 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4181 enum device_endian endian)
4183 int io_index;
4184 uint8_t *ptr;
4185 uint32_t val;
4186 unsigned long pd;
4187 PhysPageDesc *p;
4189 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4190 if (!p) {
4191 pd = IO_MEM_UNASSIGNED;
4192 } else {
4193 pd = p->phys_offset;
4196 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4197 !(pd & IO_MEM_ROMD)) {
4198 /* I/O case */
4199 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4200 if (p)
4201 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4202 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4203 #if defined(TARGET_WORDS_BIGENDIAN)
4204 if (endian == DEVICE_LITTLE_ENDIAN) {
4205 val = bswap32(val);
4207 #else
4208 if (endian == DEVICE_BIG_ENDIAN) {
4209 val = bswap32(val);
4211 #endif
4212 } else {
4213 /* RAM case */
4214 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4215 (addr & ~TARGET_PAGE_MASK);
4216 switch (endian) {
4217 case DEVICE_LITTLE_ENDIAN:
4218 val = ldl_le_p(ptr);
4219 break;
4220 case DEVICE_BIG_ENDIAN:
4221 val = ldl_be_p(ptr);
4222 break;
4223 default:
4224 val = ldl_p(ptr);
4225 break;
4228 return val;
4231 uint32_t ldl_phys(target_phys_addr_t addr)
4233 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4236 uint32_t ldl_le_phys(target_phys_addr_t addr)
4238 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4241 uint32_t ldl_be_phys(target_phys_addr_t addr)
4243 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4246 /* warning: addr must be aligned */
4247 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4248 enum device_endian endian)
4250 int io_index;
4251 uint8_t *ptr;
4252 uint64_t val;
4253 unsigned long pd;
4254 PhysPageDesc *p;
4256 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4257 if (!p) {
4258 pd = IO_MEM_UNASSIGNED;
4259 } else {
4260 pd = p->phys_offset;
4263 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4264 !(pd & IO_MEM_ROMD)) {
4265 /* I/O case */
4266 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4267 if (p)
4268 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4270 /* XXX This is broken when device endian != cpu endian.
4271 Fix and add "endian" variable check */
4272 #ifdef TARGET_WORDS_BIGENDIAN
4273 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4274 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4275 #else
4276 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4277 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4278 #endif
4279 } else {
4280 /* RAM case */
4281 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4282 (addr & ~TARGET_PAGE_MASK);
4283 switch (endian) {
4284 case DEVICE_LITTLE_ENDIAN:
4285 val = ldq_le_p(ptr);
4286 break;
4287 case DEVICE_BIG_ENDIAN:
4288 val = ldq_be_p(ptr);
4289 break;
4290 default:
4291 val = ldq_p(ptr);
4292 break;
4295 return val;
4298 uint64_t ldq_phys(target_phys_addr_t addr)
4300 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4303 uint64_t ldq_le_phys(target_phys_addr_t addr)
4305 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4308 uint64_t ldq_be_phys(target_phys_addr_t addr)
4310 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
4313 /* XXX: optimize */
4314 uint32_t ldub_phys(target_phys_addr_t addr)
4316 uint8_t val;
4317 cpu_physical_memory_read(addr, &val, 1);
4318 return val;
4321 /* warning: addr must be aligned */
4322 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4323 enum device_endian endian)
4325 int io_index;
4326 uint8_t *ptr;
4327 uint64_t val;
4328 unsigned long pd;
4329 PhysPageDesc *p;
4331 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4332 if (!p) {
4333 pd = IO_MEM_UNASSIGNED;
4334 } else {
4335 pd = p->phys_offset;
4338 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4339 !(pd & IO_MEM_ROMD)) {
4340 /* I/O case */
4341 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4342 if (p)
4343 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4344 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4345 #if defined(TARGET_WORDS_BIGENDIAN)
4346 if (endian == DEVICE_LITTLE_ENDIAN) {
4347 val = bswap16(val);
4349 #else
4350 if (endian == DEVICE_BIG_ENDIAN) {
4351 val = bswap16(val);
4353 #endif
4354 } else {
4355 /* RAM case */
4356 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4357 (addr & ~TARGET_PAGE_MASK);
4358 switch (endian) {
4359 case DEVICE_LITTLE_ENDIAN:
4360 val = lduw_le_p(ptr);
4361 break;
4362 case DEVICE_BIG_ENDIAN:
4363 val = lduw_be_p(ptr);
4364 break;
4365 default:
4366 val = lduw_p(ptr);
4367 break;
4370 return val;
4373 uint32_t lduw_phys(target_phys_addr_t addr)
4375 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4378 uint32_t lduw_le_phys(target_phys_addr_t addr)
4380 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4383 uint32_t lduw_be_phys(target_phys_addr_t addr)
4385 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
4388 /* warning: addr must be aligned. The ram page is not masked as dirty
4389 and the code inside is not invalidated. It is useful if the dirty
4390 bits are used to track modified PTEs */
4391 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4393 int io_index;
4394 uint8_t *ptr;
4395 unsigned long pd;
4396 PhysPageDesc *p;
4398 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4399 if (!p) {
4400 pd = IO_MEM_UNASSIGNED;
4401 } else {
4402 pd = p->phys_offset;
4405 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4406 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4407 if (p)
4408 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4409 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4410 } else {
4411 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4412 ptr = qemu_get_ram_ptr(addr1);
4413 stl_p(ptr, val);
4415 if (unlikely(in_migration)) {
4416 if (!cpu_physical_memory_is_dirty(addr1)) {
4417 /* invalidate code */
4418 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4419 /* set dirty bit */
4420 cpu_physical_memory_set_dirty_flags(
4421 addr1, (0xff & ~CODE_DIRTY_FLAG));
4427 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4429 int io_index;
4430 uint8_t *ptr;
4431 unsigned long pd;
4432 PhysPageDesc *p;
4434 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4435 if (!p) {
4436 pd = IO_MEM_UNASSIGNED;
4437 } else {
4438 pd = p->phys_offset;
4441 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4442 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4443 if (p)
4444 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4445 #ifdef TARGET_WORDS_BIGENDIAN
4446 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4447 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4448 #else
4449 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4450 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4451 #endif
4452 } else {
4453 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4454 (addr & ~TARGET_PAGE_MASK);
4455 stq_p(ptr, val);
4459 /* warning: addr must be aligned */
4460 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4461 enum device_endian endian)
4463 int io_index;
4464 uint8_t *ptr;
4465 unsigned long pd;
4466 PhysPageDesc *p;
4468 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4469 if (!p) {
4470 pd = IO_MEM_UNASSIGNED;
4471 } else {
4472 pd = p->phys_offset;
4475 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4476 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4477 if (p)
4478 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4479 #if defined(TARGET_WORDS_BIGENDIAN)
4480 if (endian == DEVICE_LITTLE_ENDIAN) {
4481 val = bswap32(val);
4483 #else
4484 if (endian == DEVICE_BIG_ENDIAN) {
4485 val = bswap32(val);
4487 #endif
4488 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4489 } else {
4490 unsigned long addr1;
4491 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4492 /* RAM case */
4493 ptr = qemu_get_ram_ptr(addr1);
4494 switch (endian) {
4495 case DEVICE_LITTLE_ENDIAN:
4496 stl_le_p(ptr, val);
4497 break;
4498 case DEVICE_BIG_ENDIAN:
4499 stl_be_p(ptr, val);
4500 break;
4501 default:
4502 stl_p(ptr, val);
4503 break;
4505 if (!cpu_physical_memory_is_dirty(addr1)) {
4506 /* invalidate code */
4507 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4508 /* set dirty bit */
4509 cpu_physical_memory_set_dirty_flags(addr1,
4510 (0xff & ~CODE_DIRTY_FLAG));
4515 void stl_phys(target_phys_addr_t addr, uint32_t val)
4517 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4520 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4522 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4525 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4527 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4530 /* XXX: optimize */
4531 void stb_phys(target_phys_addr_t addr, uint32_t val)
4533 uint8_t v = val;
4534 cpu_physical_memory_write(addr, &v, 1);
4537 /* warning: addr must be aligned */
4538 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4539 enum device_endian endian)
4541 int io_index;
4542 uint8_t *ptr;
4543 unsigned long pd;
4544 PhysPageDesc *p;
4546 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4547 if (!p) {
4548 pd = IO_MEM_UNASSIGNED;
4549 } else {
4550 pd = p->phys_offset;
4553 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4554 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4555 if (p)
4556 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4557 #if defined(TARGET_WORDS_BIGENDIAN)
4558 if (endian == DEVICE_LITTLE_ENDIAN) {
4559 val = bswap16(val);
4561 #else
4562 if (endian == DEVICE_BIG_ENDIAN) {
4563 val = bswap16(val);
4565 #endif
4566 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4567 } else {
4568 unsigned long addr1;
4569 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4570 /* RAM case */
4571 ptr = qemu_get_ram_ptr(addr1);
4572 switch (endian) {
4573 case DEVICE_LITTLE_ENDIAN:
4574 stw_le_p(ptr, val);
4575 break;
4576 case DEVICE_BIG_ENDIAN:
4577 stw_be_p(ptr, val);
4578 break;
4579 default:
4580 stw_p(ptr, val);
4581 break;
4583 if (!cpu_physical_memory_is_dirty(addr1)) {
4584 /* invalidate code */
4585 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4586 /* set dirty bit */
4587 cpu_physical_memory_set_dirty_flags(addr1,
4588 (0xff & ~CODE_DIRTY_FLAG));
4593 void stw_phys(target_phys_addr_t addr, uint32_t val)
4595 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4598 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4600 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4603 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4605 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4608 /* XXX: optimize */
4609 void stq_phys(target_phys_addr_t addr, uint64_t val)
4611 val = tswap64(val);
4612 cpu_physical_memory_write(addr, &val, 8);
4615 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4617 val = cpu_to_le64(val);
4618 cpu_physical_memory_write(addr, &val, 8);
4621 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4623 val = cpu_to_be64(val);
4624 cpu_physical_memory_write(addr, &val, 8);
4627 /* virtual memory access for debug (includes writing to ROM) */
4628 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4629 uint8_t *buf, int len, int is_write)
4631 int l;
4632 target_phys_addr_t phys_addr;
4633 target_ulong page;
4635 while (len > 0) {
4636 page = addr & TARGET_PAGE_MASK;
4637 phys_addr = cpu_get_phys_page_debug(env, page);
4638 /* if no physical page mapped, return an error */
4639 if (phys_addr == -1)
4640 return -1;
4641 l = (page + TARGET_PAGE_SIZE) - addr;
4642 if (l > len)
4643 l = len;
4644 phys_addr += (addr & ~TARGET_PAGE_MASK);
4645 if (is_write)
4646 cpu_physical_memory_write_rom(phys_addr, buf, l);
4647 else
4648 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4649 len -= l;
4650 buf += l;
4651 addr += l;
4653 return 0;
4655 #endif
4657 /* in deterministic execution mode, instructions doing device I/Os
4658 must be at the end of the TB */
4659 void cpu_io_recompile(CPUState *env, void *retaddr)
4661 TranslationBlock *tb;
4662 uint32_t n, cflags;
4663 target_ulong pc, cs_base;
4664 uint64_t flags;
4666 tb = tb_find_pc((unsigned long)retaddr);
4667 if (!tb) {
4668 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4669 retaddr);
4671 n = env->icount_decr.u16.low + tb->icount;
4672 cpu_restore_state(tb, env, (unsigned long)retaddr);
4673 /* Calculate how many instructions had been executed before the fault
4674 occurred. */
4675 n = n - env->icount_decr.u16.low;
4676 /* Generate a new TB ending on the I/O insn. */
4677 n++;
4678 /* On MIPS and SH, delay slot instructions can only be restarted if
4679 they were already the first instruction in the TB. If this is not
4680 the first instruction in a TB then re-execute the preceding
4681 branch. */
4682 #if defined(TARGET_MIPS)
4683 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4684 env->active_tc.PC -= 4;
4685 env->icount_decr.u16.low++;
4686 env->hflags &= ~MIPS_HFLAG_BMASK;
4688 #elif defined(TARGET_SH4)
4689 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4690 && n > 1) {
4691 env->pc -= 2;
4692 env->icount_decr.u16.low++;
4693 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4695 #endif
4696 /* This should never happen. */
4697 if (n > CF_COUNT_MASK)
4698 cpu_abort(env, "TB too big during recompile");
4700 cflags = n | CF_LAST_IO;
4701 pc = tb->pc;
4702 cs_base = tb->cs_base;
4703 flags = tb->flags;
4704 tb_phys_invalidate(tb, -1);
4705 /* FIXME: In theory this could raise an exception. In practice
4706 we have already translated the block once so it's probably ok. */
4707 tb_gen_code(env, pc, cs_base, flags, cflags);
4708 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4709 the first in the TB) then we end up generating a whole new TB and
4710 repeating the fault, which is horribly inefficient.
4711 Better would be to execute just this insn uncached, or generate a
4712 second new TB. */
4713 cpu_resume_from_signal(env, NULL);
4716 #if !defined(CONFIG_USER_ONLY)
4718 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4720 int i, target_code_size, max_target_code_size;
4721 int direct_jmp_count, direct_jmp2_count, cross_page;
4722 TranslationBlock *tb;
4724 target_code_size = 0;
4725 max_target_code_size = 0;
4726 cross_page = 0;
4727 direct_jmp_count = 0;
4728 direct_jmp2_count = 0;
4729 for(i = 0; i < nb_tbs; i++) {
4730 tb = &tbs[i];
4731 target_code_size += tb->size;
4732 if (tb->size > max_target_code_size)
4733 max_target_code_size = tb->size;
4734 if (tb->page_addr[1] != -1)
4735 cross_page++;
4736 if (tb->tb_next_offset[0] != 0xffff) {
4737 direct_jmp_count++;
4738 if (tb->tb_next_offset[1] != 0xffff) {
4739 direct_jmp2_count++;
4743 /* XXX: avoid using doubles ? */
4744 cpu_fprintf(f, "Translation buffer state:\n");
4745 cpu_fprintf(f, "gen code size %td/%ld\n",
4746 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4747 cpu_fprintf(f, "TB count %d/%d\n",
4748 nb_tbs, code_gen_max_blocks);
4749 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4750 nb_tbs ? target_code_size / nb_tbs : 0,
4751 max_target_code_size);
4752 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4753 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4754 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4755 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4756 cross_page,
4757 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4758 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4759 direct_jmp_count,
4760 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4761 direct_jmp2_count,
4762 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4763 cpu_fprintf(f, "\nStatistics:\n");
4764 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4765 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4766 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4767 tcg_dump_info(f, cpu_fprintf);
4770 #define MMUSUFFIX _cmmu
4771 #undef GETPC
4772 #define GETPC() NULL
4773 #define env cpu_single_env
4774 #define SOFTMMU_CODE_ACCESS
4776 #define SHIFT 0
4777 #include "softmmu_template.h"
4779 #define SHIFT 1
4780 #include "softmmu_template.h"
4782 #define SHIFT 2
4783 #include "softmmu_template.h"
4785 #define SHIFT 3
4786 #include "softmmu_template.h"
4788 #undef env
4790 #endif