Sort RAMBlocks by ID for migration, not by ram_addr
[qemu-kvm.git] / exec.c
blob a4116d91cf07cbf3f2b32797f02713be63085e19
1 /*
2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "osdep.h"
33 #include "kvm.h"
34 #include "hw/xen.h"
35 #include "qemu-timer.h"
36 #include "memory.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
39 #include <qemu.h>
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
45 #include <sys/time.h>
46 #include <sys/proc.h>
47 #include <machine/profile.h>
48 #define _KERNEL
49 #include <sys/user.h>
50 #undef _KERNEL
51 #undef sigqueue
52 #include <libutil.h>
53 #endif
54 #endif
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
57 #include "trace.h"
58 #endif
60 #define WANT_EXEC_OBSOLETE
61 #include "exec-obsolete.h"
63 //#define DEBUG_TB_INVALIDATE
64 //#define DEBUG_FLUSH
65 //#define DEBUG_TLB
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
70 //#define DEBUG_TLB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
77 #undef DEBUG_TB_CHECK
78 #endif
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 static TranslationBlock *tbs;
83 static int code_gen_max_blocks;
84 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 static int nb_tbs;
86 /* any access to the tbs or the page table must use this lock */
87 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
89 #if defined(__arm__) || defined(__sparc_v9__)
 90 /* The prologue must be reachable with a direct jump. ARM and Sparc64
 91    have limited branch ranges (possibly also PPC), so place it in a
 92    section close to the code segment. */
93 #define code_gen_section \
94 __attribute__((__section__(".gen_code"))) \
95 __attribute__((aligned (32)))
96 #elif defined(_WIN32)
97 /* Maximum alignment for Win32 is 16. */
98 #define code_gen_section \
99 __attribute__((aligned (16)))
100 #else
101 #define code_gen_section \
102 __attribute__((aligned (32)))
103 #endif
105 uint8_t code_gen_prologue[1024] code_gen_section;
106 static uint8_t *code_gen_buffer;
107 static unsigned long code_gen_buffer_size;
108 /* threshold to flush the translated code buffer */
109 static unsigned long code_gen_buffer_max_size;
110 static uint8_t *code_gen_ptr;
112 #if !defined(CONFIG_USER_ONLY)
113 int phys_ram_fd;
114 static int in_migration;
116 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
118 static MemoryRegion *system_memory;
119 static MemoryRegion *system_io;
121 #endif
123 CPUState *first_cpu;
124 /* current CPU in the current thread. It is only valid inside
125 cpu_exec() */
126 DEFINE_TLS(CPUState *,cpu_single_env);
127 /* 0 = Do not count executed instructions.
128 1 = Precise instruction counting.
129 2 = Adaptive rate instruction counting. */
130 int use_icount = 0;
132 typedef struct PageDesc {
133 /* list of TBs intersecting this ram page */
134 TranslationBlock *first_tb;
135     /* in order to optimize self-modifying code, we count the number of
136        write accesses to a given page so we can switch to a bitmap */
137 unsigned int code_write_count;
138 uint8_t *code_bitmap;
139 #if defined(CONFIG_USER_ONLY)
140 unsigned long flags;
141 #endif
142 } PageDesc;
144 /* In system mode we want L1_MAP to be based on ram offsets,
145 while in user mode we want it to be based on virtual addresses. */
146 #if !defined(CONFIG_USER_ONLY)
147 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
148 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
149 #else
150 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
151 #endif
152 #else
153 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
154 #endif
156 /* Size of the L2 (and L3, etc) page tables. */
157 #define L2_BITS 10
158 #define L2_SIZE (1 << L2_BITS)
160 /* The bits remaining after N lower levels of page tables. */
161 #define P_L1_BITS_REM \
162 ((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
163 #define V_L1_BITS_REM \
164 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
166 /* Size of the L1 page table. Avoid silly small sizes. */
167 #if P_L1_BITS_REM < 4
168 #define P_L1_BITS (P_L1_BITS_REM + L2_BITS)
169 #else
170 #define P_L1_BITS P_L1_BITS_REM
171 #endif
173 #if V_L1_BITS_REM < 4
174 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
175 #else
176 #define V_L1_BITS V_L1_BITS_REM
177 #endif
179 #define P_L1_SIZE ((target_phys_addr_t)1 << P_L1_BITS)
180 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
182 #define P_L1_SHIFT (TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - P_L1_BITS)
183 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
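/* How an index is split by the map walkers below: the level 1 table is
   indexed with (index >> {V,P}_L1_SHIFT) & ({V,P}_L1_SIZE - 1), and each
   lower level consumes a further L2_BITS via
   (index >> (i * L2_BITS)) & (L2_SIZE - 1), until the bottom level holds
   the PageDesc (or PhysPageDesc) entries themselves. */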
185 unsigned long qemu_real_host_page_size;
186 unsigned long qemu_host_page_size;
187 unsigned long qemu_host_page_mask;
189 /* This is a multi-level map on the virtual address space.
190 The bottom level has pointers to PageDesc. */
191 static void *l1_map[V_L1_SIZE];
193 #if !defined(CONFIG_USER_ONLY)
194 typedef struct PhysPageDesc {
195 /* offset in host memory of the page + io_index in the low bits */
196 ram_addr_t phys_offset;
197 ram_addr_t region_offset;
198 } PhysPageDesc;
200 /* This is a multi-level map on the physical address space.
201 The bottom level has pointers to PhysPageDesc. */
202 static void *l1_phys_map[P_L1_SIZE];
204 static void io_mem_init(void);
205 static void memory_map_init(void);
207 /* io memory support */
208 CPUWriteMemoryFunc *io_mem_write[IO_MEM_NB_ENTRIES][4];
209 CPUReadMemoryFunc *io_mem_read[IO_MEM_NB_ENTRIES][4];
210 void *io_mem_opaque[IO_MEM_NB_ENTRIES];
211 static char io_mem_used[IO_MEM_NB_ENTRIES];
212 static int io_mem_watch;
213 #endif
215 /* log support */
216 #ifdef WIN32
217 static const char *logfilename = "qemu.log";
218 #else
219 static const char *logfilename = "/tmp/qemu.log";
220 #endif
221 FILE *logfile;
222 int loglevel;
223 static int log_append = 0;
225 /* statistics */
226 #if !defined(CONFIG_USER_ONLY)
227 static int tlb_flush_count;
228 #endif
229 static int tb_flush_count;
230 static int tb_phys_invalidate_count;
232 #ifdef _WIN32
233 static void map_exec(void *addr, long size)
235 DWORD old_protect;
236 VirtualProtect(addr, size,
237 PAGE_EXECUTE_READWRITE, &old_protect);
240 #else
241 static void map_exec(void *addr, long size)
243 unsigned long start, end, page_size;
245 page_size = getpagesize();
246 start = (unsigned long)addr;
247 start &= ~(page_size - 1);
249 end = (unsigned long)addr + size;
250 end += page_size - 1;
251 end &= ~(page_size - 1);
253 mprotect((void *)start, end - start,
254 PROT_READ | PROT_WRITE | PROT_EXEC);
256 #endif
258 static void page_init(void)
260 /* NOTE: we can always suppose that qemu_host_page_size >=
261 TARGET_PAGE_SIZE */
262 #ifdef _WIN32
264 SYSTEM_INFO system_info;
266 GetSystemInfo(&system_info);
267 qemu_real_host_page_size = system_info.dwPageSize;
269 #else
270 qemu_real_host_page_size = getpagesize();
271 #endif
272 if (qemu_host_page_size == 0)
273 qemu_host_page_size = qemu_real_host_page_size;
274 if (qemu_host_page_size < TARGET_PAGE_SIZE)
275 qemu_host_page_size = TARGET_PAGE_SIZE;
276 qemu_host_page_mask = ~(qemu_host_page_size - 1);
278 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
280 #ifdef HAVE_KINFO_GETVMMAP
281 struct kinfo_vmentry *freep;
282 int i, cnt;
284 freep = kinfo_getvmmap(getpid(), &cnt);
285 if (freep) {
286 mmap_lock();
287 for (i = 0; i < cnt; i++) {
288 unsigned long startaddr, endaddr;
290 startaddr = freep[i].kve_start;
291 endaddr = freep[i].kve_end;
292 if (h2g_valid(startaddr)) {
293 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
295 if (h2g_valid(endaddr)) {
296 endaddr = h2g(endaddr);
297 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
298 } else {
299 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
300 endaddr = ~0ul;
301 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
302 #endif
306 free(freep);
307 mmap_unlock();
309 #else
310 FILE *f;
312 last_brk = (unsigned long)sbrk(0);
314 f = fopen("/compat/linux/proc/self/maps", "r");
315 if (f) {
316 mmap_lock();
318 do {
319 unsigned long startaddr, endaddr;
320 int n;
322 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
324 if (n == 2 && h2g_valid(startaddr)) {
325 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
327 if (h2g_valid(endaddr)) {
328 endaddr = h2g(endaddr);
329 } else {
330 endaddr = ~0ul;
332 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
334 } while (!feof(f));
336 fclose(f);
337 mmap_unlock();
339 #endif
341 #endif
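/* Return the PageDesc for the given page index, walking the multi-level
   l1_map.  If 'alloc' is set, missing intermediate tables and the final
   PageDesc array are allocated on the way down; in user mode this uses
   raw mmap() rather than g_malloc() (see the comment on the ALLOC macro
   below). */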
344 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
346 PageDesc *pd;
347 void **lp;
348 int i;
350 #if defined(CONFIG_USER_ONLY)
351 /* We can't use g_malloc because it may recurse into a locked mutex. */
352 # define ALLOC(P, SIZE) \
353 do { \
354 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
355 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
356 } while (0)
357 #else
358 # define ALLOC(P, SIZE) \
359 do { P = g_malloc0(SIZE); } while (0)
360 #endif
362 /* Level 1. Always allocated. */
363 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
365 /* Level 2..N-1. */
366 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
367 void **p = *lp;
369 if (p == NULL) {
370 if (!alloc) {
371 return NULL;
373 ALLOC(p, sizeof(void *) * L2_SIZE);
374 *lp = p;
377 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
380 pd = *lp;
381 if (pd == NULL) {
382 if (!alloc) {
383 return NULL;
385 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
386 *lp = pd;
389 #undef ALLOC
391 return pd + (index & (L2_SIZE - 1));
394 static inline PageDesc *page_find(tb_page_addr_t index)
396 return page_find_alloc(index, 0);
399 #if !defined(CONFIG_USER_ONLY)
400 static PhysPageDesc *phys_page_find_alloc(target_phys_addr_t index, int alloc)
402 PhysPageDesc *pd;
403 void **lp;
404 int i;
406 /* Level 1. Always allocated. */
407 lp = l1_phys_map + ((index >> P_L1_SHIFT) & (P_L1_SIZE - 1));
409 /* Level 2..N-1. */
410 for (i = P_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
411 void **p = *lp;
412 if (p == NULL) {
413 if (!alloc) {
414 return NULL;
416 *lp = p = g_malloc0(sizeof(void *) * L2_SIZE);
418 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
421 pd = *lp;
422 if (pd == NULL) {
423 int i;
424 int first_index = index & ~(L2_SIZE - 1);
426 if (!alloc) {
427 return NULL;
430 *lp = pd = g_malloc(sizeof(PhysPageDesc) * L2_SIZE);
432 for (i = 0; i < L2_SIZE; i++) {
433 pd[i].phys_offset = IO_MEM_UNASSIGNED;
434 pd[i].region_offset = (first_index + i) << TARGET_PAGE_BITS;
438 return pd + (index & (L2_SIZE - 1));
441 static inline PhysPageDesc *phys_page_find(target_phys_addr_t index)
443 return phys_page_find_alloc(index, 0);
446 static void tlb_protect_code(ram_addr_t ram_addr);
447 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
448 target_ulong vaddr);
449 #define mmap_lock() do { } while(0)
450 #define mmap_unlock() do { } while(0)
451 #endif
453 #define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
455 #if defined(CONFIG_USER_ONLY)
456 /* Currently it is not recommended to allocate big chunks of data in
457    user mode. This will change when a dedicated libc is used. */
458 #define USE_STATIC_CODE_GEN_BUFFER
459 #endif
461 #ifdef USE_STATIC_CODE_GEN_BUFFER
462 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
463 __attribute__((aligned (CODE_GEN_ALIGN)));
464 #endif
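/* Allocate the buffer that will hold translated code.  With
   USE_STATIC_CODE_GEN_BUFFER the static buffer is simply marked
   executable; otherwise the buffer is mmap()ed (or g_malloc()ed) with
   per-host placement constraints so that generated code can reach
   code_gen_prologue and other blocks with direct calls and branches. */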
466 static void code_gen_alloc(unsigned long tb_size)
468 #ifdef USE_STATIC_CODE_GEN_BUFFER
469 code_gen_buffer = static_code_gen_buffer;
470 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
471 map_exec(code_gen_buffer, code_gen_buffer_size);
472 #else
473 code_gen_buffer_size = tb_size;
474 if (code_gen_buffer_size == 0) {
475 #if defined(CONFIG_USER_ONLY)
476 code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
477 #else
478 /* XXX: needs adjustments */
479 code_gen_buffer_size = (unsigned long)(ram_size / 4);
480 #endif
482 if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
483 code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
484 /* The code gen buffer location may have constraints depending on
485 the host cpu and OS */
486 #if defined(__linux__)
488 int flags;
489 void *start = NULL;
491 flags = MAP_PRIVATE | MAP_ANONYMOUS;
492 #if defined(__x86_64__)
493 flags |= MAP_32BIT;
494 /* Cannot map more than that */
495 if (code_gen_buffer_size > (800 * 1024 * 1024))
496 code_gen_buffer_size = (800 * 1024 * 1024);
497 #elif defined(__sparc_v9__)
498 // Map the buffer below 2G, so we can use direct calls and branches
499 flags |= MAP_FIXED;
500 start = (void *) 0x60000000UL;
501 if (code_gen_buffer_size > (512 * 1024 * 1024))
502 code_gen_buffer_size = (512 * 1024 * 1024);
503 #elif defined(__arm__)
504     /* Keep the buffer no bigger than 16MB so we can branch between blocks */
505 if (code_gen_buffer_size > 16 * 1024 * 1024)
506 code_gen_buffer_size = 16 * 1024 * 1024;
507 #elif defined(__s390x__)
508 /* Map the buffer so that we can use direct calls and branches. */
509 /* We have a +- 4GB range on the branches; leave some slop. */
510 if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
511 code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
513 start = (void *)0x90000000UL;
514 #endif
515 code_gen_buffer = mmap(start, code_gen_buffer_size,
516 PROT_WRITE | PROT_READ | PROT_EXEC,
517 flags, -1, 0);
518 if (code_gen_buffer == MAP_FAILED) {
519 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
520 exit(1);
523 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
524 || defined(__DragonFly__) || defined(__OpenBSD__) \
525 || defined(__NetBSD__)
527 int flags;
528 void *addr = NULL;
529 flags = MAP_PRIVATE | MAP_ANONYMOUS;
530 #if defined(__x86_64__)
531 /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
532 * 0x40000000 is free */
533 flags |= MAP_FIXED;
534 addr = (void *)0x40000000;
535 /* Cannot map more than that */
536 if (code_gen_buffer_size > (800 * 1024 * 1024))
537 code_gen_buffer_size = (800 * 1024 * 1024);
538 #elif defined(__sparc_v9__)
539 // Map the buffer below 2G, so we can use direct calls and branches
540 flags |= MAP_FIXED;
541 addr = (void *) 0x60000000UL;
542 if (code_gen_buffer_size > (512 * 1024 * 1024)) {
543 code_gen_buffer_size = (512 * 1024 * 1024);
545 #endif
546 code_gen_buffer = mmap(addr, code_gen_buffer_size,
547 PROT_WRITE | PROT_READ | PROT_EXEC,
548 flags, -1, 0);
549 if (code_gen_buffer == MAP_FAILED) {
550 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
551 exit(1);
554 #else
555 code_gen_buffer = g_malloc(code_gen_buffer_size);
556 map_exec(code_gen_buffer, code_gen_buffer_size);
557 #endif
558 #endif /* !USE_STATIC_CODE_GEN_BUFFER */
559 map_exec(code_gen_prologue, sizeof(code_gen_prologue));
560 code_gen_buffer_max_size = code_gen_buffer_size -
561 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
562 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
563 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
566 /* Must be called before using the QEMU cpus. 'tb_size' is the size
567 (in bytes) allocated to the translation buffer. Zero means default
568 size. */
569 void tcg_exec_init(unsigned long tb_size)
571 cpu_gen_init();
572 code_gen_alloc(tb_size);
573 code_gen_ptr = code_gen_buffer;
574 page_init();
575 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
576 /* There's no guest base to take into account, so go ahead and
577 initialize the prologue now. */
578 tcg_prologue_init(&tcg_ctx);
579 #endif
582 bool tcg_enabled(void)
584 return code_gen_buffer != NULL;
587 void cpu_exec_init_all(void)
589 #if !defined(CONFIG_USER_ONLY)
590 memory_map_init();
591 io_mem_init();
592 #endif
595 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
597 static int cpu_common_post_load(void *opaque, int version_id)
599 CPUState *env = opaque;
601 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
602 version_id is increased. */
603 env->interrupt_request &= ~0x01;
604 tlb_flush(env, 1);
606 return 0;
609 static const VMStateDescription vmstate_cpu_common = {
610 .name = "cpu_common",
611 .version_id = 1,
612 .minimum_version_id = 1,
613 .minimum_version_id_old = 1,
614 .post_load = cpu_common_post_load,
615 .fields = (VMStateField []) {
616 VMSTATE_UINT32(halted, CPUState),
617 VMSTATE_UINT32(interrupt_request, CPUState),
618 VMSTATE_END_OF_LIST()
621 #endif
623 CPUState *qemu_get_cpu(int cpu)
625 CPUState *env = first_cpu;
627 while (env) {
628 if (env->cpu_index == cpu)
629 break;
630 env = env->next_cpu;
633 return env;
636 void cpu_exec_init(CPUState *env)
638 CPUState **penv;
639 int cpu_index;
641 #if defined(CONFIG_USER_ONLY)
642 cpu_list_lock();
643 #endif
644 env->next_cpu = NULL;
645 penv = &first_cpu;
646 cpu_index = 0;
647 while (*penv != NULL) {
648 penv = &(*penv)->next_cpu;
649 cpu_index++;
651 env->cpu_index = cpu_index;
652 env->numa_node = 0;
653 QTAILQ_INIT(&env->breakpoints);
654 QTAILQ_INIT(&env->watchpoints);
655 #ifndef CONFIG_USER_ONLY
656 env->thread_id = qemu_get_thread_id();
657 #endif
658 *penv = env;
659 #if defined(CONFIG_USER_ONLY)
660 cpu_list_unlock();
661 #endif
662 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
663 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
664 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
665 cpu_save, cpu_load, env);
666 #endif
669 /* Allocate a new translation block. Flush the translation buffer if
670 too many translation blocks or too much generated code. */
671 static TranslationBlock *tb_alloc(target_ulong pc)
673 TranslationBlock *tb;
675 if (nb_tbs >= code_gen_max_blocks ||
676 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
677 return NULL;
678 tb = &tbs[nb_tbs++];
679 tb->pc = pc;
680 tb->cflags = 0;
681 return tb;
684 void tb_free(TranslationBlock *tb)
686     /* In practice this is mostly used for single-use temporary TBs.
687        Ignore the hard cases and just back up if this TB happens to
688        be the last one generated. */
689 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
690 code_gen_ptr = tb->tc_ptr;
691 nb_tbs--;
695 static inline void invalidate_page_bitmap(PageDesc *p)
697 if (p->code_bitmap) {
698 g_free(p->code_bitmap);
699 p->code_bitmap = NULL;
701 p->code_write_count = 0;
704 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
706 static void page_flush_tb_1 (int level, void **lp)
708 int i;
710 if (*lp == NULL) {
711 return;
713 if (level == 0) {
714 PageDesc *pd = *lp;
715 for (i = 0; i < L2_SIZE; ++i) {
716 pd[i].first_tb = NULL;
717 invalidate_page_bitmap(pd + i);
719 } else {
720 void **pp = *lp;
721 for (i = 0; i < L2_SIZE; ++i) {
722 page_flush_tb_1 (level - 1, pp + i);
727 static void page_flush_tb(void)
729 int i;
730 for (i = 0; i < V_L1_SIZE; i++) {
731 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
735 /* flush all the translation blocks */
736 /* XXX: tb_flush is currently not thread safe */
737 void tb_flush(CPUState *env1)
739 CPUState *env;
740 #if defined(DEBUG_FLUSH)
741 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
742 (unsigned long)(code_gen_ptr - code_gen_buffer),
743 nb_tbs, nb_tbs > 0 ?
744 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
745 #endif
746 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
747 cpu_abort(env1, "Internal error: code buffer overflow\n");
749 nb_tbs = 0;
751 for(env = first_cpu; env != NULL; env = env->next_cpu) {
752 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
755 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
756 page_flush_tb();
758 code_gen_ptr = code_gen_buffer;
759 /* XXX: flush processor icache at this point if cache flush is
760 expensive */
761 tb_flush_count++;
764 #ifdef DEBUG_TB_CHECK
766 static void tb_invalidate_check(target_ulong address)
768 TranslationBlock *tb;
769 int i;
770 address &= TARGET_PAGE_MASK;
771 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
772 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
773 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
774 address >= tb->pc + tb->size)) {
775 printf("ERROR invalidate: address=" TARGET_FMT_lx
776 " PC=%08lx size=%04x\n",
777 address, (long)tb->pc, tb->size);
783 /* verify that all the pages have correct rights for code */
784 static void tb_page_check(void)
786 TranslationBlock *tb;
787 int i, flags1, flags2;
789 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
790 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
791 flags1 = page_get_flags(tb->pc);
792 flags2 = page_get_flags(tb->pc + tb->size - 1);
793 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
794 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
795 (long)tb->pc, tb->size, flags1, flags2);
801 #endif
803 /* invalidate one TB */
804 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
805 int next_offset)
807 TranslationBlock *tb1;
808 for(;;) {
809 tb1 = *ptb;
810 if (tb1 == tb) {
811 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
812 break;
814 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
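/* The page_next[] and jmp_next[]/jmp_first lists walked below store a
   small tag in the two low bits of each TranslationBlock pointer: the
   tag is the page slot (0 or 1) the link belongs to, and the value 2
   marks the head of a circular jump list.  Hence the recurring
   "(long)tb & 3" / "(long)tb & ~3" untagging. */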
818 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
820 TranslationBlock *tb1;
821 unsigned int n1;
823 for(;;) {
824 tb1 = *ptb;
825 n1 = (long)tb1 & 3;
826 tb1 = (TranslationBlock *)((long)tb1 & ~3);
827 if (tb1 == tb) {
828 *ptb = tb1->page_next[n1];
829 break;
831 ptb = &tb1->page_next[n1];
835 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
837 TranslationBlock *tb1, **ptb;
838 unsigned int n1;
840 ptb = &tb->jmp_next[n];
841 tb1 = *ptb;
842 if (tb1) {
843 /* find tb(n) in circular list */
844 for(;;) {
845 tb1 = *ptb;
846 n1 = (long)tb1 & 3;
847 tb1 = (TranslationBlock *)((long)tb1 & ~3);
848 if (n1 == n && tb1 == tb)
849 break;
850 if (n1 == 2) {
851 ptb = &tb1->jmp_first;
852 } else {
853 ptb = &tb1->jmp_next[n1];
856 /* now we can suppress tb(n) from the list */
857 *ptb = tb->jmp_next[n];
859 tb->jmp_next[n] = NULL;
863 /* reset the jump entry 'n' of a TB so that it is not chained to
864 another TB */
865 static inline void tb_reset_jump(TranslationBlock *tb, int n)
867 tb_set_jmp_target(tb, n, (unsigned long)(tb->tc_ptr + tb->tb_next_offset[n]));
870 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
872 CPUState *env;
873 PageDesc *p;
874 unsigned int h, n1;
875 tb_page_addr_t phys_pc;
876 TranslationBlock *tb1, *tb2;
878 /* remove the TB from the hash list */
879 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
880 h = tb_phys_hash_func(phys_pc);
881 tb_remove(&tb_phys_hash[h], tb,
882 offsetof(TranslationBlock, phys_hash_next));
884 /* remove the TB from the page list */
885 if (tb->page_addr[0] != page_addr) {
886 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
887 tb_page_remove(&p->first_tb, tb);
888 invalidate_page_bitmap(p);
890 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
891 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
892 tb_page_remove(&p->first_tb, tb);
893 invalidate_page_bitmap(p);
896 tb_invalidated_flag = 1;
898 /* remove the TB from the hash list */
899 h = tb_jmp_cache_hash_func(tb->pc);
900 for(env = first_cpu; env != NULL; env = env->next_cpu) {
901 if (env->tb_jmp_cache[h] == tb)
902 env->tb_jmp_cache[h] = NULL;
905 /* suppress this TB from the two jump lists */
906 tb_jmp_remove(tb, 0);
907 tb_jmp_remove(tb, 1);
909 /* suppress any remaining jumps to this TB */
910 tb1 = tb->jmp_first;
911 for(;;) {
912 n1 = (long)tb1 & 3;
913 if (n1 == 2)
914 break;
915 tb1 = (TranslationBlock *)((long)tb1 & ~3);
916 tb2 = tb1->jmp_next[n1];
917 tb_reset_jump(tb1, n1);
918 tb1->jmp_next[n1] = NULL;
919 tb1 = tb2;
921 tb->jmp_first = (TranslationBlock *)((long)tb | 2); /* fail safe */
923 tb_phys_invalidate_count++;
926 static inline void set_bits(uint8_t *tab, int start, int len)
928 int end, mask, end1;
930 end = start + len;
931 tab += start >> 3;
932 mask = 0xff << (start & 7);
933 if ((start & ~7) == (end & ~7)) {
934 if (start < end) {
935 mask &= ~(0xff << (end & 7));
936 *tab |= mask;
938 } else {
939 *tab++ |= mask;
940 start = (start + 8) & ~7;
941 end1 = end & ~7;
942 while (start < end1) {
943 *tab++ = 0xff;
944 start += 8;
946 if (start < end) {
947 mask = ~(0xff << (end & 7));
948 *tab |= mask;
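/* The code_bitmap built below has one bit per byte of the page
   (TARGET_PAGE_SIZE / 8 bytes of bitmap).  Each TB on the page sets the
   bits covering the bytes it was translated from, so that
   tb_invalidate_phys_page_fast() can tell whether a small write actually
   hits translated code. */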
953 static void build_page_bitmap(PageDesc *p)
955 int n, tb_start, tb_end;
956 TranslationBlock *tb;
958 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
960 tb = p->first_tb;
961 while (tb != NULL) {
962 n = (long)tb & 3;
963 tb = (TranslationBlock *)((long)tb & ~3);
964 /* NOTE: this is subtle as a TB may span two physical pages */
965 if (n == 0) {
966 /* NOTE: tb_end may be after the end of the page, but
967 it is not a problem */
968 tb_start = tb->pc & ~TARGET_PAGE_MASK;
969 tb_end = tb_start + tb->size;
970 if (tb_end > TARGET_PAGE_SIZE)
971 tb_end = TARGET_PAGE_SIZE;
972 } else {
973 tb_start = 0;
974 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
976 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
977 tb = tb->page_next[n];
981 TranslationBlock *tb_gen_code(CPUState *env,
982 target_ulong pc, target_ulong cs_base,
983 int flags, int cflags)
985 TranslationBlock *tb;
986 uint8_t *tc_ptr;
987 tb_page_addr_t phys_pc, phys_page2;
988 target_ulong virt_page2;
989 int code_gen_size;
991 phys_pc = get_page_addr_code(env, pc);
992 tb = tb_alloc(pc);
993 if (!tb) {
994 /* flush must be done */
995 tb_flush(env);
996 /* cannot fail at this point */
997 tb = tb_alloc(pc);
998 /* Don't forget to invalidate previous TB info. */
999 tb_invalidated_flag = 1;
1001 tc_ptr = code_gen_ptr;
1002 tb->tc_ptr = tc_ptr;
1003 tb->cs_base = cs_base;
1004 tb->flags = flags;
1005 tb->cflags = cflags;
1006 cpu_gen_code(env, tb, &code_gen_size);
1007 code_gen_ptr = (void *)(((unsigned long)code_gen_ptr + code_gen_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1009 /* check next page if needed */
1010 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1011 phys_page2 = -1;
1012 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1013 phys_page2 = get_page_addr_code(env, virt_page2);
1015 tb_link_page(tb, phys_pc, phys_page2);
1016 return tb;
1019 /* invalidate all TBs which intersect with the target physical page
1020    in the range [start, end). NOTE: start and end must refer to
1021 the same physical page. 'is_cpu_write_access' should be true if called
1022 from a real cpu write access: the virtual CPU will exit the current
1023 TB if code is modified inside this TB. */
1024 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1025 int is_cpu_write_access)
1027 TranslationBlock *tb, *tb_next, *saved_tb;
1028 CPUState *env = cpu_single_env;
1029 tb_page_addr_t tb_start, tb_end;
1030 PageDesc *p;
1031 int n;
1032 #ifdef TARGET_HAS_PRECISE_SMC
1033 int current_tb_not_found = is_cpu_write_access;
1034 TranslationBlock *current_tb = NULL;
1035 int current_tb_modified = 0;
1036 target_ulong current_pc = 0;
1037 target_ulong current_cs_base = 0;
1038 int current_flags = 0;
1039 #endif /* TARGET_HAS_PRECISE_SMC */
1041 p = page_find(start >> TARGET_PAGE_BITS);
1042 if (!p)
1043 return;
1044 if (!p->code_bitmap &&
1045 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1046 is_cpu_write_access) {
1047 /* build code bitmap */
1048 build_page_bitmap(p);
1051     /* we remove all the TBs in the range [start, end) */
1052 /* XXX: see if in some cases it could be faster to invalidate all the code */
1053 tb = p->first_tb;
1054 while (tb != NULL) {
1055 n = (long)tb & 3;
1056 tb = (TranslationBlock *)((long)tb & ~3);
1057 tb_next = tb->page_next[n];
1058 /* NOTE: this is subtle as a TB may span two physical pages */
1059 if (n == 0) {
1060 /* NOTE: tb_end may be after the end of the page, but
1061 it is not a problem */
1062 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1063 tb_end = tb_start + tb->size;
1064 } else {
1065 tb_start = tb->page_addr[1];
1066 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1068 if (!(tb_end <= start || tb_start >= end)) {
1069 #ifdef TARGET_HAS_PRECISE_SMC
1070 if (current_tb_not_found) {
1071 current_tb_not_found = 0;
1072 current_tb = NULL;
1073 if (env->mem_io_pc) {
1074 /* now we have a real cpu fault */
1075 current_tb = tb_find_pc(env->mem_io_pc);
1078 if (current_tb == tb &&
1079 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1080 /* If we are modifying the current TB, we must stop
1081 its execution. We could be more precise by checking
1082 that the modification is after the current PC, but it
1083 would require a specialized function to partially
1084 restore the CPU state */
1086 current_tb_modified = 1;
1087 cpu_restore_state(current_tb, env, env->mem_io_pc);
1088 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1089 &current_flags);
1091 #endif /* TARGET_HAS_PRECISE_SMC */
1092 /* we need to do that to handle the case where a signal
1093 occurs while doing tb_phys_invalidate() */
1094 saved_tb = NULL;
1095 if (env) {
1096 saved_tb = env->current_tb;
1097 env->current_tb = NULL;
1099 tb_phys_invalidate(tb, -1);
1100 if (env) {
1101 env->current_tb = saved_tb;
1102 if (env->interrupt_request && env->current_tb)
1103 cpu_interrupt(env, env->interrupt_request);
1106 tb = tb_next;
1108 #if !defined(CONFIG_USER_ONLY)
1109 /* if no code remaining, no need to continue to use slow writes */
1110 if (!p->first_tb) {
1111 invalidate_page_bitmap(p);
1112 if (is_cpu_write_access) {
1113 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1116 #endif
1117 #ifdef TARGET_HAS_PRECISE_SMC
1118 if (current_tb_modified) {
1119 /* we generate a block containing just the instruction
1120 modifying the memory. It will ensure that it cannot modify
1121 itself */
1122 env->current_tb = NULL;
1123 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1124 cpu_resume_from_signal(env, NULL);
1126 #endif
1129 /* len must be <= 8 and start must be a multiple of len */
1130 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1132 PageDesc *p;
1133 int offset, b;
1134 #if 0
1135 if (1) {
1136 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1137 cpu_single_env->mem_io_vaddr, len,
1138 cpu_single_env->eip,
1139 cpu_single_env->eip + (long)cpu_single_env->segs[R_CS].base);
1141 #endif
1142 p = page_find(start >> TARGET_PAGE_BITS);
1143 if (!p)
1144 return;
1145 if (p->code_bitmap) {
1146 offset = start & ~TARGET_PAGE_MASK;
1147 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1148 if (b & ((1 << len) - 1))
1149 goto do_invalidate;
1150 } else {
1151 do_invalidate:
1152 tb_invalidate_phys_page_range(start, start + len, 1);
1156 #if !defined(CONFIG_SOFTMMU)
1157 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1158 unsigned long pc, void *puc)
1160 TranslationBlock *tb;
1161 PageDesc *p;
1162 int n;
1163 #ifdef TARGET_HAS_PRECISE_SMC
1164 TranslationBlock *current_tb = NULL;
1165 CPUState *env = cpu_single_env;
1166 int current_tb_modified = 0;
1167 target_ulong current_pc = 0;
1168 target_ulong current_cs_base = 0;
1169 int current_flags = 0;
1170 #endif
1172 addr &= TARGET_PAGE_MASK;
1173 p = page_find(addr >> TARGET_PAGE_BITS);
1174 if (!p)
1175 return;
1176 tb = p->first_tb;
1177 #ifdef TARGET_HAS_PRECISE_SMC
1178 if (tb && pc != 0) {
1179 current_tb = tb_find_pc(pc);
1181 #endif
1182 while (tb != NULL) {
1183 n = (long)tb & 3;
1184 tb = (TranslationBlock *)((long)tb & ~3);
1185 #ifdef TARGET_HAS_PRECISE_SMC
1186 if (current_tb == tb &&
1187 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1188 /* If we are modifying the current TB, we must stop
1189 its execution. We could be more precise by checking
1190 that the modification is after the current PC, but it
1191 would require a specialized function to partially
1192 restore the CPU state */
1194 current_tb_modified = 1;
1195 cpu_restore_state(current_tb, env, pc);
1196 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1197 &current_flags);
1199 #endif /* TARGET_HAS_PRECISE_SMC */
1200 tb_phys_invalidate(tb, addr);
1201 tb = tb->page_next[n];
1203 p->first_tb = NULL;
1204 #ifdef TARGET_HAS_PRECISE_SMC
1205 if (current_tb_modified) {
1206 /* we generate a block containing just the instruction
1207 modifying the memory. It will ensure that it cannot modify
1208 itself */
1209 env->current_tb = NULL;
1210 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1211 cpu_resume_from_signal(env, puc);
1213 #endif
1215 #endif
1217 /* add the tb in the target page and protect it if necessary */
1218 static inline void tb_alloc_page(TranslationBlock *tb,
1219 unsigned int n, tb_page_addr_t page_addr)
1221 PageDesc *p;
1222 #ifndef CONFIG_USER_ONLY
1223 bool page_already_protected;
1224 #endif
1226 tb->page_addr[n] = page_addr;
1227 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1228 tb->page_next[n] = p->first_tb;
1229 #ifndef CONFIG_USER_ONLY
1230 page_already_protected = p->first_tb != NULL;
1231 #endif
1232 p->first_tb = (TranslationBlock *)((long)tb | n);
1233 invalidate_page_bitmap(p);
1235 #if defined(TARGET_HAS_SMC) || 1
1237 #if defined(CONFIG_USER_ONLY)
1238 if (p->flags & PAGE_WRITE) {
1239 target_ulong addr;
1240 PageDesc *p2;
1241 int prot;
1243         /* force the host page to be non-writable (writes will take a
1244            page fault + mprotect overhead) */
1245 page_addr &= qemu_host_page_mask;
1246 prot = 0;
1247 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1248 addr += TARGET_PAGE_SIZE) {
1250 p2 = page_find (addr >> TARGET_PAGE_BITS);
1251 if (!p2)
1252 continue;
1253 prot |= p2->flags;
1254 p2->flags &= ~PAGE_WRITE;
1256 mprotect(g2h(page_addr), qemu_host_page_size,
1257 (prot & PAGE_BITS) & ~PAGE_WRITE);
1258 #ifdef DEBUG_TB_INVALIDATE
1259 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1260 page_addr);
1261 #endif
1263 #else
1264 /* if some code is already present, then the pages are already
1265 protected. So we handle the case where only the first TB is
1266 allocated in a physical page */
1267 if (!page_already_protected) {
1268 tlb_protect_code(page_addr);
1270 #endif
1272 #endif /* TARGET_HAS_SMC */
1275 /* add a new TB and link it to the physical page tables. phys_page2 is
1276 (-1) to indicate that only one page contains the TB. */
1277 void tb_link_page(TranslationBlock *tb,
1278 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1280 unsigned int h;
1281 TranslationBlock **ptb;
1283 /* Grab the mmap lock to stop another thread invalidating this TB
1284 before we are done. */
1285 mmap_lock();
1286 /* add in the physical hash table */
1287 h = tb_phys_hash_func(phys_pc);
1288 ptb = &tb_phys_hash[h];
1289 tb->phys_hash_next = *ptb;
1290 *ptb = tb;
1292 /* add in the page list */
1293 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1294 if (phys_page2 != -1)
1295 tb_alloc_page(tb, 1, phys_page2);
1296 else
1297 tb->page_addr[1] = -1;
1299 tb->jmp_first = (TranslationBlock *)((long)tb | 2);
1300 tb->jmp_next[0] = NULL;
1301 tb->jmp_next[1] = NULL;
1303 /* init original jump addresses */
1304 if (tb->tb_next_offset[0] != 0xffff)
1305 tb_reset_jump(tb, 0);
1306 if (tb->tb_next_offset[1] != 0xffff)
1307 tb_reset_jump(tb, 1);
1309 #ifdef DEBUG_TB_CHECK
1310 tb_page_check();
1311 #endif
1312 mmap_unlock();
1315 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1316 tb[1].tc_ptr. Return NULL if not found */
1317 TranslationBlock *tb_find_pc(unsigned long tc_ptr)
1319 int m_min, m_max, m;
1320 unsigned long v;
1321 TranslationBlock *tb;
1323 if (nb_tbs <= 0)
1324 return NULL;
1325 if (tc_ptr < (unsigned long)code_gen_buffer ||
1326 tc_ptr >= (unsigned long)code_gen_ptr)
1327 return NULL;
1328 /* binary search (cf Knuth) */
1329 m_min = 0;
1330 m_max = nb_tbs - 1;
1331 while (m_min <= m_max) {
1332 m = (m_min + m_max) >> 1;
1333 tb = &tbs[m];
1334 v = (unsigned long)tb->tc_ptr;
1335 if (v == tc_ptr)
1336 return tb;
1337 else if (tc_ptr < v) {
1338 m_max = m - 1;
1339 } else {
1340 m_min = m + 1;
1343 return &tbs[m_max];
1346 static void tb_reset_jump_recursive(TranslationBlock *tb);
1348 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1350 TranslationBlock *tb1, *tb_next, **ptb;
1351 unsigned int n1;
1353 tb1 = tb->jmp_next[n];
1354 if (tb1 != NULL) {
1355 /* find head of list */
1356 for(;;) {
1357 n1 = (long)tb1 & 3;
1358 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1359 if (n1 == 2)
1360 break;
1361 tb1 = tb1->jmp_next[n1];
1363         /* we are now sure that tb jumps to tb1 */
1364 tb_next = tb1;
1366 /* remove tb from the jmp_first list */
1367 ptb = &tb_next->jmp_first;
1368 for(;;) {
1369 tb1 = *ptb;
1370 n1 = (long)tb1 & 3;
1371 tb1 = (TranslationBlock *)((long)tb1 & ~3);
1372 if (n1 == n && tb1 == tb)
1373 break;
1374 ptb = &tb1->jmp_next[n1];
1376 *ptb = tb->jmp_next[n];
1377 tb->jmp_next[n] = NULL;
1379 /* suppress the jump to next tb in generated code */
1380 tb_reset_jump(tb, n);
1382 /* suppress jumps in the tb on which we could have jumped */
1383 tb_reset_jump_recursive(tb_next);
1387 static void tb_reset_jump_recursive(TranslationBlock *tb)
1389 tb_reset_jump_recursive2(tb, 0);
1390 tb_reset_jump_recursive2(tb, 1);
1393 #if defined(TARGET_HAS_ICE)
1394 #if defined(CONFIG_USER_ONLY)
1395 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1397 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1399 #else
1400 static void breakpoint_invalidate(CPUState *env, target_ulong pc)
1402 target_phys_addr_t addr;
1403 target_ulong pd;
1404 ram_addr_t ram_addr;
1405 PhysPageDesc *p;
1407 addr = cpu_get_phys_page_debug(env, pc);
1408 p = phys_page_find(addr >> TARGET_PAGE_BITS);
1409 if (!p) {
1410 pd = IO_MEM_UNASSIGNED;
1411 } else {
1412 pd = p->phys_offset;
1414 ram_addr = (pd & TARGET_PAGE_MASK) | (pc & ~TARGET_PAGE_MASK);
1415 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1417 #endif
1418 #endif /* TARGET_HAS_ICE */
1420 #if defined(CONFIG_USER_ONLY)
1421 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1426 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1427 int flags, CPUWatchpoint **watchpoint)
1429 return -ENOSYS;
1431 #else
1432 /* Add a watchpoint. */
1433 int cpu_watchpoint_insert(CPUState *env, target_ulong addr, target_ulong len,
1434 int flags, CPUWatchpoint **watchpoint)
1436 target_ulong len_mask = ~(len - 1);
1437 CPUWatchpoint *wp;
1439 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1440 if ((len != 1 && len != 2 && len != 4 && len != 8) || (addr & ~len_mask)) {
1441 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1442 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1443 return -EINVAL;
1445 wp = g_malloc(sizeof(*wp));
1447 wp->vaddr = addr;
1448 wp->len_mask = len_mask;
1449 wp->flags = flags;
1451 /* keep all GDB-injected watchpoints in front */
1452 if (flags & BP_GDB)
1453 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1454 else
1455 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1457 tlb_flush_page(env, addr);
1459 if (watchpoint)
1460 *watchpoint = wp;
1461 return 0;
1464 /* Remove a specific watchpoint. */
1465 int cpu_watchpoint_remove(CPUState *env, target_ulong addr, target_ulong len,
1466 int flags)
1468 target_ulong len_mask = ~(len - 1);
1469 CPUWatchpoint *wp;
1471 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1472 if (addr == wp->vaddr && len_mask == wp->len_mask
1473 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1474 cpu_watchpoint_remove_by_ref(env, wp);
1475 return 0;
1478 return -ENOENT;
1481 /* Remove a specific watchpoint by reference. */
1482 void cpu_watchpoint_remove_by_ref(CPUState *env, CPUWatchpoint *watchpoint)
1484 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1486 tlb_flush_page(env, watchpoint->vaddr);
1488 g_free(watchpoint);
1491 /* Remove all matching watchpoints. */
1492 void cpu_watchpoint_remove_all(CPUState *env, int mask)
1494 CPUWatchpoint *wp, *next;
1496 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1497 if (wp->flags & mask)
1498 cpu_watchpoint_remove_by_ref(env, wp);
1501 #endif
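/* Typical caller: a debug front end such as the gdb stub inserts
   breakpoints and watchpoints with the BP_GDB flag so that
   debugger-injected entries stay at the front of their lists (see the
   insertion code above and below). */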
1503 /* Add a breakpoint. */
1504 int cpu_breakpoint_insert(CPUState *env, target_ulong pc, int flags,
1505 CPUBreakpoint **breakpoint)
1507 #if defined(TARGET_HAS_ICE)
1508 CPUBreakpoint *bp;
1510 bp = g_malloc(sizeof(*bp));
1512 bp->pc = pc;
1513 bp->flags = flags;
1515 /* keep all GDB-injected breakpoints in front */
1516 if (flags & BP_GDB)
1517 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1518 else
1519 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1521 breakpoint_invalidate(env, pc);
1523 if (breakpoint)
1524 *breakpoint = bp;
1525 return 0;
1526 #else
1527 return -ENOSYS;
1528 #endif
1531 /* Remove a specific breakpoint. */
1532 int cpu_breakpoint_remove(CPUState *env, target_ulong pc, int flags)
1534 #if defined(TARGET_HAS_ICE)
1535 CPUBreakpoint *bp;
1537 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1538 if (bp->pc == pc && bp->flags == flags) {
1539 cpu_breakpoint_remove_by_ref(env, bp);
1540 return 0;
1543 return -ENOENT;
1544 #else
1545 return -ENOSYS;
1546 #endif
1549 /* Remove a specific breakpoint by reference. */
1550 void cpu_breakpoint_remove_by_ref(CPUState *env, CPUBreakpoint *breakpoint)
1552 #if defined(TARGET_HAS_ICE)
1553 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1555 breakpoint_invalidate(env, breakpoint->pc);
1557 g_free(breakpoint);
1558 #endif
1561 /* Remove all matching breakpoints. */
1562 void cpu_breakpoint_remove_all(CPUState *env, int mask)
1564 #if defined(TARGET_HAS_ICE)
1565 CPUBreakpoint *bp, *next;
1567 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1568 if (bp->flags & mask)
1569 cpu_breakpoint_remove_by_ref(env, bp);
1571 #endif
1574 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1575 CPU loop after each instruction */
1576 void cpu_single_step(CPUState *env, int enabled)
1578 #if defined(TARGET_HAS_ICE)
1579 if (env->singlestep_enabled != enabled) {
1580 env->singlestep_enabled = enabled;
1581 if (kvm_enabled())
1582 kvm_update_guest_debug(env, 0);
1583 else {
1584 /* must flush all the translated code to avoid inconsistencies */
1585 /* XXX: only flush what is necessary */
1586 tb_flush(env);
1589 #endif
1592 /* enable or disable low-level logging */
1593 void cpu_set_log(int log_flags)
1595 loglevel = log_flags;
1596 if (loglevel && !logfile) {
1597 logfile = fopen(logfilename, log_append ? "a" : "w");
1598 if (!logfile) {
1599 perror(logfilename);
1600 _exit(1);
1602 #if !defined(CONFIG_SOFTMMU)
1603         /* must avoid glibc's use of mmap() by setting a buffer "by hand" */
1605 static char logfile_buf[4096];
1606 setvbuf(logfile, logfile_buf, _IOLBF, sizeof(logfile_buf));
1608 #elif defined(_WIN32)
1609 /* Win32 doesn't support line-buffering, so use unbuffered output. */
1610 setvbuf(logfile, NULL, _IONBF, 0);
1611 #else
1612 setvbuf(logfile, NULL, _IOLBF, 0);
1613 #endif
1614 log_append = 1;
1616 if (!loglevel && logfile) {
1617 fclose(logfile);
1618 logfile = NULL;
1622 void cpu_set_log_filename(const char *filename)
1624 logfilename = strdup(filename);
1625 if (logfile) {
1626 fclose(logfile);
1627 logfile = NULL;
1629 cpu_set_log(loglevel);
1632 static void cpu_unlink_tb(CPUState *env)
1634 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1635 problem and hope the cpu will stop of its own accord. For userspace
1636 emulation this often isn't actually as bad as it sounds. Often
1637 signals are used primarily to interrupt blocking syscalls. */
1638 TranslationBlock *tb;
1639 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1641 spin_lock(&interrupt_lock);
1642 tb = env->current_tb;
1643 /* if the cpu is currently executing code, we must unlink it and
1644 all the potentially executing TB */
1645 if (tb) {
1646 env->current_tb = NULL;
1647 tb_reset_jump_recursive(tb);
1649 spin_unlock(&interrupt_lock);
1652 #ifndef CONFIG_USER_ONLY
1653 /* mask must never be zero, except for A20 change call */
1654 static void tcg_handle_interrupt(CPUState *env, int mask)
1656 int old_mask;
1658 old_mask = env->interrupt_request;
1659 env->interrupt_request |= mask;
1662      * If called from iothread context, wake the target cpu in
1663      * case it is halted.
1665 if (!qemu_cpu_is_self(env)) {
1666 qemu_cpu_kick(env);
1667 return;
1670 if (use_icount) {
1671 env->icount_decr.u16.high = 0xffff;
1672 if (!can_do_io(env)
1673 && (mask & ~old_mask) != 0) {
1674 cpu_abort(env, "Raised interrupt while not in I/O function");
1676 } else {
1677 cpu_unlink_tb(env);
1681 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
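/* cpu_interrupt() dispatches through this function pointer, so an
   accelerator (e.g. KVM) can install its own handler in place of the
   TCG one above. */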
1683 #else /* CONFIG_USER_ONLY */
1685 void cpu_interrupt(CPUState *env, int mask)
1687 env->interrupt_request |= mask;
1688 cpu_unlink_tb(env);
1690 #endif /* CONFIG_USER_ONLY */
1692 void cpu_reset_interrupt(CPUState *env, int mask)
1694 env->interrupt_request &= ~mask;
1697 void cpu_exit(CPUState *env)
1699 env->exit_request = 1;
1700 cpu_unlink_tb(env);
1703 const CPULogItem cpu_log_items[] = {
1704 { CPU_LOG_TB_OUT_ASM, "out_asm",
1705 "show generated host assembly code for each compiled TB" },
1706 { CPU_LOG_TB_IN_ASM, "in_asm",
1707 "show target assembly code for each compiled TB" },
1708 { CPU_LOG_TB_OP, "op",
1709 "show micro ops for each compiled TB" },
1710 { CPU_LOG_TB_OP_OPT, "op_opt",
1711 "show micro ops "
1712 #ifdef TARGET_I386
1713 "before eflags optimization and "
1714 #endif
1715 "after liveness analysis" },
1716 { CPU_LOG_INT, "int",
1717 "show interrupts/exceptions in short format" },
1718 { CPU_LOG_EXEC, "exec",
1719 "show trace before each executed TB (lots of logs)" },
1720 { CPU_LOG_TB_CPU, "cpu",
1721 "show CPU state before block translation" },
1722 #ifdef TARGET_I386
1723 { CPU_LOG_PCALL, "pcall",
1724 "show protected mode far calls/returns/exceptions" },
1725 { CPU_LOG_RESET, "cpu_reset",
1726 "show CPU state before CPU resets" },
1727 #endif
1728 #ifdef DEBUG_IOPORT
1729 { CPU_LOG_IOPORT, "ioport",
1730 "show all i/o ports accesses" },
1731 #endif
1732 { 0, NULL, NULL },
1735 static int cmp1(const char *s1, int n, const char *s2)
1737 if (strlen(s2) != n)
1738 return 0;
1739 return memcmp(s1, s2, n) == 0;
1742 /* takes a comma-separated list of log masks. Returns 0 on error. */
1743 int cpu_str_to_log_mask(const char *str)
1745 const CPULogItem *item;
1746 int mask;
1747 const char *p, *p1;
1749 p = str;
1750 mask = 0;
1751 for(;;) {
1752 p1 = strchr(p, ',');
1753 if (!p1)
1754 p1 = p + strlen(p);
1755 if(cmp1(p,p1-p,"all")) {
1756 for(item = cpu_log_items; item->mask != 0; item++) {
1757 mask |= item->mask;
1759 } else {
1760 for(item = cpu_log_items; item->mask != 0; item++) {
1761 if (cmp1(p, p1 - p, item->name))
1762 goto found;
1764 return 0;
1766 found:
1767 mask |= item->mask;
1768 if (*p1 != ',')
1769 break;
1770 p = p1 + 1;
1772 return mask;
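/* Example: cpu_str_to_log_mask("in_asm,cpu") yields
   CPU_LOG_TB_IN_ASM | CPU_LOG_TB_CPU, "all" selects every entry of
   cpu_log_items, and an unknown name makes the whole call return 0. */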
1775 void cpu_abort(CPUState *env, const char *fmt, ...)
1777 va_list ap;
1778 va_list ap2;
1780 va_start(ap, fmt);
1781 va_copy(ap2, ap);
1782 fprintf(stderr, "qemu: fatal: ");
1783 vfprintf(stderr, fmt, ap);
1784 fprintf(stderr, "\n");
1785 #ifdef TARGET_I386
1786 cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
1787 #else
1788 cpu_dump_state(env, stderr, fprintf, 0);
1789 #endif
1790 if (qemu_log_enabled()) {
1791 qemu_log("qemu: fatal: ");
1792 qemu_log_vprintf(fmt, ap2);
1793 qemu_log("\n");
1794 #ifdef TARGET_I386
1795 log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
1796 #else
1797 log_cpu_state(env, 0);
1798 #endif
1799 qemu_log_flush();
1800 qemu_log_close();
1802 va_end(ap2);
1803 va_end(ap);
1804 #if defined(CONFIG_USER_ONLY)
1806 struct sigaction act;
1807 sigfillset(&act.sa_mask);
1808 act.sa_handler = SIG_DFL;
1809 sigaction(SIGABRT, &act, NULL);
1811 #endif
1812 abort();
1815 CPUState *cpu_copy(CPUState *env)
1817 CPUState *new_env = cpu_init(env->cpu_model_str);
1818 CPUState *next_cpu = new_env->next_cpu;
1819 int cpu_index = new_env->cpu_index;
1820 #if defined(TARGET_HAS_ICE)
1821 CPUBreakpoint *bp;
1822 CPUWatchpoint *wp;
1823 #endif
1825 memcpy(new_env, env, sizeof(CPUState));
1827 /* Preserve chaining and index. */
1828 new_env->next_cpu = next_cpu;
1829 new_env->cpu_index = cpu_index;
1831 /* Clone all break/watchpoints.
1832 Note: Once we support ptrace with hw-debug register access, make sure
1833 BP_CPU break/watchpoints are handled correctly on clone. */
1834 QTAILQ_INIT(&env->breakpoints);
1835 QTAILQ_INIT(&env->watchpoints);
1836 #if defined(TARGET_HAS_ICE)
1837 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1838 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1840 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1841 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1842 wp->flags, NULL);
1844 #endif
1846 return new_env;
1849 #if !defined(CONFIG_USER_ONLY)
1851 static inline void tlb_flush_jmp_cache(CPUState *env, target_ulong addr)
1853 unsigned int i;
1855 /* Discard jump cache entries for any tb which might potentially
1856 overlap the flushed page. */
1857 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1858 memset (&env->tb_jmp_cache[i], 0,
1859 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1861 i = tb_jmp_cache_hash_page(addr);
1862 memset (&env->tb_jmp_cache[i], 0,
1863 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
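/* Two hash ranges are cleared because a TB that starts on the page just
   before 'addr' may extend into the flushed page; its jump cache entries
   hash under that preceding page. */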
1866 static CPUTLBEntry s_cputlb_empty_entry = {
1867 .addr_read = -1,
1868 .addr_write = -1,
1869 .addr_code = -1,
1870 .addend = -1,
1873 /* NOTE: if flush_global is true, also flush global entries (not
1874 implemented yet) */
1875 void tlb_flush(CPUState *env, int flush_global)
1877 int i;
1879 #if defined(DEBUG_TLB)
1880 printf("tlb_flush:\n");
1881 #endif
1882 /* must reset current TB so that interrupts cannot modify the
1883 links while we are modifying them */
1884 env->current_tb = NULL;
1886 for(i = 0; i < CPU_TLB_SIZE; i++) {
1887 int mmu_idx;
1888 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
1889 env->tlb_table[mmu_idx][i] = s_cputlb_empty_entry;
1893 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
1895 env->tlb_flush_addr = -1;
1896 env->tlb_flush_mask = 0;
1897 tlb_flush_count++;
1900 static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
1902 if (addr == (tlb_entry->addr_read &
1903 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1904 addr == (tlb_entry->addr_write &
1905 (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
1906 addr == (tlb_entry->addr_code &
1907 (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
1908 *tlb_entry = s_cputlb_empty_entry;
1912 void tlb_flush_page(CPUState *env, target_ulong addr)
1914 int i;
1915 int mmu_idx;
1917 #if defined(DEBUG_TLB)
1918 printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
1919 #endif
1920 /* Check if we need to flush due to large pages. */
1921 if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
1922 #if defined(DEBUG_TLB)
1923 printf("tlb_flush_page: forced full flush ("
1924 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
1925 env->tlb_flush_addr, env->tlb_flush_mask);
1926 #endif
1927 tlb_flush(env, 1);
1928 return;
1930 /* must reset current TB so that interrupts cannot modify the
1931 links while we are modifying them */
1932 env->current_tb = NULL;
1934 addr &= TARGET_PAGE_MASK;
1935 i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
1936 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
1937 tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
1939 tlb_flush_jmp_cache(env, addr);
1942 /* update the TLBs so that writes to code in the virtual page 'addr'
1943 can be detected */
1944 static void tlb_protect_code(ram_addr_t ram_addr)
1946 cpu_physical_memory_reset_dirty(ram_addr,
1947 ram_addr + TARGET_PAGE_SIZE,
1948 CODE_DIRTY_FLAG);
1951 /* update the TLB so that writes in physical page 'phys_addr' are no longer
1952    tested for self-modifying code */
1953 static void tlb_unprotect_code_phys(CPUState *env, ram_addr_t ram_addr,
1954 target_ulong vaddr)
1956 cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
1959 static inline void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry,
1960 unsigned long start, unsigned long length)
1962 unsigned long addr;
1963 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
1964 addr = (tlb_entry->addr_write & TARGET_PAGE_MASK) + tlb_entry->addend;
1965 if ((addr - start) < length) {
1966 tlb_entry->addr_write = (tlb_entry->addr_write & TARGET_PAGE_MASK) | TLB_NOTDIRTY;
1971 /* Note: start and end must be within the same ram block. */
1972 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1973 int dirty_flags)
1975 CPUState *env;
1976 unsigned long length, start1;
1977 int i;
1979 start &= TARGET_PAGE_MASK;
1980 end = TARGET_PAGE_ALIGN(end);
1982 length = end - start;
1983 if (length == 0)
1984 return;
1985 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1987 /* we modify the TLB cache so that the dirty bit will be set again
1988 when accessing the range */
1989 start1 = (unsigned long)qemu_safe_ram_ptr(start);
1990 /* Check that we don't span multiple blocks - this breaks the
1991 address comparisons below. */
1992 if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
1993 != (end - 1) - start) {
1994 abort();
1997 for(env = first_cpu; env != NULL; env = env->next_cpu) {
1998 int mmu_idx;
1999 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2000 for(i = 0; i < CPU_TLB_SIZE; i++)
2001 tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
2002 start1, length);
2007 int cpu_physical_memory_set_dirty_tracking(int enable)
2009 int ret = 0;
2010 in_migration = enable;
2011 if (enable) {
2012 memory_global_dirty_log_start();
2013 } else {
2014 memory_global_dirty_log_stop();
2016 return ret;
2019 int cpu_physical_memory_get_dirty_tracking(void)
2021 return in_migration;
2024 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
2026 ram_addr_t ram_addr;
2027 void *p;
2029 if ((tlb_entry->addr_write & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
2030 p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
2031 + tlb_entry->addend);
2032 ram_addr = qemu_ram_addr_from_host_nofail(p);
2033 if (!cpu_physical_memory_is_dirty(ram_addr)) {
2034 tlb_entry->addr_write |= TLB_NOTDIRTY;
2039 /* update the TLB according to the current state of the dirty bits */
2040 void cpu_tlb_update_dirty(CPUState *env)
2042 int i;
2043 int mmu_idx;
2044 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
2045 for(i = 0; i < CPU_TLB_SIZE; i++)
2046 tlb_update_dirty(&env->tlb_table[mmu_idx][i]);
2050 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
2052 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY))
2053 tlb_entry->addr_write = vaddr;
2056 /* update the TLB corresponding to virtual page vaddr
2057 so that it is no longer dirty */
2058 static inline void tlb_set_dirty(CPUState *env, target_ulong vaddr)
2060 int i;
2061 int mmu_idx;
2063 vaddr &= TARGET_PAGE_MASK;
2064 i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2065 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
2066 tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
2069 /* Our TLB does not support large pages, so remember the area covered by
2070 large pages and trigger a full TLB flush if these are invalidated. */
2071 static void tlb_add_large_page(CPUState *env, target_ulong vaddr,
2072 target_ulong size)
2074 target_ulong mask = ~(size - 1);
2076 if (env->tlb_flush_addr == (target_ulong)-1) {
2077 env->tlb_flush_addr = vaddr & mask;
2078 env->tlb_flush_mask = mask;
2079 return;
2081 /* Extend the existing region to include the new page.
2082 This is a compromise between unnecessary flushes and the cost
2083 of maintaining a full variable size TLB. */
2084 mask &= env->tlb_flush_mask;
2085 while (((env->tlb_flush_addr ^ vaddr) & mask) != 0) {
2086 mask <<= 1;
2088 env->tlb_flush_addr &= mask;
2089 env->tlb_flush_mask = mask;
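/* Example: if the tracked region currently covers one 2MB page and a new
   large page is added whose address differs in a higher bit, the loop
   above keeps shifting 'mask' left until it no longer covers any bit in
   which the two addresses differ, i.e. the tracked window is widened to
   an aligned power-of-two region that covers both pages. */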
2092 /* Add a new TLB entry. At most one entry for a given virtual address
2093    is permitted. Only a single TARGET_PAGE_SIZE region is mapped; the
2094    supplied size is only used by tlb_flush_page. */
2095 void tlb_set_page(CPUState *env, target_ulong vaddr,
2096 target_phys_addr_t paddr, int prot,
2097 int mmu_idx, target_ulong size)
2099 PhysPageDesc *p;
2100 unsigned long pd;
2101 unsigned int index;
2102 target_ulong address;
2103 target_ulong code_address;
2104 unsigned long addend;
2105 CPUTLBEntry *te;
2106 CPUWatchpoint *wp;
2107 target_phys_addr_t iotlb;
2109 assert(size >= TARGET_PAGE_SIZE);
2110 if (size != TARGET_PAGE_SIZE) {
2111 tlb_add_large_page(env, vaddr, size);
2113 p = phys_page_find(paddr >> TARGET_PAGE_BITS);
2114 if (!p) {
2115 pd = IO_MEM_UNASSIGNED;
2116 } else {
2117 pd = p->phys_offset;
2119 #if defined(DEBUG_TLB)
2120 printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
2121 " prot=%x idx=%d pd=0x%08lx\n",
2122 vaddr, paddr, prot, mmu_idx, pd);
2123 #endif
2125 address = vaddr;
2126 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM && !(pd & IO_MEM_ROMD)) {
2127 /* IO memory case (romd handled later) */
2128 address |= TLB_MMIO;
2130 addend = (unsigned long)qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
2131 if ((pd & ~TARGET_PAGE_MASK) <= IO_MEM_ROM) {
2132 /* Normal RAM. */
2133 iotlb = pd & TARGET_PAGE_MASK;
2134 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM)
2135 iotlb |= IO_MEM_NOTDIRTY;
2136 else
2137 iotlb |= IO_MEM_ROM;
2138 } else {
2139 /* IO handlers are currently passed a physical address.
2140 It would be nice to pass an offset from the base address
2141 of that region. This would avoid having to special case RAM,
2142 and avoid full address decoding in every device.
2143 We can't use the high bits of pd for this because
2144 IO_MEM_ROMD uses these as a ram address. */
2145 iotlb = (pd & ~TARGET_PAGE_MASK);
2146 if (p) {
2147 iotlb += p->region_offset;
2148 } else {
2149 iotlb += paddr;
2153 code_address = address;
2154 /* Make accesses to pages with watchpoints go via the
2155 watchpoint trap routines. */
2156 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2157 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
2158 /* Avoid trapping reads of pages with a write breakpoint. */
2159 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
2160 iotlb = io_mem_watch + paddr;
2161 address |= TLB_MMIO;
2162 break;
2167 index = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
2168 env->iotlb[mmu_idx][index] = iotlb - vaddr;
2169 te = &env->tlb_table[mmu_idx][index];
2170 te->addend = addend - vaddr;
2171 if (prot & PAGE_READ) {
2172 te->addr_read = address;
2173 } else {
2174 te->addr_read = -1;
2177 if (prot & PAGE_EXEC) {
2178 te->addr_code = code_address;
2179 } else {
2180 te->addr_code = -1;
2182 if (prot & PAGE_WRITE) {
2183 if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_ROM ||
2184 (pd & IO_MEM_ROMD)) {
2185 /* Write access calls the I/O callback. */
2186 te->addr_write = address | TLB_MMIO;
2187 } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
2188 !cpu_physical_memory_is_dirty(pd)) {
2189 te->addr_write = address | TLB_NOTDIRTY;
2190 } else {
2191 te->addr_write = address;
2193 } else {
2194 te->addr_write = -1;
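/* Illustrative sketch (editorial addition): a target's tlb_fill() handler
   typically ends up here after walking the guest page tables.  The helper
   my_walk_page_table() and its out-parameters are placeholders for
   target-specific code, not functions defined in this file. */
#if 0   /* example only */
static void example_tlb_fill(CPUState *env, target_ulong vaddr, int mmu_idx)
{
    target_phys_addr_t paddr;
    int prot;
    target_ulong page_size;

    my_walk_page_table(env, vaddr, &paddr, &prot, &page_size);

    /* Map exactly one target page; 'page_size' only records the large-page
       region for later flushing (see tlb_add_large_page above). */
    tlb_set_page(env, vaddr & TARGET_PAGE_MASK, paddr & TARGET_PAGE_MASK,
                 prot, mmu_idx, page_size);
}
#endif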
2198 #else
2200 void tlb_flush(CPUState *env, int flush_global)
2204 void tlb_flush_page(CPUState *env, target_ulong addr)
2209 * Walks guest process memory "regions" one by one
2210 * and calls callback function 'fn' for each region.
2213 struct walk_memory_regions_data
2215 walk_memory_regions_fn fn;
2216 void *priv;
2217 unsigned long start;
2218 int prot;
2221 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
2222 abi_ulong end, int new_prot)
2224 if (data->start != -1ul) {
2225 int rc = data->fn(data->priv, data->start, end, data->prot);
2226 if (rc != 0) {
2227 return rc;
2231 data->start = (new_prot ? end : -1ul);
2232 data->prot = new_prot;
2234 return 0;
2237 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
2238 abi_ulong base, int level, void **lp)
2240 abi_ulong pa;
2241 int i, rc;
2243 if (*lp == NULL) {
2244 return walk_memory_regions_end(data, base, 0);
2247 if (level == 0) {
2248 PageDesc *pd = *lp;
2249 for (i = 0; i < L2_SIZE; ++i) {
2250 int prot = pd[i].flags;
2252 pa = base | (i << TARGET_PAGE_BITS);
2253 if (prot != data->prot) {
2254 rc = walk_memory_regions_end(data, pa, prot);
2255 if (rc != 0) {
2256 return rc;
2260 } else {
2261 void **pp = *lp;
2262 for (i = 0; i < L2_SIZE; ++i) {
2263 pa = base | ((abi_ulong)i <<
2264 (TARGET_PAGE_BITS + L2_BITS * level));
2265 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2266 if (rc != 0) {
2267 return rc;
2272 return 0;
2275 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2277 struct walk_memory_regions_data data;
2278 unsigned long i;
2280 data.fn = fn;
2281 data.priv = priv;
2282 data.start = -1ul;
2283 data.prot = 0;
2285 for (i = 0; i < V_L1_SIZE; i++) {
2286 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2287 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2288 if (rc != 0) {
2289 return rc;
2293 return walk_memory_regions_end(&data, 0, 0);
2296 static int dump_region(void *priv, abi_ulong start,
2297 abi_ulong end, unsigned long prot)
2299 FILE *f = (FILE *)priv;
2301 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2302 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2303 start, end, end - start,
2304 ((prot & PAGE_READ) ? 'r' : '-'),
2305 ((prot & PAGE_WRITE) ? 'w' : '-'),
2306 ((prot & PAGE_EXEC) ? 'x' : '-'));
2308 return (0);
2311 /* dump memory mappings */
2312 void page_dump(FILE *f)
2314 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2315 "start", "end", "size", "prot");
2316 walk_memory_regions(f, dump_region);
2319 int page_get_flags(target_ulong address)
2321 PageDesc *p;
2323 p = page_find(address >> TARGET_PAGE_BITS);
2324 if (!p)
2325 return 0;
2326 return p->flags;
2329 /* Modify the flags of a page and invalidate the code if necessary.
2330 The flag PAGE_WRITE_ORG is positioned automatically depending
2331 on PAGE_WRITE. The mmap_lock should already be held. */
2332 void page_set_flags(target_ulong start, target_ulong end, int flags)
2334 target_ulong addr, len;
2336 /* This function should never be called with addresses outside the
2337 guest address space. If this assert fires, it probably indicates
2338 a missing call to h2g_valid. */
2339 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2340 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2341 #endif
2342 assert(start < end);
2344 start = start & TARGET_PAGE_MASK;
2345 end = TARGET_PAGE_ALIGN(end);
2347 if (flags & PAGE_WRITE) {
2348 flags |= PAGE_WRITE_ORG;
2351 for (addr = start, len = end - start;
2352 len != 0;
2353 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2354 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2356 /* If the write protection bit is set, then we invalidate
2357 the code inside. */
2358 if (!(p->flags & PAGE_WRITE) &&
2359 (flags & PAGE_WRITE) &&
2360 p->first_tb) {
2361 tb_invalidate_phys_page(addr, 0, NULL);
2363 p->flags = flags;
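/* Illustrative sketch (editorial addition): roughly how the user-mode mmap
   emulation would publish a new anonymous mapping.  The wrapper is an
   example only; the real callers live in the linux-user code, not in this
   file. */
#if 0   /* example only */
static void example_note_new_mapping(abi_ulong start, abi_ulong len, int prot)
{
    int flags = PAGE_VALID;

    if (prot & PROT_READ) {
        flags |= PAGE_READ;
    }
    if (prot & PROT_WRITE) {
        flags |= PAGE_WRITE;   /* PAGE_WRITE_ORG is added automatically */
    }
    if (prot & PROT_EXEC) {
        flags |= PAGE_EXEC;
    }
    page_set_flags(start, start + len, flags);
}
#endif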
2367 int page_check_range(target_ulong start, target_ulong len, int flags)
2369 PageDesc *p;
2370 target_ulong end;
2371 target_ulong addr;
2373 /* This function should never be called with addresses outside the
2374 guest address space. If this assert fires, it probably indicates
2375 a missing call to h2g_valid. */
2376 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2377 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2378 #endif
2380 if (len == 0) {
2381 return 0;
2383 if (start + len - 1 < start) {
2384 /* We've wrapped around. */
2385 return -1;
2388 end = TARGET_PAGE_ALIGN(start+len); /* must do this before we lose bits in the next step */
2389 start = start & TARGET_PAGE_MASK;
2391 for (addr = start, len = end - start;
2392 len != 0;
2393 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2394 p = page_find(addr >> TARGET_PAGE_BITS);
2395 if( !p )
2396 return -1;
2397 if( !(p->flags & PAGE_VALID) )
2398 return -1;
2400 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2401 return -1;
2402 if (flags & PAGE_WRITE) {
2403 if (!(p->flags & PAGE_WRITE_ORG))
2404 return -1;
2405 /* unprotect the page if it was put read-only because it
2406 contains translated code */
2407 if (!(p->flags & PAGE_WRITE)) {
2408 if (!page_unprotect(addr, 0, NULL))
2409 return -1;
2411 return 0;
2414 return 0;
2417 /* called from signal handler: invalidate the code and unprotect the
2418 page. Return TRUE if the fault was successfully handled. */
2419 int page_unprotect(target_ulong address, unsigned long pc, void *puc)
2421 unsigned int prot;
2422 PageDesc *p;
2423 target_ulong host_start, host_end, addr;
2425 /* Technically this isn't safe inside a signal handler. However we
2426 know this only ever happens in a synchronous SEGV handler, so in
2427 practice it seems to be ok. */
2428 mmap_lock();
2430 p = page_find(address >> TARGET_PAGE_BITS);
2431 if (!p) {
2432 mmap_unlock();
2433 return 0;
2436 /* if the page was really writable, then we change its
2437 protection back to writable */
2438 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2439 host_start = address & qemu_host_page_mask;
2440 host_end = host_start + qemu_host_page_size;
2442 prot = 0;
2443 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2444 p = page_find(addr >> TARGET_PAGE_BITS);
2445 p->flags |= PAGE_WRITE;
2446 prot |= p->flags;
2448 /* and since the content will be modified, we must invalidate
2449 the corresponding translated code. */
2450 tb_invalidate_phys_page(addr, pc, puc);
2451 #ifdef DEBUG_TB_CHECK
2452 tb_invalidate_check(addr);
2453 #endif
2455 mprotect((void *)g2h(host_start), qemu_host_page_size,
2456 prot & PAGE_BITS);
2458 mmap_unlock();
2459 return 1;
2461 mmap_unlock();
2462 return 0;
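/* Illustrative sketch (editorial addition): the user-mode SEGV handler calls
   page_unprotect() roughly like this; a return value of 1 means the fault
   was caused by our own write protection of a page holding translated code,
   so the faulting access can simply be retried.  host_fault_addr, pc and puc
   are placeholders for values taken from the signal context. */
#if 0   /* example only */
    if (page_unprotect(h2g(host_fault_addr), pc, puc)) {
        return 1;   /* fault handled, restart the faulting instruction */
    }
#endif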
2465 static inline void tlb_set_dirty(CPUState *env,
2466 unsigned long addr, target_ulong vaddr)
2469 #endif /* defined(CONFIG_USER_ONLY) */
2471 #if !defined(CONFIG_USER_ONLY)
2473 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2474 typedef struct subpage_t {
2475 target_phys_addr_t base;
2476 ram_addr_t sub_io_index[TARGET_PAGE_SIZE];
2477 ram_addr_t region_offset[TARGET_PAGE_SIZE];
2478 } subpage_t;
2480 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2481 ram_addr_t memory, ram_addr_t region_offset);
2482 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
2483 ram_addr_t orig_memory,
2484 ram_addr_t region_offset);
2485 #define CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2, \
2486 need_subpage) \
2487 do { \
2488 if (addr > start_addr) \
2489 start_addr2 = 0; \
2490 else { \
2491 start_addr2 = start_addr & ~TARGET_PAGE_MASK; \
2492 if (start_addr2 > 0) \
2493 need_subpage = 1; \
2496 if ((start_addr + orig_size) - addr >= TARGET_PAGE_SIZE) \
2497 end_addr2 = TARGET_PAGE_SIZE - 1; \
2498 else { \
2499 end_addr2 = (start_addr + orig_size - 1) & ~TARGET_PAGE_MASK; \
2500 if (end_addr2 < TARGET_PAGE_SIZE - 1) \
2501 need_subpage = 1; \
2503 } while (0)
2505 /* register physical memory.
2506 For RAM, 'size' must be a multiple of the target page size.
2507 If (phys_offset & ~TARGET_PAGE_MASK) != 0, then it is an
2508 io memory page. The address used when calling the IO function is
2509 the offset from the start of the region, plus region_offset. Both
2510 start_addr and region_offset are rounded down to a page boundary
2511 before calculating this offset. This should not be a problem unless
2512 the low bits of start_addr and region_offset differ. */
2513 void cpu_register_physical_memory_log(target_phys_addr_t start_addr,
2514 ram_addr_t size,
2515 ram_addr_t phys_offset,
2516 ram_addr_t region_offset,
2517 bool log_dirty)
2519 target_phys_addr_t addr, end_addr;
2520 PhysPageDesc *p;
2521 CPUState *env;
2522 ram_addr_t orig_size = size;
2523 subpage_t *subpage;
2525 assert(size);
2527 if (phys_offset == IO_MEM_UNASSIGNED) {
2528 region_offset = start_addr;
2530 region_offset &= TARGET_PAGE_MASK;
2531 size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
2532 end_addr = start_addr + (target_phys_addr_t)size;
2534 addr = start_addr;
2535 do {
2536 p = phys_page_find(addr >> TARGET_PAGE_BITS);
2537 if (p && p->phys_offset != IO_MEM_UNASSIGNED) {
2538 ram_addr_t orig_memory = p->phys_offset;
2539 target_phys_addr_t start_addr2, end_addr2;
2540 int need_subpage = 0;
2542 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr, end_addr2,
2543 need_subpage);
2544 if (need_subpage) {
2545 if (!(orig_memory & IO_MEM_SUBPAGE)) {
2546 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2547 &p->phys_offset, orig_memory,
2548 p->region_offset);
2549 } else {
2550 subpage = io_mem_opaque[(orig_memory & ~TARGET_PAGE_MASK)
2551 >> IO_MEM_SHIFT];
2553 subpage_register(subpage, start_addr2, end_addr2, phys_offset,
2554 region_offset);
2555 p->region_offset = 0;
2556 } else {
2557 p->phys_offset = phys_offset;
2558 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2559 (phys_offset & IO_MEM_ROMD))
2560 phys_offset += TARGET_PAGE_SIZE;
2562 } else {
2563 p = phys_page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2564 p->phys_offset = phys_offset;
2565 p->region_offset = region_offset;
2566 if ((phys_offset & ~TARGET_PAGE_MASK) <= IO_MEM_ROM ||
2567 (phys_offset & IO_MEM_ROMD)) {
2568 phys_offset += TARGET_PAGE_SIZE;
2569 } else {
2570 target_phys_addr_t start_addr2, end_addr2;
2571 int need_subpage = 0;
2573 CHECK_SUBPAGE(addr, start_addr, start_addr2, end_addr,
2574 end_addr2, need_subpage);
2576 if (need_subpage) {
2577 subpage = subpage_init((addr & TARGET_PAGE_MASK),
2578 &p->phys_offset, IO_MEM_UNASSIGNED,
2579 addr & TARGET_PAGE_MASK);
2580 subpage_register(subpage, start_addr2, end_addr2,
2581 phys_offset, region_offset);
2582 p->region_offset = 0;
2586 region_offset += TARGET_PAGE_SIZE;
2587 addr += TARGET_PAGE_SIZE;
2588 } while (addr != end_addr);
2590 /* since each CPU stores ram addresses in its TLB cache, we must
2591 reset the modified entries */
2592 /* XXX: slow ! */
2593 for(env = first_cpu; env != NULL; env = env->next_cpu) {
2594 tlb_flush(env, 1);
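/* Illustrative sketch (editorial addition, not part of exec.c): registering
   one page of MMIO for a hypothetical device.  my_io_read/my_io_write,
   my_opaque and MY_DEV_BASE are placeholders.  With region_offset == 0 and a
   page-aligned MY_DEV_BASE, a guest access at MY_DEV_BASE + 0x10 reaches the
   handlers with addr == 0x10.  Note that in this tree devices normally
   register memory through the MemoryRegion API instead. */
#if 0   /* example only */
    int io = cpu_register_io_memory(my_io_read, my_io_write, my_opaque,
                                    DEVICE_NATIVE_ENDIAN);
    cpu_register_physical_memory_log(MY_DEV_BASE, TARGET_PAGE_SIZE,
                                     io, 0 /* region_offset */,
                                     false /* log_dirty */);
#endif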
2598 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2600 if (kvm_enabled())
2601 kvm_coalesce_mmio_region(addr, size);
2604 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
2606 if (kvm_enabled())
2607 kvm_uncoalesce_mmio_region(addr, size);
2610 void qemu_flush_coalesced_mmio_buffer(void)
2612 if (kvm_enabled())
2613 kvm_flush_coalesced_mmio_buffer();
2616 #if defined(__linux__) && !defined(TARGET_S390X)
2618 #include <sys/vfs.h>
2620 #define HUGETLBFS_MAGIC 0x958458f6
2622 static long gethugepagesize(const char *path)
2624 struct statfs fs;
2625 int ret;
2627 do {
2628 ret = statfs(path, &fs);
2629 } while (ret != 0 && errno == EINTR);
2631 if (ret != 0) {
2632 perror(path);
2633 return 0;
2636 if (fs.f_type != HUGETLBFS_MAGIC)
2637 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2639 return fs.f_bsize;
2642 static void *file_ram_alloc(RAMBlock *block,
2643 ram_addr_t memory,
2644 const char *path)
2646 char *filename;
2647 void *area;
2648 int fd;
2649 #ifdef MAP_POPULATE
2650 int flags;
2651 #endif
2652 unsigned long hpagesize;
2654 hpagesize = gethugepagesize(path);
2655 if (!hpagesize) {
2656 return NULL;
2659 if (memory < hpagesize) {
2660 return NULL;
2663 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2664 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2665 return NULL;
2668 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2669 return NULL;
2672 fd = mkstemp(filename);
2673 if (fd < 0) {
2674 perror("unable to create backing store for hugepages");
2675 free(filename);
2676 return NULL;
2678 unlink(filename);
2679 free(filename);
2681 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2684 * ftruncate is not supported by hugetlbfs in older
2685 * hosts, so don't bother bailing out on errors.
2686 * If anything goes wrong with it under other filesystems,
2687 * mmap will fail.
2689 if (ftruncate(fd, memory))
2690 perror("ftruncate");
2692 #ifdef MAP_POPULATE
2693 /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
2694 * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
2695 * to sidestep this quirk.
2697 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2698 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2699 #else
2700 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2701 #endif
2702 if (area == MAP_FAILED) {
2703 perror("file_ram_alloc: can't mmap RAM pages");
2704 close(fd);
2705 return (NULL);
2707 block->fd = fd;
2708 return area;
2710 #endif
2712 static ram_addr_t find_ram_offset(ram_addr_t size)
2714 RAMBlock *block, *next_block;
2715 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2717 if (QLIST_EMPTY(&ram_list.blocks))
2718 return 0;
2720 QLIST_FOREACH(block, &ram_list.blocks, next) {
2721 ram_addr_t end, next = RAM_ADDR_MAX;
2723 end = block->offset + block->length;
2725 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2726 if (next_block->offset >= end) {
2727 next = MIN(next, next_block->offset);
2730 if (next - end >= size && next - end < mingap) {
2731 offset = end;
2732 mingap = next - end;
2736 if (offset == RAM_ADDR_MAX) {
2737 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2738 (uint64_t)size);
2739 abort();
2742 return offset;
2745 static ram_addr_t last_ram_offset(void)
2747 RAMBlock *block;
2748 ram_addr_t last = 0;
2750 QLIST_FOREACH(block, &ram_list.blocks, next)
2751 last = MAX(last, block->offset + block->length);
2753 return last;
2756 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2758 RAMBlock *new_block, *block;
2760 new_block = NULL;
2761 QLIST_FOREACH(block, &ram_list.blocks, next) {
2762 if (block->offset == addr) {
2763 new_block = block;
2764 break;
2767 assert(new_block);
2768 assert(!new_block->idstr[0]);
2770 if (dev && dev->parent_bus && dev->parent_bus->info->get_dev_path) {
2771 char *id = dev->parent_bus->info->get_dev_path(dev);
2772 if (id) {
2773 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2774 g_free(id);
2777 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2779 QLIST_FOREACH(block, &ram_list.blocks, next) {
2780 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2781 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2782 new_block->idstr);
2783 abort();
2788 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2789 MemoryRegion *mr)
2791 RAMBlock *new_block;
2793 size = TARGET_PAGE_ALIGN(size);
2794 new_block = g_malloc0(sizeof(*new_block));
2796 new_block->mr = mr;
2797 new_block->offset = find_ram_offset(size);
2798 if (host) {
2799 new_block->host = host;
2800 new_block->flags |= RAM_PREALLOC_MASK;
2801 } else {
2802 if (mem_path) {
2803 #if defined (__linux__) && !defined(TARGET_S390X)
2804 new_block->host = file_ram_alloc(new_block, size, mem_path);
2805 if (!new_block->host) {
2806 new_block->host = qemu_vmalloc(size);
2807 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2809 #else
2810 fprintf(stderr, "-mem-path option unsupported\n");
2811 exit(1);
2812 #endif
2813 } else {
2814 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2815 /* S390 KVM requires the topmost vma of the RAM to be smaller than
2816 a system-defined value, which is at least 256GB. Larger systems
2817 have larger values. We put the guest between the end of data
2818 segment (system break) and this value. We use 32GB as a base to
2819 have enough room for the system break to grow. */
2820 new_block->host = mmap((void*)0x800000000, size,
2821 PROT_EXEC|PROT_READ|PROT_WRITE,
2822 MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
2823 if (new_block->host == MAP_FAILED) {
2824 fprintf(stderr, "Allocating RAM failed\n");
2825 abort();
2827 #else
2828 if (xen_enabled()) {
2829 xen_ram_alloc(new_block->offset, size, mr);
2830 } else {
2831 new_block->host = qemu_vmalloc(size);
2833 #endif
2834 qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
2837 new_block->length = size;
2839 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2841 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2842 last_ram_offset() >> TARGET_PAGE_BITS);
2843 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2844 0xff, size >> TARGET_PAGE_BITS);
2846 if (kvm_enabled())
2847 kvm_setup_guest_memory(new_block->host, size);
2849 return new_block->offset;
2852 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2854 return qemu_ram_alloc_from_ptr(size, NULL, mr);
2857 void qemu_ram_free_from_ptr(ram_addr_t addr)
2859 RAMBlock *block;
2861 QLIST_FOREACH(block, &ram_list.blocks, next) {
2862 if (addr == block->offset) {
2863 QLIST_REMOVE(block, next);
2864 g_free(block);
2865 return;
2870 void qemu_ram_free(ram_addr_t addr)
2872 RAMBlock *block;
2874 QLIST_FOREACH(block, &ram_list.blocks, next) {
2875 if (addr == block->offset) {
2876 QLIST_REMOVE(block, next);
2877 if (block->flags & RAM_PREALLOC_MASK) {
2879 } else if (mem_path) {
2880 #if defined (__linux__) && !defined(TARGET_S390X)
2881 if (block->fd) {
2882 munmap(block->host, block->length);
2883 close(block->fd);
2884 } else {
2885 qemu_vfree(block->host);
2887 #else
2888 abort();
2889 #endif
2890 } else {
2891 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2892 munmap(block->host, block->length);
2893 #else
2894 if (xen_enabled()) {
2895 xen_invalidate_map_cache_entry(block->host);
2896 } else {
2897 qemu_vfree(block->host);
2899 #endif
2901 g_free(block);
2902 return;
2908 #ifndef _WIN32
2909 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2911 RAMBlock *block;
2912 ram_addr_t offset;
2913 int flags;
2914 void *area, *vaddr;
2916 QLIST_FOREACH(block, &ram_list.blocks, next) {
2917 offset = addr - block->offset;
2918 if (offset < block->length) {
2919 vaddr = block->host + offset;
2920 if (block->flags & RAM_PREALLOC_MASK) {
2922 } else {
2923 flags = MAP_FIXED;
2924 munmap(vaddr, length);
2925 if (mem_path) {
2926 #if defined(__linux__) && !defined(TARGET_S390X)
2927 if (block->fd) {
2928 #ifdef MAP_POPULATE
2929 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2930 MAP_PRIVATE;
2931 #else
2932 flags |= MAP_PRIVATE;
2933 #endif
2934 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2935 flags, block->fd, offset);
2936 } else {
2937 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2938 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2939 flags, -1, 0);
2941 #else
2942 abort();
2943 #endif
2944 } else {
2945 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2946 flags |= MAP_SHARED | MAP_ANONYMOUS;
2947 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2948 flags, -1, 0);
2949 #else
2950 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2951 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2952 flags, -1, 0);
2953 #endif
2955 if (area != vaddr) {
2956 fprintf(stderr, "Could not remap addr: "
2957 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2958 length, addr);
2959 exit(1);
2961 qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
2963 return;
2967 #endif /* !_WIN32 */
2969 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2970 With the exception of the softmmu code in this file, this should
2971 only be used for local memory (e.g. video ram) that the device owns,
2972 and knows it isn't going to access beyond the end of the block.
2974 It should not be used for general purpose DMA.
2975 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2977 void *qemu_get_ram_ptr(ram_addr_t addr)
2979 RAMBlock *block;
2981 QLIST_FOREACH(block, &ram_list.blocks, next) {
2982 if (addr - block->offset < block->length) {
2983 /* Move this entry to the start of the list. */
2984 if (block != QLIST_FIRST(&ram_list.blocks)) {
2985 QLIST_REMOVE(block, next);
2986 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2988 if (xen_enabled()) {
2989 /* We need to check if the requested address is in the RAM
2990 * because we don't want to map the entire memory in QEMU.
2991 * In that case just map until the end of the page.
2993 if (block->offset == 0) {
2994 return xen_map_cache(addr, 0, 0);
2995 } else if (block->host == NULL) {
2996 block->host =
2997 xen_map_cache(block->offset, block->length, 1);
3000 return block->host + (addr - block->offset);
3004 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3005 abort();
3007 return NULL;
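/* Illustrative sketch (editorial addition): the intended caller is a device
   touching RAM that it allocated itself (e.g. video RAM), never code doing
   guest-driven DMA.  MY_VRAM_SIZE and my_vram_region are placeholders. */
#if 0   /* example only */
    ram_addr_t vram_offset = qemu_ram_alloc(MY_VRAM_SIZE, &my_vram_region);
    uint8_t *vram = qemu_get_ram_ptr(vram_offset);
    memset(vram, 0, MY_VRAM_SIZE);
    qemu_put_ram_ptr(vram);
#endif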
3010 /* Return a host pointer to ram allocated with qemu_ram_alloc.
3011 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
3013 void *qemu_safe_ram_ptr(ram_addr_t addr)
3015 RAMBlock *block;
3017 QLIST_FOREACH(block, &ram_list.blocks, next) {
3018 if (addr - block->offset < block->length) {
3019 if (xen_enabled()) {
3020 /* We need to check if the requested address is in the RAM
3021 * because we don't want to map the entire memory in QEMU.
3022 * In that case just map until the end of the page.
3024 if (block->offset == 0) {
3025 return xen_map_cache(addr, 0, 0);
3026 } else if (block->host == NULL) {
3027 block->host =
3028 xen_map_cache(block->offset, block->length, 1);
3031 return block->host + (addr - block->offset);
3035 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3036 abort();
3038 return NULL;
3041 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
3042 * but takes a size argument */
3043 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
3045 if (*size == 0) {
3046 return NULL;
3048 if (xen_enabled()) {
3049 return xen_map_cache(addr, *size, 1);
3050 } else {
3051 RAMBlock *block;
3053 QLIST_FOREACH(block, &ram_list.blocks, next) {
3054 if (addr - block->offset < block->length) {
3055 if (addr - block->offset + *size > block->length)
3056 *size = block->length - addr + block->offset;
3057 return block->host + (addr - block->offset);
3061 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
3062 abort();
3066 void qemu_put_ram_ptr(void *addr)
3068 trace_qemu_put_ram_ptr(addr);
3071 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
3073 RAMBlock *block;
3074 uint8_t *host = ptr;
3076 if (xen_enabled()) {
3077 *ram_addr = xen_ram_addr_from_mapcache(ptr);
3078 return 0;
3081 QLIST_FOREACH(block, &ram_list.blocks, next) {
3082 /* This case happens when the block is not mapped. */
3083 if (block->host == NULL) {
3084 continue;
3086 if (host - block->host < block->length) {
3087 *ram_addr = block->offset + (host - block->host);
3088 return 0;
3092 return -1;
3095 /* Some of the softmmu routines need to translate from a host pointer
3096 (typically a TLB entry) back to a ram offset. */
3097 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
3099 ram_addr_t ram_addr;
3101 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
3102 fprintf(stderr, "Bad ram pointer %p\n", ptr);
3103 abort();
3105 return ram_addr;
3108 static uint32_t unassigned_mem_readb(void *opaque, target_phys_addr_t addr)
3110 #ifdef DEBUG_UNASSIGNED
3111 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3112 #endif
3113 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3114 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 1);
3115 #endif
3116 return 0;
3119 static uint32_t unassigned_mem_readw(void *opaque, target_phys_addr_t addr)
3121 #ifdef DEBUG_UNASSIGNED
3122 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3123 #endif
3124 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3125 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 2);
3126 #endif
3127 return 0;
3130 static uint32_t unassigned_mem_readl(void *opaque, target_phys_addr_t addr)
3132 #ifdef DEBUG_UNASSIGNED
3133 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
3134 #endif
3135 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3136 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, 4);
3137 #endif
3138 return 0;
3141 static void unassigned_mem_writeb(void *opaque, target_phys_addr_t addr, uint32_t val)
3143 #ifdef DEBUG_UNASSIGNED
3144 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3145 #endif
3146 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3147 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 1);
3148 #endif
3151 static void unassigned_mem_writew(void *opaque, target_phys_addr_t addr, uint32_t val)
3153 #ifdef DEBUG_UNASSIGNED
3154 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3155 #endif
3156 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3157 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 2);
3158 #endif
3161 static void unassigned_mem_writel(void *opaque, target_phys_addr_t addr, uint32_t val)
3163 #ifdef DEBUG_UNASSIGNED
3164 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%x\n", addr, val);
3165 #endif
3166 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
3167 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, 4);
3168 #endif
3171 static CPUReadMemoryFunc * const unassigned_mem_read[3] = {
3172 unassigned_mem_readb,
3173 unassigned_mem_readw,
3174 unassigned_mem_readl,
3177 static CPUWriteMemoryFunc * const unassigned_mem_write[3] = {
3178 unassigned_mem_writeb,
3179 unassigned_mem_writew,
3180 unassigned_mem_writel,
3183 static void notdirty_mem_writeb(void *opaque, target_phys_addr_t ram_addr,
3184 uint32_t val)
3186 int dirty_flags;
3187 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3188 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3189 #if !defined(CONFIG_USER_ONLY)
3190 tb_invalidate_phys_page_fast(ram_addr, 1);
3191 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3192 #endif
3194 stb_p(qemu_get_ram_ptr(ram_addr), val);
3195 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3196 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3197 /* we remove the notdirty callback only if the code has been
3198 flushed */
3199 if (dirty_flags == 0xff)
3200 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3203 static void notdirty_mem_writew(void *opaque, target_phys_addr_t ram_addr,
3204 uint32_t val)
3206 int dirty_flags;
3207 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3208 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3209 #if !defined(CONFIG_USER_ONLY)
3210 tb_invalidate_phys_page_fast(ram_addr, 2);
3211 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3212 #endif
3214 stw_p(qemu_get_ram_ptr(ram_addr), val);
3215 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3216 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3217 /* we remove the notdirty callback only if the code has been
3218 flushed */
3219 if (dirty_flags == 0xff)
3220 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3223 static void notdirty_mem_writel(void *opaque, target_phys_addr_t ram_addr,
3224 uint32_t val)
3226 int dirty_flags;
3227 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3228 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
3229 #if !defined(CONFIG_USER_ONLY)
3230 tb_invalidate_phys_page_fast(ram_addr, 4);
3231 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
3232 #endif
3234 stl_p(qemu_get_ram_ptr(ram_addr), val);
3235 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
3236 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
3237 /* we remove the notdirty callback only if the code has been
3238 flushed */
3239 if (dirty_flags == 0xff)
3240 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
3243 static CPUReadMemoryFunc * const error_mem_read[3] = {
3244 NULL, /* never used */
3245 NULL, /* never used */
3246 NULL, /* never used */
3249 static CPUWriteMemoryFunc * const notdirty_mem_write[3] = {
3250 notdirty_mem_writeb,
3251 notdirty_mem_writew,
3252 notdirty_mem_writel,
3255 /* Generate a debug exception if a watchpoint has been hit. */
3256 static void check_watchpoint(int offset, int len_mask, int flags)
3258 CPUState *env = cpu_single_env;
3259 target_ulong pc, cs_base;
3260 TranslationBlock *tb;
3261 target_ulong vaddr;
3262 CPUWatchpoint *wp;
3263 int cpu_flags;
3265 if (env->watchpoint_hit) {
3266 /* We re-entered the check after replacing the TB. Now raise
3267 * the debug interrupt so that it will trigger after the
3268 * current instruction. */
3269 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3270 return;
3272 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3273 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3274 if ((vaddr == (wp->vaddr & len_mask) ||
3275 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3276 wp->flags |= BP_WATCHPOINT_HIT;
3277 if (!env->watchpoint_hit) {
3278 env->watchpoint_hit = wp;
3279 tb = tb_find_pc(env->mem_io_pc);
3280 if (!tb) {
3281 cpu_abort(env, "check_watchpoint: could not find TB for "
3282 "pc=%p", (void *)env->mem_io_pc);
3284 cpu_restore_state(tb, env, env->mem_io_pc);
3285 tb_phys_invalidate(tb, -1);
3286 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3287 env->exception_index = EXCP_DEBUG;
3288 } else {
3289 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3290 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3292 cpu_resume_from_signal(env, NULL);
3294 } else {
3295 wp->flags &= ~BP_WATCHPOINT_HIT;
3300 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3301 so these check for a hit then pass through to the normal out-of-line
3302 phys routines. */
3303 static uint32_t watch_mem_readb(void *opaque, target_phys_addr_t addr)
3305 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_READ);
3306 return ldub_phys(addr);
3309 static uint32_t watch_mem_readw(void *opaque, target_phys_addr_t addr)
3311 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_READ);
3312 return lduw_phys(addr);
3315 static uint32_t watch_mem_readl(void *opaque, target_phys_addr_t addr)
3317 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_READ);
3318 return ldl_phys(addr);
3321 static void watch_mem_writeb(void *opaque, target_phys_addr_t addr,
3322 uint32_t val)
3324 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x0, BP_MEM_WRITE);
3325 stb_phys(addr, val);
3328 static void watch_mem_writew(void *opaque, target_phys_addr_t addr,
3329 uint32_t val)
3331 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x1, BP_MEM_WRITE);
3332 stw_phys(addr, val);
3335 static void watch_mem_writel(void *opaque, target_phys_addr_t addr,
3336 uint32_t val)
3338 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~0x3, BP_MEM_WRITE);
3339 stl_phys(addr, val);
3342 static CPUReadMemoryFunc * const watch_mem_read[3] = {
3343 watch_mem_readb,
3344 watch_mem_readw,
3345 watch_mem_readl,
3348 static CPUWriteMemoryFunc * const watch_mem_write[3] = {
3349 watch_mem_writeb,
3350 watch_mem_writew,
3351 watch_mem_writel,
3354 static inline uint32_t subpage_readlen (subpage_t *mmio,
3355 target_phys_addr_t addr,
3356 unsigned int len)
3358 unsigned int idx = SUBPAGE_IDX(addr);
3359 #if defined(DEBUG_SUBPAGE)
3360 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3361 mmio, len, addr, idx);
3362 #endif
3364 addr += mmio->region_offset[idx];
3365 idx = mmio->sub_io_index[idx];
3366 return io_mem_read[idx][len](io_mem_opaque[idx], addr);
3369 static inline void subpage_writelen (subpage_t *mmio, target_phys_addr_t addr,
3370 uint32_t value, unsigned int len)
3372 unsigned int idx = SUBPAGE_IDX(addr);
3373 #if defined(DEBUG_SUBPAGE)
3374 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d value %08x\n",
3375 __func__, mmio, len, addr, idx, value);
3376 #endif
3378 addr += mmio->region_offset[idx];
3379 idx = mmio->sub_io_index[idx];
3380 io_mem_write[idx][len](io_mem_opaque[idx], addr, value);
3383 static uint32_t subpage_readb (void *opaque, target_phys_addr_t addr)
3385 return subpage_readlen(opaque, addr, 0);
3388 static void subpage_writeb (void *opaque, target_phys_addr_t addr,
3389 uint32_t value)
3391 subpage_writelen(opaque, addr, value, 0);
3394 static uint32_t subpage_readw (void *opaque, target_phys_addr_t addr)
3396 return subpage_readlen(opaque, addr, 1);
3399 static void subpage_writew (void *opaque, target_phys_addr_t addr,
3400 uint32_t value)
3402 subpage_writelen(opaque, addr, value, 1);
3405 static uint32_t subpage_readl (void *opaque, target_phys_addr_t addr)
3407 return subpage_readlen(opaque, addr, 2);
3410 static void subpage_writel (void *opaque, target_phys_addr_t addr,
3411 uint32_t value)
3413 subpage_writelen(opaque, addr, value, 2);
3416 static CPUReadMemoryFunc * const subpage_read[] = {
3417 &subpage_readb,
3418 &subpage_readw,
3419 &subpage_readl,
3422 static CPUWriteMemoryFunc * const subpage_write[] = {
3423 &subpage_writeb,
3424 &subpage_writew,
3425 &subpage_writel,
3428 static uint32_t subpage_ram_readb(void *opaque, target_phys_addr_t addr)
3430 ram_addr_t raddr = addr;
3431 void *ptr = qemu_get_ram_ptr(raddr);
3432 return ldub_p(ptr);
3435 static void subpage_ram_writeb(void *opaque, target_phys_addr_t addr,
3436 uint32_t value)
3438 ram_addr_t raddr = addr;
3439 void *ptr = qemu_get_ram_ptr(raddr);
3440 stb_p(ptr, value);
3443 static uint32_t subpage_ram_readw(void *opaque, target_phys_addr_t addr)
3445 ram_addr_t raddr = addr;
3446 void *ptr = qemu_get_ram_ptr(raddr);
3447 return lduw_p(ptr);
3450 static void subpage_ram_writew(void *opaque, target_phys_addr_t addr,
3451 uint32_t value)
3453 ram_addr_t raddr = addr;
3454 void *ptr = qemu_get_ram_ptr(raddr);
3455 stw_p(ptr, value);
3458 static uint32_t subpage_ram_readl(void *opaque, target_phys_addr_t addr)
3460 ram_addr_t raddr = addr;
3461 void *ptr = qemu_get_ram_ptr(raddr);
3462 return ldl_p(ptr);
3465 static void subpage_ram_writel(void *opaque, target_phys_addr_t addr,
3466 uint32_t value)
3468 ram_addr_t raddr = addr;
3469 void *ptr = qemu_get_ram_ptr(raddr);
3470 stl_p(ptr, value);
3473 static CPUReadMemoryFunc * const subpage_ram_read[] = {
3474 &subpage_ram_readb,
3475 &subpage_ram_readw,
3476 &subpage_ram_readl,
3479 static CPUWriteMemoryFunc * const subpage_ram_write[] = {
3480 &subpage_ram_writeb,
3481 &subpage_ram_writew,
3482 &subpage_ram_writel,
3485 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3486 ram_addr_t memory, ram_addr_t region_offset)
3488 int idx, eidx;
3490 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3491 return -1;
3492 idx = SUBPAGE_IDX(start);
3493 eidx = SUBPAGE_IDX(end);
3494 #if defined(DEBUG_SUBPAGE)
3495 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3496 mmio, start, end, idx, eidx, memory);
3497 #endif
3498 if ((memory & ~TARGET_PAGE_MASK) == IO_MEM_RAM) {
3499 memory = IO_MEM_SUBPAGE_RAM;
3501 memory = (memory >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3502 for (; idx <= eidx; idx++) {
3503 mmio->sub_io_index[idx] = memory;
3504 mmio->region_offset[idx] = region_offset;
3507 return 0;
3510 static subpage_t *subpage_init (target_phys_addr_t base, ram_addr_t *phys,
3511 ram_addr_t orig_memory,
3512 ram_addr_t region_offset)
3514 subpage_t *mmio;
3515 int subpage_memory;
3517 mmio = g_malloc0(sizeof(subpage_t));
3519 mmio->base = base;
3520 subpage_memory = cpu_register_io_memory(subpage_read, subpage_write, mmio,
3521 DEVICE_NATIVE_ENDIAN);
3522 #if defined(DEBUG_SUBPAGE)
3523 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3524 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3525 #endif
3526 *phys = subpage_memory | IO_MEM_SUBPAGE;
3527 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, orig_memory, region_offset);
3529 return mmio;
3532 static int get_free_io_mem_idx(void)
3534 int i;
3536 for (i = 0; i<IO_MEM_NB_ENTRIES; i++)
3537 if (!io_mem_used[i]) {
3538 io_mem_used[i] = 1;
3539 return i;
3541 fprintf(stderr, "Ran out of io_mem_idx, max %d!\n", IO_MEM_NB_ENTRIES);
3542 return -1;
3546 * Usually, devices operate in little-endian mode.  There are devices out
3547 * there that operate in big-endian mode too.  Each device gets byte-swapped
3548 * mmio if plugged onto a CPU that uses the other endianness.
3550 * CPU Device swap?
3552 * little little no
3553 * little big yes
3554 * big little yes
3555 * big big no
3558 typedef struct SwapEndianContainer {
3559 CPUReadMemoryFunc *read[3];
3560 CPUWriteMemoryFunc *write[3];
3561 void *opaque;
3562 } SwapEndianContainer;
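/* Illustrative sketch (editorial addition): the table above boils down to
   "swap when target and device endianness differ".  A helper like this does
   not exist in the file; the real decision is made in
   cpu_register_io_memory_fixed() by calling swapendian_init(). */
#if 0   /* example only */
static bool example_mmio_needs_bswap(enum device_endian endian)
{
#ifdef TARGET_WORDS_BIGENDIAN
    return endian == DEVICE_LITTLE_ENDIAN;
#else
    return endian == DEVICE_BIG_ENDIAN;
#endif
}
#endif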
3564 static uint32_t swapendian_mem_readb (void *opaque, target_phys_addr_t addr)
3566 uint32_t val;
3567 SwapEndianContainer *c = opaque;
3568 val = c->read[0](c->opaque, addr);
3569 return val;
3572 static uint32_t swapendian_mem_readw(void *opaque, target_phys_addr_t addr)
3574 uint32_t val;
3575 SwapEndianContainer *c = opaque;
3576 val = bswap16(c->read[1](c->opaque, addr));
3577 return val;
3580 static uint32_t swapendian_mem_readl(void *opaque, target_phys_addr_t addr)
3582 uint32_t val;
3583 SwapEndianContainer *c = opaque;
3584 val = bswap32(c->read[2](c->opaque, addr));
3585 return val;
3588 static CPUReadMemoryFunc * const swapendian_readfn[3]={
3589 swapendian_mem_readb,
3590 swapendian_mem_readw,
3591 swapendian_mem_readl
3594 static void swapendian_mem_writeb(void *opaque, target_phys_addr_t addr,
3595 uint32_t val)
3597 SwapEndianContainer *c = opaque;
3598 c->write[0](c->opaque, addr, val);
3601 static void swapendian_mem_writew(void *opaque, target_phys_addr_t addr,
3602 uint32_t val)
3604 SwapEndianContainer *c = opaque;
3605 c->write[1](c->opaque, addr, bswap16(val));
3608 static void swapendian_mem_writel(void *opaque, target_phys_addr_t addr,
3609 uint32_t val)
3611 SwapEndianContainer *c = opaque;
3612 c->write[2](c->opaque, addr, bswap32(val));
3615 static CPUWriteMemoryFunc * const swapendian_writefn[3]={
3616 swapendian_mem_writeb,
3617 swapendian_mem_writew,
3618 swapendian_mem_writel
3621 static void swapendian_init(int io_index)
3623 SwapEndianContainer *c = g_malloc(sizeof(SwapEndianContainer));
3624 int i;
3626 /* Swap mmio for big endian targets */
3627 c->opaque = io_mem_opaque[io_index];
3628 for (i = 0; i < 3; i++) {
3629 c->read[i] = io_mem_read[io_index][i];
3630 c->write[i] = io_mem_write[io_index][i];
3632 io_mem_read[io_index][i] = swapendian_readfn[i];
3633 io_mem_write[io_index][i] = swapendian_writefn[i];
3635 io_mem_opaque[io_index] = c;
3638 static void swapendian_del(int io_index)
3640 if (io_mem_read[io_index][0] == swapendian_readfn[0]) {
3641 g_free(io_mem_opaque[io_index]);
3645 /* mem_read and mem_write are arrays of functions containing the
3646 function to access byte (index 0), word (index 1) and dword (index
3647 2). Functions can be omitted with a NULL function pointer.
3648 If io_index is non-zero, the corresponding io zone is
3649 modified. If it is zero, a new io zone is allocated. The return
3650 value can be used with cpu_register_physical_memory(). (-1) is
3651 returned on error. */
3652 static int cpu_register_io_memory_fixed(int io_index,
3653 CPUReadMemoryFunc * const *mem_read,
3654 CPUWriteMemoryFunc * const *mem_write,
3655 void *opaque, enum device_endian endian)
3657 int i;
3659 if (io_index <= 0) {
3660 io_index = get_free_io_mem_idx();
3661 if (io_index == -1)
3662 return io_index;
3663 } else {
3664 io_index >>= IO_MEM_SHIFT;
3665 if (io_index >= IO_MEM_NB_ENTRIES)
3666 return -1;
3669 for (i = 0; i < 3; ++i) {
3670 io_mem_read[io_index][i]
3671 = (mem_read[i] ? mem_read[i] : unassigned_mem_read[i]);
3673 for (i = 0; i < 3; ++i) {
3674 io_mem_write[io_index][i]
3675 = (mem_write[i] ? mem_write[i] : unassigned_mem_write[i]);
3677 io_mem_opaque[io_index] = opaque;
3679 switch (endian) {
3680 case DEVICE_BIG_ENDIAN:
3681 #ifndef TARGET_WORDS_BIGENDIAN
3682 swapendian_init(io_index);
3683 #endif
3684 break;
3685 case DEVICE_LITTLE_ENDIAN:
3686 #ifdef TARGET_WORDS_BIGENDIAN
3687 swapendian_init(io_index);
3688 #endif
3689 break;
3690 case DEVICE_NATIVE_ENDIAN:
3691 default:
3692 break;
3695 return (io_index << IO_MEM_SHIFT);
3698 int cpu_register_io_memory(CPUReadMemoryFunc * const *mem_read,
3699 CPUWriteMemoryFunc * const *mem_write,
3700 void *opaque, enum device_endian endian)
3702 return cpu_register_io_memory_fixed(0, mem_read, mem_write, opaque, endian);
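/* Illustrative sketch (editorial addition): the three slots in each table
   are the byte, word and dword accessors, in that order; a NULL slot falls
   back to the corresponding unassigned_mem_* handler.  my_readl, my_writel
   and my_state are placeholders for device code. */
#if 0   /* example only */
static CPUReadMemoryFunc * const my_io_read[3] = {
    NULL,        /* byte reads fall back to unassigned_mem_readb */
    NULL,        /* word reads fall back to unassigned_mem_readw */
    my_readl,    /* 32-bit reads handled by the device */
};
static CPUWriteMemoryFunc * const my_io_write[3] = {
    NULL,
    NULL,
    my_writel,
};

static int example_register_my_mmio(void *my_state)
{
    return cpu_register_io_memory(my_io_read, my_io_write, my_state,
                                  DEVICE_LITTLE_ENDIAN);
}
#endif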
3705 void cpu_unregister_io_memory(int io_table_address)
3707 int i;
3708 int io_index = io_table_address >> IO_MEM_SHIFT;
3710 swapendian_del(io_index);
3712 for (i=0;i < 3; i++) {
3713 io_mem_read[io_index][i] = unassigned_mem_read[i];
3714 io_mem_write[io_index][i] = unassigned_mem_write[i];
3716 io_mem_opaque[io_index] = NULL;
3717 io_mem_used[io_index] = 0;
3720 static void io_mem_init(void)
3722 int i;
3724 cpu_register_io_memory_fixed(IO_MEM_ROM, error_mem_read,
3725 unassigned_mem_write, NULL,
3726 DEVICE_NATIVE_ENDIAN);
3727 cpu_register_io_memory_fixed(IO_MEM_UNASSIGNED, unassigned_mem_read,
3728 unassigned_mem_write, NULL,
3729 DEVICE_NATIVE_ENDIAN);
3730 cpu_register_io_memory_fixed(IO_MEM_NOTDIRTY, error_mem_read,
3731 notdirty_mem_write, NULL,
3732 DEVICE_NATIVE_ENDIAN);
3733 cpu_register_io_memory_fixed(IO_MEM_SUBPAGE_RAM, subpage_ram_read,
3734 subpage_ram_write, NULL,
3735 DEVICE_NATIVE_ENDIAN);
3736 for (i=0; i<5; i++)
3737 io_mem_used[i] = 1;
3739 io_mem_watch = cpu_register_io_memory(watch_mem_read,
3740 watch_mem_write, NULL,
3741 DEVICE_NATIVE_ENDIAN);
3744 static void memory_map_init(void)
3746 system_memory = g_malloc(sizeof(*system_memory));
3747 memory_region_init(system_memory, "system", INT64_MAX);
3748 set_system_memory_map(system_memory);
3750 system_io = g_malloc(sizeof(*system_io));
3751 memory_region_init(system_io, "io", 65536);
3752 set_system_io_map(system_io);
3755 MemoryRegion *get_system_memory(void)
3757 return system_memory;
3760 MemoryRegion *get_system_io(void)
3762 return system_io;
3765 #endif /* !defined(CONFIG_USER_ONLY) */
3767 /* physical memory access (slow version, mainly for debug) */
3768 #if defined(CONFIG_USER_ONLY)
3769 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
3770 uint8_t *buf, int len, int is_write)
3772 int l, flags;
3773 target_ulong page;
3774 void * p;
3776 while (len > 0) {
3777 page = addr & TARGET_PAGE_MASK;
3778 l = (page + TARGET_PAGE_SIZE) - addr;
3779 if (l > len)
3780 l = len;
3781 flags = page_get_flags(page);
3782 if (!(flags & PAGE_VALID))
3783 return -1;
3784 if (is_write) {
3785 if (!(flags & PAGE_WRITE))
3786 return -1;
3787 /* XXX: this code should not depend on lock_user */
3788 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3789 return -1;
3790 memcpy(p, buf, l);
3791 unlock_user(p, addr, l);
3792 } else {
3793 if (!(flags & PAGE_READ))
3794 return -1;
3795 /* XXX: this code should not depend on lock_user */
3796 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3797 return -1;
3798 memcpy(buf, p, l);
3799 unlock_user(p, addr, 0);
3801 len -= l;
3802 buf += l;
3803 addr += l;
3805 return 0;
3808 #else
3809 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
3810 int len, int is_write)
3812 int l, io_index;
3813 uint8_t *ptr;
3814 uint32_t val;
3815 target_phys_addr_t page;
3816 ram_addr_t pd;
3817 PhysPageDesc *p;
3819 while (len > 0) {
3820 page = addr & TARGET_PAGE_MASK;
3821 l = (page + TARGET_PAGE_SIZE) - addr;
3822 if (l > len)
3823 l = len;
3824 p = phys_page_find(page >> TARGET_PAGE_BITS);
3825 if (!p) {
3826 pd = IO_MEM_UNASSIGNED;
3827 } else {
3828 pd = p->phys_offset;
3831 if (is_write) {
3832 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
3833 target_phys_addr_t addr1 = addr;
3834 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3835 if (p)
3836 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3837 /* XXX: could force cpu_single_env to NULL to avoid
3838 potential bugs */
3839 if (l >= 4 && ((addr1 & 3) == 0)) {
3840 /* 32 bit write access */
3841 val = ldl_p(buf);
3842 io_mem_write[io_index][2](io_mem_opaque[io_index], addr1, val);
3843 l = 4;
3844 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3845 /* 16 bit write access */
3846 val = lduw_p(buf);
3847 io_mem_write[io_index][1](io_mem_opaque[io_index], addr1, val);
3848 l = 2;
3849 } else {
3850 /* 8 bit write access */
3851 val = ldub_p(buf);
3852 io_mem_write[io_index][0](io_mem_opaque[io_index], addr1, val);
3853 l = 1;
3855 } else {
3856 ram_addr_t addr1;
3857 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3858 /* RAM case */
3859 ptr = qemu_get_ram_ptr(addr1);
3860 memcpy(ptr, buf, l);
3861 if (!cpu_physical_memory_is_dirty(addr1)) {
3862 /* invalidate code */
3863 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
3864 /* set dirty bit */
3865 cpu_physical_memory_set_dirty_flags(
3866 addr1, (0xff & ~CODE_DIRTY_FLAG));
3868 qemu_put_ram_ptr(ptr);
3870 } else {
3871 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
3872 !(pd & IO_MEM_ROMD)) {
3873 target_phys_addr_t addr1 = addr;
3874 /* I/O case */
3875 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
3876 if (p)
3877 addr1 = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
3878 if (l >= 4 && ((addr1 & 3) == 0)) {
3879 /* 32 bit read access */
3880 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr1);
3881 stl_p(buf, val);
3882 l = 4;
3883 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3884 /* 16 bit read access */
3885 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr1);
3886 stw_p(buf, val);
3887 l = 2;
3888 } else {
3889 /* 8 bit read access */
3890 val = io_mem_read[io_index][0](io_mem_opaque[io_index], addr1);
3891 stb_p(buf, val);
3892 l = 1;
3894 } else {
3895 /* RAM case */
3896 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK);
3897 memcpy(buf, ptr + (addr & ~TARGET_PAGE_MASK), l);
3898 qemu_put_ram_ptr(ptr);
3901 len -= l;
3902 buf += l;
3903 addr += l;
3907 /* used for ROM loading: can write to RAM and ROM */
3908 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
3909 const uint8_t *buf, int len)
3911 int l;
3912 uint8_t *ptr;
3913 target_phys_addr_t page;
3914 unsigned long pd;
3915 PhysPageDesc *p;
3917 while (len > 0) {
3918 page = addr & TARGET_PAGE_MASK;
3919 l = (page + TARGET_PAGE_SIZE) - addr;
3920 if (l > len)
3921 l = len;
3922 p = phys_page_find(page >> TARGET_PAGE_BITS);
3923 if (!p) {
3924 pd = IO_MEM_UNASSIGNED;
3925 } else {
3926 pd = p->phys_offset;
3929 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM &&
3930 (pd & ~TARGET_PAGE_MASK) != IO_MEM_ROM &&
3931 !(pd & IO_MEM_ROMD)) {
3932 /* do nothing */
3933 } else {
3934 unsigned long addr1;
3935 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
3936 /* ROM/RAM case */
3937 ptr = qemu_get_ram_ptr(addr1);
3938 memcpy(ptr, buf, l);
3939 qemu_put_ram_ptr(ptr);
3941 len -= l;
3942 buf += l;
3943 addr += l;
3947 typedef struct {
3948 void *buffer;
3949 target_phys_addr_t addr;
3950 target_phys_addr_t len;
3951 } BounceBuffer;
3953 static BounceBuffer bounce;
3955 typedef struct MapClient {
3956 void *opaque;
3957 void (*callback)(void *opaque);
3958 QLIST_ENTRY(MapClient) link;
3959 } MapClient;
3961 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3962 = QLIST_HEAD_INITIALIZER(map_client_list);
3964 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3966 MapClient *client = g_malloc(sizeof(*client));
3968 client->opaque = opaque;
3969 client->callback = callback;
3970 QLIST_INSERT_HEAD(&map_client_list, client, link);
3971 return client;
3974 void cpu_unregister_map_client(void *_client)
3976 MapClient *client = (MapClient *)_client;
3978 QLIST_REMOVE(client, link);
3979 g_free(client);
3982 static void cpu_notify_map_clients(void)
3984 MapClient *client;
3986 while (!QLIST_EMPTY(&map_client_list)) {
3987 client = QLIST_FIRST(&map_client_list);
3988 client->callback(client->opaque);
3989 cpu_unregister_map_client(client);
3993 /* Map a physical memory region into a host virtual address.
3994 * May map a subset of the requested range, given by and returned in *plen.
3995 * May return NULL if resources needed to perform the mapping are exhausted.
3996 * Use only for reads OR writes - not for read-modify-write operations.
3997 * Use cpu_register_map_client() to know when retrying the map operation is
3998 * likely to succeed.
4000 void *cpu_physical_memory_map(target_phys_addr_t addr,
4001 target_phys_addr_t *plen,
4002 int is_write)
4004 target_phys_addr_t len = *plen;
4005 target_phys_addr_t todo = 0;
4006 int l;
4007 target_phys_addr_t page;
4008 unsigned long pd;
4009 PhysPageDesc *p;
4010 ram_addr_t raddr = RAM_ADDR_MAX;
4011 ram_addr_t rlen;
4012 void *ret;
4014 while (len > 0) {
4015 page = addr & TARGET_PAGE_MASK;
4016 l = (page + TARGET_PAGE_SIZE) - addr;
4017 if (l > len)
4018 l = len;
4019 p = phys_page_find(page >> TARGET_PAGE_BITS);
4020 if (!p) {
4021 pd = IO_MEM_UNASSIGNED;
4022 } else {
4023 pd = p->phys_offset;
4026 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4027 if (todo || bounce.buffer) {
4028 break;
4030 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
4031 bounce.addr = addr;
4032 bounce.len = l;
4033 if (!is_write) {
4034 cpu_physical_memory_read(addr, bounce.buffer, l);
4037 *plen = l;
4038 return bounce.buffer;
4040 if (!todo) {
4041 raddr = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4044 len -= l;
4045 addr += l;
4046 todo += l;
4048 rlen = todo;
4049 ret = qemu_ram_ptr_length(raddr, &rlen);
4050 *plen = rlen;
4051 return ret;
4054 /* Unmaps a memory region previously mapped by cpu_physical_memory_map().
4055 * Will also mark the memory as dirty if is_write == 1. access_len gives
4056 * the amount of memory that was actually read or written by the caller.
4058 void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
4059 int is_write, target_phys_addr_t access_len)
4061 if (buffer != bounce.buffer) {
4062 if (is_write) {
4063 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
4064 while (access_len) {
4065 unsigned l;
4066 l = TARGET_PAGE_SIZE;
4067 if (l > access_len)
4068 l = access_len;
4069 if (!cpu_physical_memory_is_dirty(addr1)) {
4070 /* invalidate code */
4071 tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
4072 /* set dirty bit */
4073 cpu_physical_memory_set_dirty_flags(
4074 addr1, (0xff & ~CODE_DIRTY_FLAG));
4076 addr1 += l;
4077 access_len -= l;
4080 if (xen_enabled()) {
4081 xen_invalidate_map_cache_entry(buffer);
4083 return;
4085 if (is_write) {
4086 cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
4088 qemu_vfree(bounce.buffer);
4089 bounce.buffer = NULL;
4090 cpu_notify_map_clients();
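/* Illustrative sketch (editorial addition): the usual map/unmap pattern for
   a device writing into guest memory.  dma_addr and dma_len are
   placeholders.  Real callers also register a map client and retry when NULL
   is returned; that part is omitted here. */
#if 0   /* example only */
    target_phys_addr_t len = dma_len;
    void *host = cpu_physical_memory_map(dma_addr, &len, 1 /* is_write */);
    if (host) {
        /* Fill at most 'len' bytes: the mapping may be shorter than
           requested (bounce buffer or end of a RAM block). */
        memset(host, 0, len);
        cpu_physical_memory_unmap(host, len, 1, len);
    }
#endif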
4093 /* warning: addr must be aligned */
4094 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
4095 enum device_endian endian)
4097 int io_index;
4098 uint8_t *ptr;
4099 uint32_t val;
4100 unsigned long pd;
4101 PhysPageDesc *p;
4103 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4104 if (!p) {
4105 pd = IO_MEM_UNASSIGNED;
4106 } else {
4107 pd = p->phys_offset;
4110 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4111 !(pd & IO_MEM_ROMD)) {
4112 /* I/O case */
4113 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4114 if (p)
4115 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4116 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4117 #if defined(TARGET_WORDS_BIGENDIAN)
4118 if (endian == DEVICE_LITTLE_ENDIAN) {
4119 val = bswap32(val);
4121 #else
4122 if (endian == DEVICE_BIG_ENDIAN) {
4123 val = bswap32(val);
4125 #endif
4126 } else {
4127 /* RAM case */
4128 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4129 (addr & ~TARGET_PAGE_MASK);
4130 switch (endian) {
4131 case DEVICE_LITTLE_ENDIAN:
4132 val = ldl_le_p(ptr);
4133 break;
4134 case DEVICE_BIG_ENDIAN:
4135 val = ldl_be_p(ptr);
4136 break;
4137 default:
4138 val = ldl_p(ptr);
4139 break;
4142 return val;
4145 uint32_t ldl_phys(target_phys_addr_t addr)
4147 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4150 uint32_t ldl_le_phys(target_phys_addr_t addr)
4152 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4155 uint32_t ldl_be_phys(target_phys_addr_t addr)
4157 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
4160 /* warning: addr must be aligned */
4161 static inline uint64_t ldq_phys_internal(target_phys_addr_t addr,
4162 enum device_endian endian)
4164 int io_index;
4165 uint8_t *ptr;
4166 uint64_t val;
4167 unsigned long pd;
4168 PhysPageDesc *p;
4170 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4171 if (!p) {
4172 pd = IO_MEM_UNASSIGNED;
4173 } else {
4174 pd = p->phys_offset;
4177 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4178 !(pd & IO_MEM_ROMD)) {
4179 /* I/O case */
4180 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4181 if (p)
4182 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4184 /* XXX This is broken when device endian != cpu endian.
4185 Fix and add "endian" variable check */
4186 #ifdef TARGET_WORDS_BIGENDIAN
4187 val = (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr) << 32;
4188 val |= io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4);
4189 #else
4190 val = io_mem_read[io_index][2](io_mem_opaque[io_index], addr);
4191 val |= (uint64_t)io_mem_read[io_index][2](io_mem_opaque[io_index], addr + 4) << 32;
4192 #endif
4193 } else {
4194 /* RAM case */
4195 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4196 (addr & ~TARGET_PAGE_MASK);
4197 switch (endian) {
4198 case DEVICE_LITTLE_ENDIAN:
4199 val = ldq_le_p(ptr);
4200 break;
4201 case DEVICE_BIG_ENDIAN:
4202 val = ldq_be_p(ptr);
4203 break;
4204 default:
4205 val = ldq_p(ptr);
4206 break;
4209 return val;
4212 uint64_t ldq_phys(target_phys_addr_t addr)
4214 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4217 uint64_t ldq_le_phys(target_phys_addr_t addr)
4219 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4222 uint64_t ldq_be_phys(target_phys_addr_t addr)
4224 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
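/* The 64-bit I/O path above issues two 32-bit reads and combines them based
   on TARGET_WORDS_BIGENDIAN only; as the XXX note inside ldq_phys_internal()
   says, the endian argument is ignored there, so ldq_le_phys() and
   ldq_be_phys() are only fully correct for RAM-backed addresses in this
   version. */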
4227 /* XXX: optimize */
4228 uint32_t ldub_phys(target_phys_addr_t addr)
4230 uint8_t val;
4231 cpu_physical_memory_read(addr, &val, 1);
4232 return val;
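/* Single-byte accesses need no endian variants; ldub_phys() simply goes
   through the generic cpu_physical_memory_read() path, and the
   "XXX: optimize" note flags the cost of taking that path for one byte. */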
4235 /* warning: addr must be aligned */
4236 static inline uint32_t lduw_phys_internal(target_phys_addr_t addr,
4237 enum device_endian endian)
4239 int io_index;
4240 uint8_t *ptr;
4241 uint32_t val;
4242 unsigned long pd;
4243 PhysPageDesc *p;
4245 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4246 if (!p) {
4247 pd = IO_MEM_UNASSIGNED;
4248 } else {
4249 pd = p->phys_offset;
4252 if ((pd & ~TARGET_PAGE_MASK) > IO_MEM_ROM &&
4253 !(pd & IO_MEM_ROMD)) {
4254 /* I/O case */
4255 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4256 if (p)
4257 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4258 val = io_mem_read[io_index][1](io_mem_opaque[io_index], addr);
4259 #if defined(TARGET_WORDS_BIGENDIAN)
4260 if (endian == DEVICE_LITTLE_ENDIAN) {
4261 val = bswap16(val);
4263 #else
4264 if (endian == DEVICE_BIG_ENDIAN) {
4265 val = bswap16(val);
4267 #endif
4268 } else {
4269 /* RAM case */
4270 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4271 (addr & ~TARGET_PAGE_MASK);
4272 switch (endian) {
4273 case DEVICE_LITTLE_ENDIAN:
4274 val = lduw_le_p(ptr);
4275 break;
4276 case DEVICE_BIG_ENDIAN:
4277 val = lduw_be_p(ptr);
4278 break;
4279 default:
4280 val = lduw_p(ptr);
4281 break;
4284 return val;
4287 uint32_t lduw_phys(target_phys_addr_t addr)
4289 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
4292 uint32_t lduw_le_phys(target_phys_addr_t addr)
4294 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
4297 uint32_t lduw_be_phys(target_phys_addr_t addr)
4299 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
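/* The 16-bit loads mirror the 32-bit path: index [1] selects the 2-byte
   I/O callback, bswap16() handles a device/target endian mismatch, and
   lduw_{le,be}_p() covers the RAM case. */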
4302 /* warning: addr must be aligned. The ram page is not marked as dirty
4303 and the code inside is not invalidated. It is useful if the dirty
4304 bits are used to track modified PTEs */
4305 void stl_phys_notdirty(target_phys_addr_t addr, uint32_t val)
4307 int io_index;
4308 uint8_t *ptr;
4309 unsigned long pd;
4310 PhysPageDesc *p;
4312 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4313 if (!p) {
4314 pd = IO_MEM_UNASSIGNED;
4315 } else {
4316 pd = p->phys_offset;
4319 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4320 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4321 if (p)
4322 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4323 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4324 } else {
4325 unsigned long addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4326 ptr = qemu_get_ram_ptr(addr1);
4327 stl_p(ptr, val);
4329 if (unlikely(in_migration)) {
4330 if (!cpu_physical_memory_is_dirty(addr1)) {
4331 /* invalidate code */
4332 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4333 /* set dirty bit */
4334 cpu_physical_memory_set_dirty_flags(
4335 addr1, (0xff & ~CODE_DIRTY_FLAG));
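/* stl_phys_notdirty() above and stq_phys_notdirty() below bypass the normal
   dirty tracking, which is what makes them suitable when the dirty bits are
   themselves being used to track guest page-table updates.  During live
   migration the 32-bit variant still invalidates any translated code on the
   page and sets the dirty flags (minus CODE_DIRTY_FLAG) so that migration
   sees the update. */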
4341 void stq_phys_notdirty(target_phys_addr_t addr, uint64_t val)
4343 int io_index;
4344 uint8_t *ptr;
4345 unsigned long pd;
4346 PhysPageDesc *p;
4348 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4349 if (!p) {
4350 pd = IO_MEM_UNASSIGNED;
4351 } else {
4352 pd = p->phys_offset;
4355 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4356 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4357 if (p)
4358 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4359 #ifdef TARGET_WORDS_BIGENDIAN
4360 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val >> 32);
4361 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val);
4362 #else
4363 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4364 io_mem_write[io_index][2](io_mem_opaque[io_index], addr + 4, val >> 32);
4365 #endif
4366 } else {
4367 ptr = qemu_get_ram_ptr(pd & TARGET_PAGE_MASK) +
4368 (addr & ~TARGET_PAGE_MASK);
4369 stq_p(ptr, val);
4373 /* warning: addr must be aligned */
4374 static inline void stl_phys_internal(target_phys_addr_t addr, uint32_t val,
4375 enum device_endian endian)
4377 int io_index;
4378 uint8_t *ptr;
4379 unsigned long pd;
4380 PhysPageDesc *p;
4382 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4383 if (!p) {
4384 pd = IO_MEM_UNASSIGNED;
4385 } else {
4386 pd = p->phys_offset;
4389 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4390 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4391 if (p)
4392 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4393 #if defined(TARGET_WORDS_BIGENDIAN)
4394 if (endian == DEVICE_LITTLE_ENDIAN) {
4395 val = bswap32(val);
4397 #else
4398 if (endian == DEVICE_BIG_ENDIAN) {
4399 val = bswap32(val);
4401 #endif
4402 io_mem_write[io_index][2](io_mem_opaque[io_index], addr, val);
4403 } else {
4404 unsigned long addr1;
4405 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4406 /* RAM case */
4407 ptr = qemu_get_ram_ptr(addr1);
4408 switch (endian) {
4409 case DEVICE_LITTLE_ENDIAN:
4410 stl_le_p(ptr, val);
4411 break;
4412 case DEVICE_BIG_ENDIAN:
4413 stl_be_p(ptr, val);
4414 break;
4415 default:
4416 stl_p(ptr, val);
4417 break;
4419 if (!cpu_physical_memory_is_dirty(addr1)) {
4420 /* invalidate code */
4421 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
4422 /* set dirty bit */
4423 cpu_physical_memory_set_dirty_flags(addr1,
4424 (0xff & ~CODE_DIRTY_FLAG));
4429 void stl_phys(target_phys_addr_t addr, uint32_t val)
4431 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4434 void stl_le_phys(target_phys_addr_t addr, uint32_t val)
4436 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4439 void stl_be_phys(target_phys_addr_t addr, uint32_t val)
4441 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4444 /* XXX: optimize */
4445 void stb_phys(target_phys_addr_t addr, uint32_t val)
4447 uint8_t v = val;
4448 cpu_physical_memory_write(addr, &v, 1);
4451 /* warning: addr must be aligned */
4452 static inline void stw_phys_internal(target_phys_addr_t addr, uint32_t val,
4453 enum device_endian endian)
4455 int io_index;
4456 uint8_t *ptr;
4457 unsigned long pd;
4458 PhysPageDesc *p;
4460 p = phys_page_find(addr >> TARGET_PAGE_BITS);
4461 if (!p) {
4462 pd = IO_MEM_UNASSIGNED;
4463 } else {
4464 pd = p->phys_offset;
4467 if ((pd & ~TARGET_PAGE_MASK) != IO_MEM_RAM) {
4468 io_index = (pd >> IO_MEM_SHIFT) & (IO_MEM_NB_ENTRIES - 1);
4469 if (p)
4470 addr = (addr & ~TARGET_PAGE_MASK) + p->region_offset;
4471 #if defined(TARGET_WORDS_BIGENDIAN)
4472 if (endian == DEVICE_LITTLE_ENDIAN) {
4473 val = bswap16(val);
4475 #else
4476 if (endian == DEVICE_BIG_ENDIAN) {
4477 val = bswap16(val);
4479 #endif
4480 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
4481 } else {
4482 unsigned long addr1;
4483 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
4484 /* RAM case */
4485 ptr = qemu_get_ram_ptr(addr1);
4486 switch (endian) {
4487 case DEVICE_LITTLE_ENDIAN:
4488 stw_le_p(ptr, val);
4489 break;
4490 case DEVICE_BIG_ENDIAN:
4491 stw_be_p(ptr, val);
4492 break;
4493 default:
4494 stw_p(ptr, val);
4495 break;
4497 if (!cpu_physical_memory_is_dirty(addr1)) {
4498 /* invalidate code */
4499 tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
4500 /* set dirty bit */
4501 cpu_physical_memory_set_dirty_flags(addr1,
4502 (0xff & ~CODE_DIRTY_FLAG));
4507 void stw_phys(target_phys_addr_t addr, uint32_t val)
4509 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4512 void stw_le_phys(target_phys_addr_t addr, uint32_t val)
4514 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4517 void stw_be_phys(target_phys_addr_t addr, uint32_t val)
4519 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4522 /* XXX: optimize */
4523 void stq_phys(target_phys_addr_t addr, uint64_t val)
4525 val = tswap64(val);
4526 cpu_physical_memory_write(addr, &val, 8);
4529 void stq_le_phys(target_phys_addr_t addr, uint64_t val)
4531 val = cpu_to_le64(val);
4532 cpu_physical_memory_write(addr, &val, 8);
4535 void stq_be_phys(target_phys_addr_t addr, uint64_t val)
4537 val = cpu_to_be64(val);
4538 cpu_physical_memory_write(addr, &val, 8);
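/* The unoptimized 64-bit stores above byte-swap into a local temporary
   (tswap64/cpu_to_le64/cpu_to_be64) and hand the eight bytes to
   cpu_physical_memory_write() rather than dispatching on I/O versus RAM
   themselves; hence the "XXX: optimize" note. */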
4541 /* virtual memory access for debug (includes writing to ROM) */
4542 int cpu_memory_rw_debug(CPUState *env, target_ulong addr,
4543 uint8_t *buf, int len, int is_write)
4545 int l;
4546 target_phys_addr_t phys_addr;
4547 target_ulong page;
4549 while (len > 0) {
4550 page = addr & TARGET_PAGE_MASK;
4551 phys_addr = cpu_get_phys_page_debug(env, page);
4552 /* if no physical page mapped, return an error */
4553 if (phys_addr == -1)
4554 return -1;
4555 l = (page + TARGET_PAGE_SIZE) - addr;
4556 if (l > len)
4557 l = len;
4558 phys_addr += (addr & ~TARGET_PAGE_MASK);
4559 if (is_write)
4560 cpu_physical_memory_write_rom(phys_addr, buf, l);
4561 else
4562 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4563 len -= l;
4564 buf += l;
4565 addr += l;
4567 return 0;
4569 #endif
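/* cpu_memory_rw_debug() above walks the guest virtual range page by page,
   translating each page with cpu_get_phys_page_debug().  The chunk size
   l = (page + TARGET_PAGE_SIZE) - addr is the number of bytes left in the
   current page: with 4 KiB pages, addr = 0x12345678 gives
   page = 0x12345000 and l = 0x988.  Writes go through
   cpu_physical_memory_write_rom() so a debugger can also patch ROM-backed
   areas, e.g. to plant breakpoints. */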
4571 /* In deterministic execution mode, instructions that perform device I/O
4572 must be at the end of the TB */
4573 void cpu_io_recompile(CPUState *env, void *retaddr)
4575 TranslationBlock *tb;
4576 uint32_t n, cflags;
4577 target_ulong pc, cs_base;
4578 uint64_t flags;
4580 tb = tb_find_pc((unsigned long)retaddr);
4581 if (!tb) {
4582 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4583 retaddr);
4585 n = env->icount_decr.u16.low + tb->icount;
4586 cpu_restore_state(tb, env, (unsigned long)retaddr);
4587 /* Calculate how many instructions had been executed before the fault
4588 occurred. */
4589 n = n - env->icount_decr.u16.low;
4590 /* Generate a new TB ending on the I/O insn. */
4591 n++;
4592 /* On MIPS and SH, delay slot instructions can only be restarted if
4593 they were already the first instruction in the TB. If this is not
4594 the first instruction in a TB then re-execute the preceding
4595 branch. */
4596 #if defined(TARGET_MIPS)
4597 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4598 env->active_tc.PC -= 4;
4599 env->icount_decr.u16.low++;
4600 env->hflags &= ~MIPS_HFLAG_BMASK;
4602 #elif defined(TARGET_SH4)
4603 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4604 && n > 1) {
4605 env->pc -= 2;
4606 env->icount_decr.u16.low++;
4607 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4609 #endif
4610 /* This should never happen. */
4611 if (n > CF_COUNT_MASK)
4612 cpu_abort(env, "TB too big during recompile");
4614 cflags = n | CF_LAST_IO;
4615 pc = tb->pc;
4616 cs_base = tb->cs_base;
4617 flags = tb->flags;
4618 tb_phys_invalidate(tb, -1);
4619 /* FIXME: In theory this could raise an exception. In practice
4620 we have already translated the block once so it's probably ok. */
4621 tb_gen_code(env, pc, cs_base, flags, cflags);
4622 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4623 the first in the TB) then we end up generating a whole new TB and
4624 repeating the fault, which is horribly inefficient.
4625 Better would be to execute just this insn uncached, or generate a
4626 second new TB. */
4627 cpu_resume_from_signal(env, NULL);
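/* In cpu_io_recompile(), the difference between the TB's instruction budget
   (icount_decr.u16.low + tb->icount) and the counter after
   cpu_restore_state() gives how many instructions completed before the I/O
   access; one more is added so the access itself is included, and the block
   is regenerated with cflags = n | CF_LAST_IO so it ends on that I/O
   instruction. */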
4630 #if !defined(CONFIG_USER_ONLY)
4632 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4634 int i, target_code_size, max_target_code_size;
4635 int direct_jmp_count, direct_jmp2_count, cross_page;
4636 TranslationBlock *tb;
4638 target_code_size = 0;
4639 max_target_code_size = 0;
4640 cross_page = 0;
4641 direct_jmp_count = 0;
4642 direct_jmp2_count = 0;
4643 for(i = 0; i < nb_tbs; i++) {
4644 tb = &tbs[i];
4645 target_code_size += tb->size;
4646 if (tb->size > max_target_code_size)
4647 max_target_code_size = tb->size;
4648 if (tb->page_addr[1] != -1)
4649 cross_page++;
4650 if (tb->tb_next_offset[0] != 0xffff) {
4651 direct_jmp_count++;
4652 if (tb->tb_next_offset[1] != 0xffff) {
4653 direct_jmp2_count++;
4657 /* XXX: avoid using doubles? */
4658 cpu_fprintf(f, "Translation buffer state:\n");
4659 cpu_fprintf(f, "gen code size %td/%ld\n",
4660 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4661 cpu_fprintf(f, "TB count %d/%d\n",
4662 nb_tbs, code_gen_max_blocks);
4663 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4664 nb_tbs ? target_code_size / nb_tbs : 0,
4665 max_target_code_size);
4666 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4667 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4668 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4669 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4670 cross_page,
4671 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4672 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4673 direct_jmp_count,
4674 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4675 direct_jmp2_count,
4676 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4677 cpu_fprintf(f, "\nStatistics:\n");
4678 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4679 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4680 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4681 tcg_dump_info(f, cpu_fprintf);
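/* The includes below instantiate softmmu_template.h four times with
   SHIFT 0..3 (1-, 2-, 4- and 8-byte accesses) under MMUSUFFIX _cmmu and
   SOFTMMU_CODE_ACCESS, producing the code-fetch variants of the softmmu
   load helpers used by the translator; env is temporarily aliased to
   cpu_single_env for these instantiations. */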
4684 #define MMUSUFFIX _cmmu
4685 #undef GETPC
4686 #define GETPC() NULL
4687 #define env cpu_single_env
4688 #define SOFTMMU_CODE_ACCESS
4690 #define SHIFT 0
4691 #include "softmmu_template.h"
4693 #define SHIFT 1
4694 #include "softmmu_template.h"
4696 #define SHIFT 2
4697 #include "softmmu_template.h"
4699 #define SHIFT 3
4700 #include "softmmu_template.h"
4702 #undef env
4704 #endif